
# author:  Norm Matloff

# consider regression analysis, i.e. estimating conditional expected
# values, in this case using a parametric model

# residual analysis is often used to assess the fit, with for instance a
# region of mainly large positive residuals indicating that the model
# underfits in that region 

# the code here, though, analyzes residuals in
# the sense of 

#    r_i = parmhat_i - nonparmhat_i

# where parmhat_i is the parametric regression estimate at point i and
# nonparmhat_i is the corresponding k-NN estimate; the former is
# provided by the user, and the latter by the code here

# a "heat map" (color- or dot-size-coded Z-axis) of smoothed residuals
# is plotted

# xy:  data matrix or frame
# yvar:  index of the response variable
# xvars:  indices of the predictors variables
# parmhat:  element i is the parametric estimated regression at observation i
# m:  the m most frequent positive and m most frequent negative
#     residuals (according to density estimation) are plotted
# dispcols:  display for these 2 predictors
# k:  number of nearest neighbors to use in smoothing
# cls:  see Smoother.R
# coding: "dot size" or "color" coding of Z-axis
# retres:  if TRUE, the smoothed residuals are returned

resdisp <- function(xy,yvar,xvars,parmhat,m,dispcols=xvars,k=NULL,
      cls=NULL,coding="dot size",retres=F) {
   require(ggplot2)
   if (is.null(k)) k <- min(ceiling(sqrt(nrow(xy))),250)
   xd <- xy[,dispcols]
   nonparmhat <- smoothz(xy[,c(xvars,yvar)],knnreg,k=k,cls=cls)
   resids <- parmhat - nonparmhat
   posresids <- which(resids > 0)
   negresids <- which(resids < 0)
   xdens <- smoothz(xy[,xvars],knndens,k=k,cls=cls)
   # find the indices of the m most frequent positive resids, and m 
   # most frequent neg 
   freqpos <- findtop(xdens,posresids,m)
   freqneg <- findtop(xdens,negresids,m)
   xtrms <- c(freqneg,freqpos)
   df <- data.frame(xd[xtrms,],resids[xtrms])
   nms <- names(xd)
   names(df) <- c(nms,"resids")
   baseplot <- ggplot(df) 
   if (coding == "dot size") {
      print(baseplot + 
         geom_point(aes_string(x=nms[1],y=nms[2],size="resids")))
   } else
      print(baseplot + 
         geom_point(aes_string(x=nms[1],y=nms[2],color="resids")))
   if (retres) resids
}

