
# author:  Norm Matloff

# for each point (X1,X2) in the data set, calculate and plot the ratio
# of the regressions of Y on (X1,X2) for the 2 subgroups defined by X3

# uses kNN; default value of k is floor(sqrt(n)), capped at 250

library(ggplot2)

# xyz:  data matrix or frame
# yvar:  index of the Y variable
# xvars:  index of the 2 X variables
# grpvar:  index of the group-defining variable
# numergrp:  value of the group variable corresponding to the 
#    numerator in the ratios
# k:  number of nearest neighbors
# xlb:  optional label for the horizontal axis
# ylb:  optional label for the vertical axis
# coding:  use "color" or "dot size" to represent the ratio value at
#          each plotted point; anything other than "color" gives dot size
# samp:  if positive, display only a random sample of 'samp' points (but
#        still compute ratios on the basis of the full data)
# cls:  Snow cluster, if present
# nchunks:  number of chunks; see Smoother.R 

# returns a ggplot2 plot

ratioest <- function(xyz,yvar,xvars,grpvar,numergrp,
      k=NULL,coding="dot size",samp=0,
      xlb=NULL,ylb=NULL,cls=NULL,nchunks=length(cls)) {
   # ggplot2 is needed for the plot; stop with a clear message if it is
   # missing (require() would merely return FALSE and let us fail later)
   if (!requireNamespace("ggplot2",quietly=TRUE))
      stop("package 'ggplot2' is required")
   if (length(xvars) != 2) stop("xvars must specify exactly 2 X variables")
   if (length(numergrp) != 1) stop("numergrp must be a single value")
   nrx <- nrow(xyz)
   # default k:  floor(sqrt(n)), but never more than 250
   if (is.null(k)) k <- min(floor(sqrt(nrx)),250)
   # row numbers of each group; split() names the list elements by the
   # values of the group variable
   rws <- split(seq_len(nrx),xyz[,grpvar])
   if (length(rws) != 2) stop("must have exactly 2 groups")
   # find the numerator group by its VALUE (the element name), not by
   # the row numbers stored inside the element
   numer <- match(as.character(numergrp),names(rws))
   if (is.na(numer)) stop("numergrp is not a value of the group variable")
   rows1 <- rws[[numer]]       # numerator group
   rows0 <- rws[[3 - numer]]   # denominator group (the other one)
   x <- xyz[,xvars]
   y <- xyz[,yvar]
   # drop=FALSE keeps 2-column structure even if a group has just 1 row
   x1 <- x[rows1,,drop=FALSE]
   x0 <- x[rows0,,drop=FALSE]
   y1 <- y[rows1]
   y0 <- y[rows0]
   # estimated regression values within group 1
   eyhat1 <- smoothz(cbind(x1,y1),knnreg,k,cls=cls,nchunks=nchunks)
   # estimated regression values within group 0
   eyhat0 <- smoothz(cbind(x0,y0),knnreg,k,cls=cls,nchunks=nchunks)
   # eyhat1all, eyhat0all will be estimated regression functions for
   # groups 1, 0 on ALL observations, regardless of group membership
   eyhat1all <- numeric(nrx)
   eyhat0all <- numeric(nrx)
   eyhat1all[rows1] <- eyhat1
   eyhat0all[rows0] <- eyhat0
   # cross-group predictions:  group 1's fit evaluated at the group 0
   # points, and vice versa
   eyhat1all[rows0] <- smoothzpred(x0,x1,eyhat1,cls=cls,nchunks=nchunks)
   eyhat0all[rows1] <- smoothzpred(x1,x0,eyhat0,cls=cls,nchunks=nchunks)
   rat <- eyhat1all / eyhat0all
   # optionally thin the points to be DISPLAYED; the ratios themselves
   # were computed above from the full data
   if (samp > 0) {
      keep <- sample(seq_len(nrx),min(samp,nrx))
      xs <- x[keep,,drop=FALSE]
      ratio <- rat[keep]
   } else {
      xs <- x
      ratio <- rat
   }
   dfx <- data.frame(xs,ratio)
   # for now; change later, e.g. via evalq()
   names(dfx) <- c("X1","X2","ratio")
   # only columns of dfx are mapped; aes() takes x and y positionally,
   # everything else must be a named aesthetic
   if (coding == "color") {
      gp <- ggplot2::ggplot(dfx,ggplot2::aes(X1,X2,colour=ratio))
      gp <- gp + ggplot2::geom_point()
   } else {
      gp <- ggplot2::ggplot(dfx,ggplot2::aes(X1,X2))
      gp <- gp + ggplot2::geom_point(ggplot2::aes(size=ratio))
   }
   if (!is.null(xlb)) gp <- gp + ggplot2::xlab(xlb)
   if (!is.null(ylb)) gp <- gp + ggplot2::ylab(ylb)
   gp
}

