
# author:  Norm Matloff

# avoids the "black screen problem" in scatter plots, including for
# regression

# plots the estimated density of (X1,X2), or the estimated regression
# function of Y against (X1,X2), by color-coding the density heights 

# uses kNN; default value of k is floor(sqrt(n))

library(ggplot2)

# x:  data, (X1,X2) in the density case, (X1,X2,Y) in the regression
#    case (senses the case via ncol(x))
# k:  number of nearest neighbors
# heat:  if true, graph by color, else by dot size
# cls:  snow cluster

# Plots a kNN-smoothed estimate at the points (X1,X2): the density
# estimate when x has 2 columns, otherwise the regression estimate.
#
# arguments:
#    cls:  cluster object, passed through to smoothz()
#    x:  data matrix or data frame; rows with missing values are removed
#    k:  number of nearest neighbors; if NULL, floor(sqrt(n)) is used,
#        where n is the number of complete rows
#    heat:  if TRUE, encode the estimate by point color, else by point size
#    xlb, ylb:  optional axis labels; if NULL, ggplot2 defaults are kept
#
# value:  the ggplot object (printing it draws the plot)

twodimest <- function(cls,x,k=NULL,
      heat=FALSE,xlb=NULL,ylb=NULL) {
   # remove incomplete rows before choosing k, so that the default k is
   # based on the number of points actually used; drop=FALSE keeps x
   # two-dimensional even if only one row survives
   x <- x[complete.cases(x),,drop=FALSE]
   if (is.null(k)) k <- floor(sqrt(nrow(x)))
   # 2 columns means density estimation, otherwise regression
   sf <- if (ncol(x) == 2) knndens else knnreg
   est <- smoothz(cls,x,sf,k)
   # build the plotting frame from the first two columns only; in the
   # regression case x also carries Y, and naming the columns of
   # data.frame(x,est) positionally would label Y (not the estimate)
   # as "est"
   # NOTE(review): assumes smoothz() returns one value per row of x
   dfx <- data.frame(X1=x[,1],X2=x[,2],est=as.vector(est))
   if (heat) {
      gp <- ggplot(dfx,aes(X1,X2,colour=est)) + geom_point(size=1.8)
   } else {
      gp <- ggplot(dfx,aes(X1,X2)) + geom_point(aes(size=est))
   }
   if (!is.null(xlb)) gp <- gp + xlab(xlb)
   if (!is.null(ylb)) gp <- gp + ylab(ylb)
   gp
}

# Simulation demo:  draws n points in the plane, about half from a
# bivariate normal with both means 0 and the rest from one with both
# means 1 (common sd sig, independent coordinates), then plots the
# estimated density via twodimest().
#
# arguments:
#    n:  total number of points
#    sig:  standard deviation of each coordinate
#    cls:  cluster object, passed through to twodimest()
#
# value:  the ggplot object returned by twodimest()

sim <- function(n,sig,cls) {
   n2 <- 2 * n
   # n candidate points from each of the two populations
   xa <- matrix(rnorm(n2,mean=0,sd=sig),ncol=2)
   xb <- matrix(rnorm(n2,mean=1,sd=sig),ncol=2)
   # split the row indices at random between the two populations
   pop1 <- sample(1:n,n/2,replace=FALSE)
   pop2 <- setdiff(1:n,pop1)
   x <- rbind(xa[pop1,],xb[pop2,])
   # the plotting function defined in this file is twodimest(), whose
   # first argument is the cluster
   twodimest(cls,x,k=NULL)
}

