\name{boundary}
\alias{boundary}

\title{
Boundary Curve Plots
}

\description{

Group comparison via comparison of regression boundary curves.
Nonparametrically estimates the regression function of Z on X and Y, and
plots the boundary curve separating the points (X,Y) having
higher-than-average conditional mean Z from the points having
lower-than-average values.  By plotting one curve for each defined
subgroup, this function enables exploration of the interaction of the
groups and X, Y and Z.  The estimation is optionally done via parallel
computation.

}

\usage{
boundary(xyz,zvar=1,xvars=2:3,grpvar=NULL,bval=NULL,bandhw=0.2,k=NULL,
      xlb=NULL,ylb=NULL,clr=c("blue","red","darkgreen","brown"),
      cls=NULL,nchunks=NULL)
}

\arguments{
   \item{xyz}{The data, in data frame or matrix form.}
   \item{zvar}{Column of \code{xyz} to serve as the Z variable.}
   \item{xvars}{Columns of \code{xyz} to serve as the 2 X
   variables.}
   \item{grpvar}{Column for the grouping variable; vector or factor.  If
      NULL, just one boundary curve will be plotted.}
   \item{bval}{Boundary value.  The curve is an estimate of the set of
      points (u,v) for which r(u,v) equals \code{bval}, where r() is the
      regression function of Z against X and Y.  The default value is
      the estimated overall ungrouped, unconditional mean Z.}
   \item{bandhw}{The boundary curve is estimated by first finding an
      estimated band around the boundary.  There is no formula for
      choosing this value, but smaller values should be used for larger
      data sets.}
   \item{checkna}{If TRUE, remove any row having at least one NA value
      among the displayed columns.}
   \item{xlb}{Label for horizontal axis.}
   \item{ylb}{Label for vertical axis.}
   \item{clr}{A character vector specifying the colors of the curves.
      Is recycled if it is shorter than the number of groups.}
   \item{cls}{Cluster to use (see the \code{parallel} package) for
      parallel computation.}
   \item{nchunks}{Number of chunks to break the computation into.}
}


\details{The estimated boundary curves will be computed and plotted.
If \code{cls} is non-null, the computation will be done in parallel.
}

\value{A \code{ggplot2} object, which when printed causes the graph to
be displayed.  In addition, the coordinates of some random data points
are printed out (from within the code, not the return object), so as to
identify on which side of a curve the higher- and lower-than-average
points lie.
}
}

\author{
Norm Matloff <matloff@cs.ucdavis.edu>
}

% \keyword{
% }

% \seealso{
% }

\examples{
# Adult (census income) data from the UCI repository.  The file has no
# header row, so read it with header=FALSE; header=TRUE would turn the
# first record into column names and drop it from the data.
ad <- read.csv("http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",header=FALSE)

# Z variable: indicator for the " >50K" income level in column 15 (the
# level is matched with its leading blank, exactly as read from the file)
ad$gt50 <- as.integer(ad[,15] == " >50K")

# education code (column 5), recoded to years of schooling; the lookup
# values are in tenths of a year and are divided by 10 below
# NOTE(review): codes 1, 2 and 8 are not in the lookup and are passed
# through unchanged (then divided by 10) -- confirm this is intended
educode <- c( 3, 4, 5,  6,  7,  9, 10, 11, 12, 13, 14, 15, 16)
tenths  <- c(50,70,90,100,110,120,130,140,140,160,180,200,210)
tmp <- ad[,5]
idx <- match(tmp,educode)
tmp <- ifelse(is.na(idx),tmp,tenths[idx])
ad$edu <- tmp / 10

ad$age <- ad[,1]
ad$male <- as.integer(ad[,10]==" Male")

# cluster for the parallel computation (see the 'parallel' package)
c2 <- parallel::makeCluster(2)
# Z = gt50 (col. 16), X = age (col. 18), Y = edu (col. 17), one curve
# per value of male (col. 19)
boundary(ad,16,18:17,19,xlb="age",ylb="edu",cls=c2,bval=0.20)
parallel::stopCluster(c2)
}


