# get.statistics.per.locus.information.for.hybrid.and.its.two.parents.R

source("./statistics.per.locus.file.helper.functions.R")
source("./upper.quartile.normalization.R")


# Compute upper-quartile read counts jointly over the replicates of three conditions.
#
# Each array.of.all.condition.N.read.count.arrays argument is indexed as
# [replicate, locus]: its first dimension is taken as the number of replicates
# and num.loci is used as the column count of the stacked matrix.
# The three inputs are stacked row-wise (condition 1 first, then 2, then 3) and
# passed in one call to get.upper.quartile.read.counts.for.multiple.read.count.arrays();
# the result of that call is then split back out per condition.
# NOTE(review): the result is indexed with a per-replicate mask, so it is
# presumably one value per row of the stacked matrix -- confirm against the helper.
#
# Returns a list with the elements
#   array.of.all.condition.1.upper.quartile.read.counts
#   array.of.all.condition.2.upper.quartile.read.counts
#   array.of.all.condition.3.upper.quartile.read.counts
"get.upper.quartile.read.count.information.for.three.conditions" <- function(array.of.all.condition.1.read.count.arrays, array.of.all.condition.2.read.count.arrays,
										array.of.all.condition.3.read.count.arrays, num.loci){

	replicates.per.condition <- c(nrow(array.of.all.condition.1.read.count.arrays),
					nrow(array.of.all.condition.2.read.count.arrays),
					nrow(array.of.all.condition.3.read.count.arrays))

	# Label every row of the stacked matrix with the condition (1, 2 or 3) it belongs to.
	condition.of.each.replicate <- rep(1:3, times=replicates.per.condition)
	is.condition.1 <- condition.of.each.replicate == 1L
	is.condition.2 <- condition.of.each.replicate == 2L
	is.condition.3 <- condition.of.each.replicate == 3L

	# Stack the three inputs into one [replicate, locus] matrix.
	stacked.read.counts <- matrix(NA, nrow=length(condition.of.each.replicate), ncol=num.loci)
	stacked.read.counts[is.condition.1,] <- array.of.all.condition.1.read.count.arrays
	stacked.read.counts[is.condition.2,] <- array.of.all.condition.2.read.count.arrays
	stacked.read.counts[is.condition.3,] <- array.of.all.condition.3.read.count.arrays

	upper.quartiles <- get.upper.quartile.read.counts.for.multiple.read.count.arrays(stacked.read.counts)

	list( array.of.all.condition.1.upper.quartile.read.counts=upper.quartiles[is.condition.1],
		array.of.all.condition.2.upper.quartile.read.counts=upper.quartiles[is.condition.2],
		array.of.all.condition.3.upper.quartile.read.counts=upper.quartiles[is.condition.3])
}

# Randomly down-sample every read count array in a [array, entry] matrix.
#
# Arguments:
#   array.of.all.read.count.arrays - matrix whose rows are individual read count arrays
#   read.count.subset.fraction     - passed unchanged to get.subset.read.count.array()
#                                    for every row
#
# Returns an array with the same dimensions as the input in which row i holds
# get.subset.read.count.array(row i, read.count.subset.fraction).
# An input with zero rows yields an (empty) array of the same dimensions.
"get.subset.array.of.all.read.count.arrays" <- function(array.of.all.read.count.arrays, read.count.subset.fraction){
	num.arrays <- dim(array.of.all.read.count.arrays)[1]
	subset.array.of.all.read.count.arrays <- array(NA, dim=dim(array.of.all.read.count.arrays))
	# seq_len() gives an empty sequence for zero rows; 1:0 would visit rows 1 and 0.
	for(array.index in seq_len(num.arrays)){
		subset.array.of.all.read.count.arrays[array.index,] <- get.subset.read.count.array(array.of.all.read.count.arrays[array.index,], read.count.subset.fraction)
	}
	subset.array.of.all.read.count.arrays
}

# Randomly down-sample a single read count array.
#
# Arguments:
#   read.count.array           - vector of read counts (used as the binomial size)
#   read.count.subset.fraction - a single probability: the chance that each
#                                individual read is kept
#
# Returns a one-dimensional array of the same length as read.count.array whose
# i-th entry is one draw from Binomial(read.count.array[i], read.count.subset.fraction).
# An empty input yields an empty array.
"get.subset.read.count.array" <- function(read.count.array, read.count.subset.fraction){
	num.entries <- length(read.count.array)
	# rbinom() is vectorised over its size argument, so one call makes one draw
	# per entry and also handles num.entries == 0 (a 1:num.entries loop would not).
	array(rbinom(num.entries, read.count.array, read.count.subset.fraction), dim=num.entries)
}

# Load read count information for two parents and their hybrid and split it per condition.
#
# Arguments (each is a vector with one entry per replicate; the entries are passed,
# concatenated in the order parent A, parent B, hybrid, to
# get.array.of.all.read.count.arrays()):
#   parent.A.statistics.per.locus.file.array
#   parent.B.statistics.per.locus.file.array
#   hybrid.statistics.per.locus.file.array
#
# Returns a list with the elements
#   locus.array                                         - as returned by get.array.of.all.read.count.arrays()
#   array.of.all.parent.A.read.count.arrays             - [replicate, locus] matrix
#   array.of.all.parent.B.read.count.arrays             - [replicate, locus] matrix
#   array.of.all.hybrid.read.count.arrays               - [replicate, locus] matrix
#   array.of.all.parent.A.upper.quartile.read.counts    - entries selected by the parent A mask
#   array.of.all.parent.B.upper.quartile.read.counts    - entries selected by the parent B mask
#   array.of.all.hybrid.upper.quartile.read.counts      - entries selected by the hybrid mask
#   array.of.all.hybrid.ref.allele.read.count.arrays    - [replicate, locus] matrix
#   array.of.all.hybrid.other.allele.read.count.arrays  - [replicate, locus] matrix
#
# Side effect: prints the upper quartile read counts to standard output under a "DEBUG:" header.
"get.read.count.information.for.each.parent.and.the.hybrid" <- function(parent.A.statistics.per.locus.file.array, parent.B.statistics.per.locus.file.array,
									    hybrid.statistics.per.locus.file.array){
	num.replicates.for.parent.A <- length(parent.A.statistics.per.locus.file.array)
	num.replicates.for.parent.B <- length(parent.B.statistics.per.locus.file.array)
	num.replicates.for.hybrid <- length(hybrid.statistics.per.locus.file.array)

	combined.statistics.per.locus.file.array <- c(parent.A.statistics.per.locus.file.array, parent.B.statistics.per.locus.file.array, hybrid.statistics.per.locus.file.array)

	# Internal consistency check, done before any file is read.
	# NOTE(review): the combined array is built by c() just above, so this cannot
	# currently fail; it is kept as a guard against future changes.
	if(length(combined.statistics.per.locus.file.array) != (num.replicates.for.parent.A + num.replicates.for.parent.B + num.replicates.for.hybrid)){
		# paste() with its default separator keeps a space between the two halves of the message.
		stop(paste("Internal Error: expected number of replicates for parent A, parent B, and hybrid",
			   "to add up to the number of statistics per locus files in the combined.statistics.per.locus.file.array"))
	}

	data <- get.array.of.all.read.count.arrays(combined.statistics.per.locus.file.array)
	array.of.all.read.count.arrays <- data$array.of.all.read.count.arrays
	array.of.all.ref.allele.read.count.arrays <- data$array.of.all.ref.allele.read.count.arrays
	array.of.all.other.allele.read.count.arrays <- data$array.of.all.other.allele.read.count.arrays
	locus.array <- data$locus.array

	# Upper quartiles are computed over all replicates of all conditions in one call.
	array.of.all.upper.quartile.read.counts <- get.upper.quartile.read.counts.for.multiple.read.count.arrays(array.of.all.read.count.arrays)
	cat("DEBUG:\n")
	cat(array.of.all.upper.quartile.read.counts)
	cat("\n")

	# NOTE(review): the masks below assume the rows of the combined arrays follow
	# the order of combined.statistics.per.locus.file.array -- confirm against
	# get.array.of.all.read.count.arrays().
	data.2 <- get.replicate.masks.for.each.condition(num.replicates.for.parent.A, num.replicates.for.parent.B, num.replicates.for.hybrid, array.of.all.read.count.arrays)
	parent.A.replicate.mask <- data.2$parent.A.replicate.mask
	parent.B.replicate.mask <- data.2$parent.B.replicate.mask
	hybrid.replicate.mask <- data.2$hybrid.replicate.mask

	num.loci <- dim(array.of.all.read.count.arrays)[2]

	# Copy the masked rows into a freshly allocated matrix so that the result is
	# two-dimensional even when the mask selects a single row (plain row indexing
	# would drop that case to a vector).
	"extract.replicate.rows" <- function(source.array, replicate.mask, num.replicates){
		replicate.rows <- matrix(NA, nrow=num.replicates, ncol=num.loci)
		replicate.rows[,] <- source.array[replicate.mask,]
		replicate.rows
	}

	array.of.all.parent.A.read.count.arrays <- extract.replicate.rows(array.of.all.read.count.arrays, parent.A.replicate.mask, num.replicates.for.parent.A)
	array.of.all.parent.B.read.count.arrays <- extract.replicate.rows(array.of.all.read.count.arrays, parent.B.replicate.mask, num.replicates.for.parent.B)
	array.of.all.hybrid.read.count.arrays <- extract.replicate.rows(array.of.all.read.count.arrays, hybrid.replicate.mask, num.replicates.for.hybrid)

	array.of.all.hybrid.ref.allele.read.count.arrays <- extract.replicate.rows(array.of.all.ref.allele.read.count.arrays, hybrid.replicate.mask, num.replicates.for.hybrid)
	array.of.all.hybrid.other.allele.read.count.arrays <- extract.replicate.rows(array.of.all.other.allele.read.count.arrays, hybrid.replicate.mask, num.replicates.for.hybrid)

	list(locus.array=locus.array,
		array.of.all.parent.A.read.count.arrays=array.of.all.parent.A.read.count.arrays,
		array.of.all.parent.B.read.count.arrays=array.of.all.parent.B.read.count.arrays,
		array.of.all.hybrid.read.count.arrays=array.of.all.hybrid.read.count.arrays,
		array.of.all.parent.A.upper.quartile.read.counts=array.of.all.upper.quartile.read.counts[parent.A.replicate.mask],
		array.of.all.parent.B.upper.quartile.read.counts=array.of.all.upper.quartile.read.counts[parent.B.replicate.mask],
		array.of.all.hybrid.upper.quartile.read.counts=array.of.all.upper.quartile.read.counts[hybrid.replicate.mask],
		array.of.all.hybrid.ref.allele.read.count.arrays=array.of.all.hybrid.ref.allele.read.count.arrays,
		array.of.all.hybrid.other.allele.read.count.arrays=array.of.all.hybrid.other.allele.read.count.arrays)
}

# Build one logical mask per condition over the combined replicate list.
#
# The combined list is laid out as: all parent A replicates, then all parent B
# replicates, then all hybrid replicates.
#
# Arguments:
#   num.replicates.for.parent.A    - number of parent A replicates (may be 0)
#   num.replicates.for.parent.B    - number of parent B replicates (may be 0)
#   num.replicates.for.hybrid      - number of hybrid replicates (may be 0)
#   array.of.all.read.count.arrays - not used; kept so existing callers keep working
#
# Returns a list with parent.A.replicate.mask, parent.B.replicate.mask and
# hybrid.replicate.mask. Each is a one-dimensional logical array whose length is
# the total number of replicates and which is TRUE exactly at the positions of
# that condition's replicates.
"get.replicate.masks.for.each.condition" <- function(num.replicates.for.parent.A, num.replicates.for.parent.B, num.replicates.for.hybrid, array.of.all.read.count.arrays){
	num.replicates.across.all.conditions <- num.replicates.for.parent.A + num.replicates.for.parent.B + num.replicates.for.hybrid

	# Label each position with its condition (1 = parent A, 2 = parent B, 3 = hybrid).
	# rep() handles a count of zero correctly, whereas index ranges such as 1:0 or
	# (n+1):n run backwards and would mark the wrong positions.
	condition.of.each.replicate <- rep(1:3, times=c(num.replicates.for.parent.A, num.replicates.for.parent.B, num.replicates.for.hybrid))

	list(parent.A.replicate.mask=array(condition.of.each.replicate == 1L, dim=num.replicates.across.all.conditions),
		parent.B.replicate.mask=array(condition.of.each.replicate == 2L, dim=num.replicates.across.all.conditions),
		hybrid.replicate.mask=array(condition.of.each.replicate == 3L, dim=num.replicates.across.all.conditions))
}

