# DE.study.helper.functions.R

source("./DE.study.global.variables.R")
source("./get.statistics.per.locus.information.for.hybrid.and.its.two.parents.R")

## Build the in-silico hybrid read counts from the read counts of the two parents.
##
## Arguments:
##   condition.to.read.counts.on.each.replicate.per.locus.array - list indexed by condition;
##       only the PARENT.A and PARENT.B entries are read here
##   num.loci - forwarded unchanged to get.in.silico.hybrid.read.count.array()
##
## Value: the in-silico hybrid read-count array, returned invisibly (it is the value of
## the trailing assignment).
##
## NOTE(review): the IN.SILICO.HYBRID entry is stored in this function's own copy of the
## list, so the caller's list is NOT modified. Callers have to use the return value --
## confirm that the call sites do so.
"estimate.read.counts.for.in.silico.hybrid" <- function(condition.to.read.counts.on.each.replicate.per.locus.array, num.loci){
	parent.A.read.counts <- condition.to.read.counts.on.each.replicate.per.locus.array[[PARENT.A]]
	parent.B.read.counts <- condition.to.read.counts.on.each.replicate.per.locus.array[[PARENT.B]]

	in.silico.hybrid.read.counts <- get.in.silico.hybrid.read.count.array(parent.A.read.counts, parent.B.read.counts, num.loci)

	## local update only; the assigned array is what the function (invisibly) returns
	condition.to.read.counts.on.each.replicate.per.locus.array[[IN.SILICO.HYBRID]] <- in.silico.hybrid.read.counts
}

## Dispatch to the locus-ordering routine that belongs to the requested method.
##
## Arguments:
##   method - one of sFCC, FET, FC, FET.FC or FC.FET (constants defined outside this function)
##   pvalue.threshold - only forwarded to the combined FC/FET routine
##   debug - forwarded to the selected routine
##   all remaining arguments are forwarded unchanged to the selected routine
##
## Value: whatever the selected routine returns.
## Stops with "Unrecognized method: ..." for any other value of method.
"use.method.to.get.locus.ordering.for.category" <- function(method, current.category, 
											differential.expression.factor.threshold, pvalue.threshold, num.replicates, number.of.points.to.sample,
											simulation.type, selected.locus.mask,
											condition.to.read.counts.on.each.replicate.per.locus.array, num.loci, debug=FALSE){
	ordering.for.category <- if(method == sFCC){
		use.sFCC.to.get.locus.ordering.for.category(method, current.category,
				differential.expression.factor.threshold, num.replicates, number.of.points.to.sample,
				simulation.type, selected.locus.mask,
				condition.to.read.counts.on.each.replicate.per.locus.array, num.loci, debug=debug)
	}else if(method == FET){
		use.FET.to.get.locus.ordering.for.category(method, current.category,
				differential.expression.factor.threshold, num.replicates, number.of.points.to.sample,
				simulation.type, selected.locus.mask,
				condition.to.read.counts.on.each.replicate.per.locus.array, num.loci, debug=debug)
	}else if(method == FC){
		use.FC.to.get.locus.ordering.for.category(method, current.category,
				differential.expression.factor.threshold, num.replicates, number.of.points.to.sample,
				simulation.type, selected.locus.mask,
				condition.to.read.counts.on.each.replicate.per.locus.array, num.loci, debug=debug)
	}else if((method == FET.FC) || (method == FC.FET)){
		## FET.FC passes FET as first.method, FC.FET passes FC
		first.method <- if(method == FET.FC) FET else FC
		use.FC.and.FET.to.get.locus.ordering.for.category(method, current.category,
				differential.expression.factor.threshold, pvalue.threshold, num.replicates, number.of.points.to.sample,
				simulation.type, selected.locus.mask,
				condition.to.read.counts.on.each.replicate.per.locus.array, num.loci,
				first.method = first.method, debug=debug)
	}else{
		stop(sprintf("Unrecognized method: %s", method))
	}
	ordering.for.category
}

## Rank the selected loci for one category using sFCC.
##
## The read counts are wrapped into a "sequencing.data" list, sFCC p-values and directions
## are obtained for the low (1 / threshold) and high (threshold) differential expression
## factor thresholds, and those are turned into a locus ordering for current.category.
##
## Arguments:
##   method, differential.expression.factor.threshold, num.replicates,
##   number.of.points.to.sample, selected.locus.mask, num.loci
##       - forwarded to use.sFCC.to.assign.categories.and.compute.associated.pvalues()
##   current.category - category whose ordering is requested
##   simulation.type - NAE (effect type HYBRID.TRANS is used) or DE (effect type DE is used)
##   condition.to.read.counts.on.each.replicate.per.locus.array - list of read counts per condition
##   debug - if TRUE, print the intermediate p-value/direction arrays and the ordering
##
## Value: the ordering returned by get.sFCC.locus.ordering.from.pvalues.and.directions().
## Stops with "Unsupported simulation type: ..." for any other simulation type.
"use.sFCC.to.get.locus.ordering.for.category" <- function(method, current.category, 
								differential.expression.factor.threshold, num.replicates, number.of.points.to.sample,
								simulation.type, selected.locus.mask,
								condition.to.read.counts.on.each.replicate.per.locus.array, num.loci, debug=FALSE){

	sequencing.data <- list(condition.to.read.counts.on.each.replicate.per.locus.array=condition.to.read.counts.on.each.replicate.per.locus.array)

	## TODO (from original author): have this function handle different values of "effect.type"?
	if(simulation.type == NAE){
		effect.type <- HYBRID.TRANS
	}else if(simulation.type == DE){
		effect.type <- DE
	}else{
		stop(sprintf("Unsupported simulation type: %s", simulation.type))
	}

	## This function has no ref.parent argument. Previously an unbound name was passed on,
	## which only worked because R evaluates arguments lazily (or picked up whatever global
	## happened to carry that name). The effect types selected above are not the
	## PARENTAL.TRANS case in which the sFCC p-value routine reads ref.parent, so an
	## explicit NA placeholder is passed instead.
	ref.parent <- NA

	data <- use.sFCC.to.assign.categories.and.compute.associated.pvalues(method, differential.expression.factor.threshold, num.replicates, number.of.points.to.sample,
												simulation.type, selected.locus.mask,
												sequencing.data, effect.type, ref.parent, num.loci, debug)

	low.threshold.pvalue.array <- data$low.threshold.pvalue.array
	low.threshold.direction.array <- data$low.threshold.direction.array
	high.threshold.pvalue.array <- data$high.threshold.pvalue.array
	high.threshold.direction.array <- data$high.threshold.direction.array

	if(debug){
		cat("DEBUG: sFCC pvalue arrays and direction arrays:\n")
		cat("low thresh pvalues:\n")
		print(low.threshold.pvalue.array)
		cat("and corresponding directions:\n")
		print(low.threshold.direction.array)
		cat("high thresh pvalues:\n")
		print(high.threshold.pvalue.array)
		cat("and corresponding directions:\n")
		print(high.threshold.direction.array)
	}

	locus.ordering <- get.sFCC.locus.ordering.from.pvalues.and.directions(num.loci, selected.locus.mask, low.threshold.pvalue.array,
											low.threshold.direction.array, high.threshold.pvalue.array,
											high.threshold.direction.array, current.category, simulation.type)

	if(debug){
		cat("DEBUG: sFCC locus.ordering:\n")
		print(locus.ordering)
	}
	locus.ordering
}

## Classify every selected locus with sFCC and report the p-value supporting that category.
##
## sFCC is run twice: once against the low threshold (1 / differential.expression.factor.threshold)
## and once against the high threshold (differential.expression.factor.threshold). Per locus:
##   low direction DOWN and high direction UP -> UNC  (contradictory; p-values set to 1)
##   low direction DOWN only                  -> DOWN (low-threshold p-values)
##   high direction UP only                   -> UP   (high-threshold p-values)
##   otherwise                                -> NONE (maximum of the two p-values)
##
## Arguments:
##   method, num.replicates - not used in this function
##   differential.expression.factor.threshold - high threshold; its reciprocal is the low threshold
##   number.of.points.to.sample, simulation.type, selected.locus.mask, sequencing.data,
##   effect.type, ref.parent, num.loci, debug
##       - forwarded to get.pvalues.of.differential.expression.for.selected.loci.using.sFCC()
##
## Value: list with low.threshold.pvalue.array, low.threshold.direction.array,
##   high.threshold.pvalue.array, high.threshold.direction.array, pvalue.for.category.array,
##   adjusted.pvalue.for.category.array and category.array. Loci that are not selected keep NA
##   in the three per-category arrays.
"use.sFCC.to.assign.categories.and.compute.associated.pvalues" <- function(method, differential.expression.factor.threshold, num.replicates, number.of.points.to.sample,
												simulation.type, selected.locus.mask,
												sequencing.data, effect.type, ref.parent, num.loci, debug=FALSE){

	cat("RETURN - remember to check for just one replicate (sFCC)\n")

	## apply sFCC against the low and the high threshold
	data.low.threshold <- get.pvalues.of.differential.expression.for.selected.loci.using.sFCC(sequencing.data, effect.type, ref.parent,
											(1 / differential.expression.factor.threshold),
											number.of.points.to.sample,
											simulation.type, selected.locus.mask, num.loci,
											debug)
	low.threshold.pvalue.array <- data.low.threshold$pvalue.array
	low.threshold.direction.array <- data.low.threshold$direction.array

	data.high.threshold <- get.pvalues.of.differential.expression.for.selected.loci.using.sFCC(sequencing.data, effect.type, ref.parent,
											differential.expression.factor.threshold,
											number.of.points.to.sample,
											simulation.type, selected.locus.mask, num.loci,
											debug)
	high.threshold.pvalue.array <- data.high.threshold$pvalue.array
	high.threshold.direction.array <- data.high.threshold$direction.array

	## Adjust the p-values. Note that the multiple testing correction is performed across
	## the two arrays together, rather than separately for each array.
	## seq_len() keeps the index ranges empty when num.loci is 0 (1:0 would yield c(1, 0)).
	locus.indices <- seq_len(num.loci)
	combined.pvalue.array <- c(low.threshold.pvalue.array, high.threshold.pvalue.array)
	combined.adjusted.pvalue.array <- p.adjust(combined.pvalue.array, method="BH")
	low.threshold.adjusted.pvalue.array <- combined.adjusted.pvalue.array[locus.indices]
	high.threshold.adjusted.pvalue.array <- combined.adjusted.pvalue.array[num.loci + locus.indices]

	category.array <- array(NA, dim=num.loci)
	pvalue.for.category.array <- array(NA, dim=num.loci)
	adjusted.pvalue.for.category.array <- array(NA, dim=num.loci)
	for(locus.index in locus.indices){
		## Loci outside the mask were skipped by the sFCC step, so their directions are NA;
		## comparing those inside if() would abort the whole run. Leave their entries NA.
		if(selected.locus.mask[locus.index] == FALSE){
			next
		}

		low.threshold.direction <- low.threshold.direction.array[locus.index]
		high.threshold.direction <- high.threshold.direction.array[locus.index]

		low.threshold.pvalue <- low.threshold.pvalue.array[locus.index]
		high.threshold.pvalue <- high.threshold.pvalue.array[locus.index]

		adjusted.low.threshold.pvalue <- low.threshold.adjusted.pvalue.array[locus.index]
		adjusted.high.threshold.pvalue <- high.threshold.adjusted.pvalue.array[locus.index]

		if((low.threshold.direction == DOWN) && (high.threshold.direction == UP)){
			## This should only happen due to inaccuracies in estimating the p-values,
			## which may occur since we use distribution sampling to get an approximate p-value.
			## This case should become more likely when the differential expression factor
			## threshold is closer to 1.
			current.category <- UNC
			pvalue <- 1.0
			adjusted.pvalue <- 1.0
		}else if(low.threshold.direction == DOWN){
			current.category <- DOWN
			pvalue <- low.threshold.pvalue
			adjusted.pvalue <- adjusted.low.threshold.pvalue
		}else if(high.threshold.direction == UP){
			current.category <- UP
			pvalue <- high.threshold.pvalue
			adjusted.pvalue <- adjusted.high.threshold.pvalue
		}else{
			## additive
			current.category <- NONE
			pvalue <- max(low.threshold.pvalue, high.threshold.pvalue)
			adjusted.pvalue <- max(adjusted.low.threshold.pvalue, adjusted.high.threshold.pvalue)
		}
		category.array[locus.index] <- current.category
		pvalue.for.category.array[locus.index] <- pvalue
		adjusted.pvalue.for.category.array[locus.index] <- adjusted.pvalue
	}

	list(low.threshold.pvalue.array=low.threshold.pvalue.array, low.threshold.direction.array=low.threshold.direction.array,
		high.threshold.pvalue.array=high.threshold.pvalue.array, high.threshold.direction.array=high.threshold.direction.array,
		pvalue.for.category.array=pvalue.for.category.array, adjusted.pvalue.for.category.array=adjusted.pvalue.for.category.array,
		category.array=category.array)
}


## Rank the selected loci for one category using Fisher's Exact Test (FET).
##
## Arguments:
##   method, num.replicates, simulation.type, selected.locus.mask,
##   condition.to.read.counts.on.each.replicate.per.locus.array, num.loci
##       - forwarded to use.FET.to.assign.categories.and.compute.associated.pvalues()
##   current.category - category whose ordering is requested
##   differential.expression.factor.threshold, number.of.points.to.sample - not used here
##   debug - if TRUE, print the p-values, the directions and the resulting ordering
##
## Value: the ordering returned by get.FET.locus.ordering.from.pvalues.and.directions().
"use.FET.to.get.locus.ordering.for.category" <- function(method, current.category, 
												differential.expression.factor.threshold, num.replicates, number.of.points.to.sample,
												simulation.type, selected.locus.mask,
												condition.to.read.counts.on.each.replicate.per.locus.array, num.loci,
												debug=FALSE){
	FET.results <- use.FET.to.assign.categories.and.compute.associated.pvalues(method, num.replicates, simulation.type, selected.locus.mask,
								condition.to.read.counts.on.each.replicate.per.locus.array, num.loci)
	FET.pvalues <- FET.results$pvalue.array
	FET.directions <- FET.results$direction.array

	ordering.for.category <- get.FET.locus.ordering.from.pvalues.and.directions(num.loci, selected.locus.mask,
								FET.pvalues, FET.directions, current.category, simulation.type)

	if(debug){
		cat("DEBUG: FET pvalue array and direction array:\n")
		print(FET.pvalues)
		print(FET.directions)
		cat("DEBUG: FET locus.ordering:\n")
		print(ordering.for.category)
	}

	ordering.for.category
}

#assign.categories.and.compute.associated.pvalues
## Run Fisher's Exact Test (FET) on two conditions and derive a "best guess" category per locus.
##
## For simulation type NAE the hybrid is compared with the in-silico hybrid, and exactly one
## replicate is required for parent A, parent B and the hybrid. For simulation type DE,
## condition A is compared with condition B (no replicate check is performed in that case).
##
## Arguments:
##   method, num.replicates - not used in this function
##   simulation.type - NAE or DE
##   selected.locus.mask, num.loci - forwarded to the FET routine
##   condition.to.read.counts.on.each.replicate.per.locus.array - list of read counts per condition
##
## Value: the result of get.pvalues.of.differential.expression.for.selected.loci.using.FET()
##   with an added element category.array, which is DOWN/UP where the FET direction is DOWN/UP
##   and UNC everywhere else.
## Stops on more than one replicate (NAE) and on an unrecognized simulation type.
"use.FET.to.assign.categories.and.compute.associated.pvalues" <- function(method, num.replicates, simulation.type, selected.locus.mask,
								condition.to.read.counts.on.each.replicate.per.locus.array, num.loci){
	## TODO (from original author): add code to check for one replicate per condition
	## (for case where simulation type is DE)

	## every locus starts out as "unclassified"
	category.array <- array(UNC, dim=num.loci)

	## pick the pair of read-count arrays that FET compares
	if(simulation.type == NAE){
		parent.A.read.counts <- condition.to.read.counts.on.each.replicate.per.locus.array[[PARENT.A]]
		parent.B.read.counts <- condition.to.read.counts.on.each.replicate.per.locus.array[[PARENT.B]]
		first.condition.read.counts <- condition.to.read.counts.on.each.replicate.per.locus.array[[HYBRID]]
		second.condition.read.counts <- condition.to.read.counts.on.each.replicate.per.locus.array[[IN.SILICO.HYBRID]]

		## FET is only applied to single-replicate data
		num.parent.A.replicates <- dim(parent.A.read.counts)[1]
		num.parent.B.replicates <- dim(parent.B.read.counts)[1]
		num.hybrid.replicates <- dim(first.condition.read.counts)[1]
		if((num.parent.A.replicates != 1) || (num.parent.B.replicates != 1) || (num.hybrid.replicates != 1)){
			stop("Internal Error: Expected one replicate per condition if we are using Fisher's Exact Test")
		}
	}else if(simulation.type == DE){
		first.condition.read.counts <- condition.to.read.counts.on.each.replicate.per.locus.array[[CONDITION.A]]
		second.condition.read.counts <- condition.to.read.counts.on.each.replicate.per.locus.array[[CONDITION.B]]
	}else{
		stop(sprintf("Unrecognized simulation type: %s", simulation.type))
	}

	## upper quartile read counts of both conditions (the third condition slot is left empty)
	uq.data <- get.upper.quartile.read.count.information.for.three.conditions(first.condition.read.counts, second.condition.read.counts,
										array(NA, dim=c(0, num.loci)), num.loci)

	## apply FET
	FET.data <- get.pvalues.of.differential.expression.for.selected.loci.using.FET(first.condition.read.counts,
											second.condition.read.counts,
											uq.data$array.of.all.condition.1.upper.quartile.read.counts,
											uq.data$array.of.all.condition.2.upper.quartile.read.counts,
											selected.locus.mask, num.loci)

	## translate the FET directions into categories
	category.array[FET.data$direction.array == DOWN] <- DOWN
	category.array[FET.data$direction.array == UP] <- UP

	FET.data$category.array <- category.array
	FET.data
}

## Order the selected loci by how strongly the sFCC results support current.category.
##
## Each selected locus gets a ranking value (smaller = ranked earlier):
##   UP   : the high-threshold p-value if its direction is UP, otherwise 2 - p
##   DOWN : the low-threshold p-value if its direction is DOWN, otherwise 2 - p
##   NONE : max of the two p-values if neither threshold is exceeded, otherwise 2 - p of the
##          exceeded threshold (2 - min of both p-values if both appear exceeded)
## NA ranking values are replaced by Inf, and ties are broken at random (via runif).
##
## Arguments:
##   num.loci - number of loci to scan
##   selected.locus.mask - loci whose mask entry equals FALSE are skipped
##   low/high.threshold.pvalue.array, low/high.threshold.direction.array - per-locus sFCC results
##   current.category - UP, DOWN or NONE
##   simulation.type - not used in this function
##
## Value: integer ordering as returned by order(). The indices refer to positions within
##   the sequence of selected loci, not to the original locus indices.
## Stops on an unrecognized category (only detected once a selected locus is processed).
"get.sFCC.locus.ordering.from.pvalues.and.directions" <- function(num.loci, selected.locus.mask, low.threshold.pvalue.array,
											low.threshold.direction.array, high.threshold.pvalue.array,
											high.threshold.direction.array, current.category, simulation.type){
	## preallocate for the worst case (all loci selected) instead of growing with c()
	ranking.array <- numeric(num.loci)
	num.selected.loci <- 0
	## seq_len() yields an empty loop when num.loci is 0 (1:0 would iterate over 1 and 0)
	for(i in seq_len(num.loci)){
		## only process selected loci
		if(selected.locus.mask[i] == FALSE){
			next
		}
		low.threshold.pvalue <- low.threshold.pvalue.array[i]
		high.threshold.pvalue <- high.threshold.pvalue.array[i]
		low.threshold.direction <- low.threshold.direction.array[i]
		high.threshold.direction <- high.threshold.direction.array[i]
		if(current.category == UP){
			if(high.threshold.direction == UP){
				ranking.value <- high.threshold.pvalue
			}else{
				ranking.value <- 2 - high.threshold.pvalue
			}
		}else if(current.category == DOWN){
			if(low.threshold.direction == DOWN){
				ranking.value <- low.threshold.pvalue
			}else{
				ranking.value <- 2 - low.threshold.pvalue
			}
		}else if(current.category == NONE){
			if((low.threshold.direction == DOWN) && (high.threshold.direction == UP)){
				## This should only happen due to inaccuracies in estimating the p-values,
				## which may occur since we use distribution sampling to get an approximate p-value.
				## This case should become more likely when the differential expression factor
				## threshold is closer to 1.
				cat("Warning: In get.sFCC.locus.ordering.from.pvalues.and.directions(): ")
				cat(sprintf("Found that locus appears to be below low threshold with pvalue=%f", low.threshold.pvalue))
				cat(sprintf("but that it also appears to be above high threshold with pvalue=%f\n", high.threshold.pvalue))

				## All else being equal, an imbalance between the p-values would seem to indicate
				## a departure from additivity, and it would also result in a lower minimum between
				## the two p-values.
				ranking.value <- 2 - min(low.threshold.pvalue, high.threshold.pvalue)
			}else if(low.threshold.direction == DOWN){
				ranking.value <- 2 - low.threshold.pvalue
			}else if(high.threshold.direction == UP){
				ranking.value <- 2 - high.threshold.pvalue
			}else{
				ranking.value <- max(low.threshold.pvalue, high.threshold.pvalue)
			}
		}else{
			stop(sprintf("get.sFCC.locus.ordering.from.pvalues.and.directions(): unrecognized category: %s", current.category))
		}
		num.selected.loci <- num.selected.loci + 1
		ranking.array[num.selected.loci] <- ranking.value
	}
	## drop the unused tail of the preallocated vector
	ranking.array <- ranking.array[seq_len(num.selected.loci)]

	## loci without a usable ranking value go last
	ranking.array[is.na(ranking.array)] <- Inf
	order(ranking.array, runif(length(ranking.array)))
}

## Order the selected loci by how strongly the FET results support current.category.
##
## Each selected locus gets a ranking value (smaller = ranked earlier):
##   UP / DOWN : the p-value if the FET direction equals the category, otherwise 2 - p
##   NONE      : -p, i.e. loci with larger p-values are ranked first
## NA/NaN ranking values are replaced by Inf, and ties are broken at random (via runif).
##
## Arguments:
##   num.loci - number of loci to scan
##   selected.locus.mask - loci whose mask entry equals FALSE are skipped
##   pvalue.array, direction.array - per-locus FET results
##   current.category - UP, DOWN or NONE
##   simulation.type - not used in this function
##
## Value: integer ordering as returned by order(). The indices refer to positions within
##   the sequence of selected loci, not to the original locus indices.
## Stops on an unrecognized category (only detected once a selected locus is processed),
## matching the behaviour of the sFCC counterpart of this function.
"get.FET.locus.ordering.from.pvalues.and.directions" <- function(num.loci, selected.locus.mask, pvalue.array, direction.array, current.category, simulation.type){
	## preallocate for the worst case (all loci selected) instead of growing with c()
	ranking.array <- numeric(num.loci)
	num.selected.loci <- 0
	## seq_len() yields an empty loop when num.loci is 0 (1:0 would iterate over 1 and 0)
	for(i in seq_len(num.loci)){
		## only process selected loci
		if(selected.locus.mask[i] == FALSE){
			next
		}
		direction <- direction.array[i]
		pvalue <- pvalue.array[i]
		if(current.category == UP){
			if(direction == UP){
				ranking.value <- pvalue
			}else{
				ranking.value <- 2 - pvalue
			}
		}else if(current.category == DOWN){
			if(direction == DOWN){
				ranking.value <- pvalue
			}else{
				ranking.value <- 2 - pvalue
			}
		}else if(current.category == NONE){
			ranking.value <- (- pvalue)
		}else{
			## without this branch an unknown category surfaced as
			## "object 'ranking.value' not found"
			stop(sprintf("get.FET.locus.ordering.from.pvalues.and.directions(): unrecognized category: %s", current.category))
		}
		num.selected.loci <- num.selected.loci + 1
		ranking.array[num.selected.loci] <- ranking.value
	}
	## drop the unused tail of the preallocated vector
	ranking.array <- ranking.array[seq_len(num.selected.loci)]

	## is.na() is TRUE for NaN as well, so one pass sends both NA and NaN to the end
	ranking.array[is.na(ranking.array)] <- Inf
	order(ranking.array, runif(length(ranking.array)))
}

## Combine the read counts of the two parents into an "in-silico hybrid".
##
## The parent with the larger upper-quartile read count is first passed through
## get.subset.array.of.all.read.count.arrays() with the ratio (smaller / larger) of the two
## upper-quartile read counts; afterwards the per-locus counts of both parents are added.
##
## Arguments:
##   array.of.all.parent.A.read.count.arrays, array.of.all.parent.B.read.count.arrays
##       - read counts with replicates in the first dimension; exactly one replicate each
##   num.loci - number of loci (columns of the result)
##
## Value: array of dimension c(1, num.loci) holding the summed read counts.
## Stops if either parent does not have exactly one replicate.
"get.in.silico.hybrid.read.count.array" <- function(array.of.all.parent.A.read.count.arrays, array.of.all.parent.B.read.count.arrays, num.loci){
	cat("Note: We filter out read counts of one of the two parents in order to make\n")
	cat("the upper quartiles close together before combining the parents to get the in-silico hybrid.\n")

	## only the single-replicate case is supported
	replicates.in.parent.A <- dim(array.of.all.parent.A.read.count.arrays)[1]
	replicates.in.parent.B <- dim(array.of.all.parent.B.read.count.arrays)[1]
	if((replicates.in.parent.A != 1) || (replicates.in.parent.B != 1)){
		stop("Internal Error: Expected one replicate per condition (multiple replicates per condition not handled yet)")
	}

	## upper quartile read count of the (single) replicate of each parent
	uq.data <- get.upper.quartile.read.count.information.for.three.conditions(array.of.all.parent.A.read.count.arrays, array.of.all.parent.B.read.count.arrays,
										array(NA, dim=c(0, num.loci)), num.loci)
	uq.parent.A <- uq.data$array.of.all.condition.1.upper.quartile.read.counts[1]
	uq.parent.B <- uq.data$array.of.all.condition.2.upper.quartile.read.counts[1]

	## subset whichever parent has the larger upper quartile; nothing to do when they are equal
	if(uq.parent.A < uq.parent.B){
		array.of.all.parent.B.read.count.arrays <-
			get.subset.array.of.all.read.count.arrays(array.of.all.parent.B.read.count.arrays, uq.parent.A / uq.parent.B)
	}else if(uq.parent.B < uq.parent.A){
		array.of.all.parent.A.read.count.arrays <-
			get.subset.array.of.all.read.count.arrays(array.of.all.parent.A.read.count.arrays, uq.parent.B / uq.parent.A)
	}

	## combine counts across parents to get in silico hybrid
	in.silico.hybrid.read.counts <- array(NA, dim=c(1,num.loci))
	in.silico.hybrid.read.counts[1,] <- array.of.all.parent.A.read.count.arrays[1,] + array.of.all.parent.B.read.count.arrays[1,]
	in.silico.hybrid.read.counts
}

## Compute an sFCC p-value and direction for every selected locus.
##
## Which read counts are compared depends on simulation.type and effect.type:
##   NAE / HYBRID.TRANS   : hybrid vs. in-silico hybrid total read counts
##   NAE / CIS            : reference-allele vs. other-allele read counts in the hybrid
##   NAE / PARENTAL.TRANS : hybrid allele read counts together with the read counts of the
##                          reference parent and the other parent (selected by ref.parent)
##   DE  / DE             : condition A vs. condition B total read counts
## Only the first replicate (row 1) of every read-count array is used.
##
## Arguments:
##   sequencing.data - list; the elements condition.to.read.counts.on.each.replicate.per.locus.array,
##       condition.to.ref.allele.read.counts.on.each.replicate.per.locus.array and
##       condition.to.other.allele.read.counts.on.each.replicate.per.locus.array are read
##       (which ones are needed depends on effect.type)
##   effect.type - HYBRID.TRANS, CIS, PARENTAL.TRANS or DE
##   ref.parent - PARENT.A or PARENT.B; only evaluated for PARENTAL.TRANS
##   differential.expression.factor.threshold, number.of.points.to.sample - forwarded to the
##       per-locus sFCC routine
##   simulation.type - NAE or DE
##   selected.locus.mask - loci whose mask entry equals FALSE are skipped
##   num.loci - number of loci
##   debug - forwarded to the per-locus routine; if TRUE the result arrays are also printed
##
## Value: list with pvalue.array, adjusted.pvalue.array (Benjamini-Hochberg) and
##   direction.array. Entries of loci that are not selected stay NA.
## Stops on an unsupported simulation type / effect type combination, on a missing
## IN.SILICO.HYBRID entry (HYBRID.TRANS) and on an invalid ref.parent (PARENTAL.TRANS).
"get.pvalues.of.differential.expression.for.selected.loci.using.sFCC" <- function(sequencing.data, effect.type, ref.parent,
											differential.expression.factor.threshold,
											number.of.points.to.sample,
											simulation.type, selected.locus.mask, num.loci,
											debug=FALSE){

	condition.to.read.counts.on.each.replicate.per.locus.array <- sequencing.data$condition.to.read.counts.on.each.replicate.per.locus.array
	condition.to.ref.allele.read.counts.on.each.replicate.per.locus.array <- sequencing.data$condition.to.ref.allele.read.counts.on.each.replicate.per.locus.array
	condition.to.other.allele.read.counts.on.each.replicate.per.locus.array <- sequencing.data$condition.to.other.allele.read.counts.on.each.replicate.per.locus.array

	## Step 1: collect the read-count arrays (and upper quartile read counts) needed for the
	## requested comparison, and reject unsupported type combinations before any locus is processed.
	if(simulation.type == NAE){
		if(effect.type == HYBRID.TRANS){
			array.of.all.hybrid.read.count.arrays <- condition.to.read.counts.on.each.replicate.per.locus.array[[HYBRID]]
			array.of.all.in.silico.hybrid.read.count.arrays <- condition.to.read.counts.on.each.replicate.per.locus.array[[IN.SILICO.HYBRID]]
			if(length(condition.to.read.counts.on.each.replicate.per.locus.array[[IN.SILICO.HYBRID]]) == 0){
				stop(paste("In get.pvalues.of.differential.expression.for.selected.loci.using.sFCC(): did not find an entry for IN.SILICO.HYBRID in ",
					   "condition.to.read.counts.on.each.replicate.per.locus.array (or the entry was an empty data structure)"))
			}

			## get upper quartile read counts
			data <- get.upper.quartile.read.count.information.for.three.conditions(array.of.all.hybrid.read.count.arrays, array.of.all.in.silico.hybrid.read.count.arrays,
												array(NA, dim=c(0, num.loci)), num.loci)
			array.of.all.hybrid.upper.quartile.read.counts <- data$array.of.all.condition.1.upper.quartile.read.counts
			array.of.all.in.silico.hybrid.upper.quartile.read.counts <- data$array.of.all.condition.2.upper.quartile.read.counts
		}else if(effect.type == CIS){
			array.of.all.hybrid.ref.allele.read.count.arrays <- condition.to.ref.allele.read.counts.on.each.replicate.per.locus.array[[HYBRID]]
			array.of.all.hybrid.other.allele.read.count.arrays <- condition.to.other.allele.read.counts.on.each.replicate.per.locus.array[[HYBRID]]
		}else if(effect.type == PARENTAL.TRANS){
			array.of.all.hybrid.ref.allele.read.count.arrays <- condition.to.ref.allele.read.counts.on.each.replicate.per.locus.array[[HYBRID]]
			array.of.all.hybrid.other.allele.read.count.arrays <- condition.to.other.allele.read.counts.on.each.replicate.per.locus.array[[HYBRID]]

			array.of.all.parent.A.read.count.arrays <- condition.to.read.counts.on.each.replicate.per.locus.array[[PARENT.A]]
			array.of.all.parent.B.read.count.arrays <- condition.to.read.counts.on.each.replicate.per.locus.array[[PARENT.B]]

			## get upper quartile read counts
			data <- get.upper.quartile.read.count.information.for.three.conditions(array.of.all.parent.A.read.count.arrays, array.of.all.parent.B.read.count.arrays,
												array(NA, dim=c(0, num.loci)), num.loci)
			array.of.all.parent.A.upper.quartile.read.counts <- data$array.of.all.condition.1.upper.quartile.read.counts
			array.of.all.parent.B.upper.quartile.read.counts <- data$array.of.all.condition.2.upper.quartile.read.counts
		}else{
			stop(sprintf("%s is an unsupported effect type for simulation type %s", effect.type, simulation.type))
		}
	}else if(simulation.type == DE){
		if(effect.type != DE){
			stop('The effect type should always be DE if simulation type is DE')
		}
		array.of.all.condition.A.read.count.arrays <- condition.to.read.counts.on.each.replicate.per.locus.array[[CONDITION.A]]
		array.of.all.condition.B.read.count.arrays <- condition.to.read.counts.on.each.replicate.per.locus.array[[CONDITION.B]]

		## get upper quartile read counts
		data <- get.upper.quartile.read.count.information.for.three.conditions(array.of.all.condition.A.read.count.arrays, array.of.all.condition.B.read.count.arrays,
											array(NA, dim=c(0, num.loci)), num.loci)
		array.of.all.condition.A.upper.quartile.read.counts <- data$array.of.all.condition.1.upper.quartile.read.counts
		array.of.all.condition.B.upper.quartile.read.counts <- data$array.of.all.condition.2.upper.quartile.read.counts
	}else{
		stop(sprintf("Unsupported simulation type: %s", simulation.type))
	}

	## Step 2: run the per-locus test; loci outside the mask keep NA
	pvalue.array <- array(NA, dim=num.loci)
	direction.array <- array(NA, dim=num.loci)

	## seq_len() yields an empty loop when num.loci is 0 (1:0 would iterate over 1 and 0)
	for(locus.index in seq_len(num.loci)){
		if(selected.locus.mask[locus.index] == FALSE){
			next
		}

		if(simulation.type == NAE){
			if(effect.type == HYBRID.TRANS){
				read.counts.for.target.gene.in.hybrid <- array.of.all.hybrid.read.count.arrays[1,locus.index]
				read.counts.for.target.gene.in.in.silico.hybrid <- array.of.all.in.silico.hybrid.read.count.arrays[1,locus.index]
				read.counts.for.upper.quartile.in.hybrid <- array.of.all.hybrid.upper.quartile.read.counts[1]
				read.counts.for.upper.quartile.in.in.silico.hybrid <- array.of.all.in.silico.hybrid.upper.quartile.read.counts[1]

				data <- get.pvalue.and.direction.of.differential.expression.using.sFCC(read.counts.for.target.gene.in.hybrid, read.counts.for.target.gene.in.in.silico.hybrid,
												read.counts.for.upper.quartile.in.hybrid, read.counts.for.upper.quartile.in.in.silico.hybrid,
												differential.expression.factor.threshold, number.of.points.to.sample, locus.index, debug)
			}else if(effect.type == CIS){
				ref.allele.read.counts.for.target.gene.in.hybrid <- array.of.all.hybrid.ref.allele.read.count.arrays[1,locus.index]
				other.allele.read.counts.for.target.gene.in.hybrid <- array.of.all.hybrid.other.allele.read.count.arrays[1,locus.index]

				data <- get.pvalue.and.direction.of.cis.effects.using.sFCC(ref.allele.read.counts.for.target.gene.in.hybrid, other.allele.read.counts.for.target.gene.in.hybrid,
												differential.expression.factor.threshold, number.of.points.to.sample, locus.index, debug)
			}else if(effect.type == PARENTAL.TRANS){
				ref.allele.read.counts.for.target.gene.in.hybrid <- array.of.all.hybrid.ref.allele.read.count.arrays[1,locus.index]
				other.allele.read.counts.for.target.gene.in.hybrid <- array.of.all.hybrid.other.allele.read.count.arrays[1,locus.index]

				read.counts.for.target.gene.in.parent.A <- array.of.all.parent.A.read.count.arrays[1,locus.index]
				read.counts.for.target.gene.in.parent.B <- array.of.all.parent.B.read.count.arrays[1,locus.index]

				read.counts.for.upper.quartile.in.parent.A <- array.of.all.parent.A.upper.quartile.read.counts[1]
				read.counts.for.upper.quartile.in.parent.B <- array.of.all.parent.B.upper.quartile.read.counts[1]

				## map parent A / parent B onto "reference" / "other" according to ref.parent
				if(ref.parent == PARENT.A){
					read.counts.for.target.gene.in.ref.parent <- read.counts.for.target.gene.in.parent.A
					read.counts.for.target.gene.in.other.parent <- read.counts.for.target.gene.in.parent.B
					read.counts.for.upper.quartile.in.ref.parent <- read.counts.for.upper.quartile.in.parent.A
					read.counts.for.upper.quartile.in.other.parent <- read.counts.for.upper.quartile.in.parent.B
				}else if(ref.parent == PARENT.B){
					read.counts.for.target.gene.in.ref.parent <- read.counts.for.target.gene.in.parent.B
					read.counts.for.target.gene.in.other.parent <- read.counts.for.target.gene.in.parent.A
					read.counts.for.upper.quartile.in.ref.parent <- read.counts.for.upper.quartile.in.parent.B
					read.counts.for.upper.quartile.in.other.parent <- read.counts.for.upper.quartile.in.parent.A
				}else{
					stop(sprintf("argument ref.parent must be set to either %s or %s", PARENT.A, PARENT.B))
				}
				data <- get.pvalue.and.direction.of.parental.trans.effects.using.sFCC(ref.allele.read.counts.for.target.gene.in.hybrid,
												other.allele.read.counts.for.target.gene.in.hybrid,
												read.counts.for.target.gene.in.ref.parent, read.counts.for.target.gene.in.other.parent,
												read.counts.for.upper.quartile.in.ref.parent, read.counts.for.upper.quartile.in.other.parent,
												differential.expression.factor.threshold, number.of.points.to.sample, locus.index, debug)
			}else{
				## defensive: already rejected in step 1
				stop(sprintf("%s is an unsupported effect type for simulation type %s", effect.type, simulation.type))
			}
		}else if(simulation.type == DE){
			if(effect.type != DE){
				## defensive: already rejected in step 1
				stop('The effect type should always be DE if simulation type is DE')
			}
			read.counts.for.target.gene.in.condition.A <- array.of.all.condition.A.read.count.arrays[1,locus.index]
			read.counts.for.target.gene.in.condition.B <- array.of.all.condition.B.read.count.arrays[1,locus.index]
			read.counts.for.upper.quartile.in.condition.A <- array.of.all.condition.A.upper.quartile.read.counts[1]
			read.counts.for.upper.quartile.in.condition.B <- array.of.all.condition.B.upper.quartile.read.counts[1]

			data <- get.pvalue.and.direction.of.differential.expression.using.sFCC(read.counts.for.target.gene.in.condition.A, read.counts.for.target.gene.in.condition.B,
											read.counts.for.upper.quartile.in.condition.A, read.counts.for.upper.quartile.in.condition.B,
											differential.expression.factor.threshold, number.of.points.to.sample, locus.index, debug)
		}else{
			## defensive: already rejected in step 1
			stop(sprintf("Unsupported simulation type: %s", simulation.type))
		}

		pvalue.array[locus.index] <- data$p.value
		direction.array[locus.index] <- data$direction
	}

	## adjust the pvalues using the Benjamini-Hochberg method
	adjusted.pvalue.array <- p.adjust(pvalue.array, method="BH")

	if(debug){
		cat("DEBUG:\n")
		print(pvalue.array)
		print(adjusted.pvalue.array)
		print(direction.array)
	}
	list(pvalue.array=pvalue.array, adjusted.pvalue.array=adjusted.pvalue.array, direction.array=direction.array)
}

## Monte Carlo sFCC test of differential expression for one locus between condition A and condition B.
##
## The observed differential expression value (A vs. B, each normalised by its upper-quartile
## read count) is compared against values simulated under the hypothesis that the true A/B
## ratio equals differential.expression.factor.threshold.
##
## Arguments:
##   read.counts.for.target.gene.in.condition.A / .B   - read counts of the locus in each condition
##   read.counts.for.upper.quartile.in.condition.A / .B - read counts of the upper-quartile (control) locus
##   differential.expression.factor.threshold           - hypothesised A/B ratio of normalised expression
##   number.of.points.to.sample                         - number of binomial draws per condition
##   locus.index, debug                                 - kept for interface compatibility; not used in this function
##
## Returns the list(p.value=, direction=) produced by
## get.pvalue.and.direction.from.observed.and.simulated.values().
"get.pvalue.and.direction.of.differential.expression.using.sFCC" <- function(read.counts.for.target.gene.in.condition.A, read.counts.for.target.gene.in.condition.B,
									read.counts.for.upper.quartile.in.condition.A, read.counts.for.upper.quartile.in.condition.B,
									differential.expression.factor.threshold, number.of.points.to.sample, locus.index = -1, debug=FALSE){

	## total reads (target gene + control) per condition; this is the binomial size used below
	read.counts.for.target.gene.and.upper.quartile.in.condition.A <-
		read.counts.for.target.gene.in.condition.A + read.counts.for.upper.quartile.in.condition.A
	read.counts.for.target.gene.and.upper.quartile.in.condition.B <-
		read.counts.for.target.gene.in.condition.B + read.counts.for.upper.quartile.in.condition.B

	## differential expression value of the observed data
	observed.differential.expression.value <-
		compute.differential.expression.value.from.observed.read.counts(read.counts.for.target.gene.in.condition.A,
										read.counts.for.target.gene.in.condition.B,
										read.counts.for.upper.quartile.in.condition.A,
										read.counts.for.upper.quartile.in.condition.B)

	## p_g: smoothed probability that a read drawn from {gene, control} belongs to the gene,
	## 	p_g = (read.count(gene) + 1) / (read.count(gene) + read.count(control) + 2)
	p.condition.A <- ((read.counts.for.target.gene.in.condition.A + 1) /
				(read.counts.for.target.gene.and.upper.quartile.in.condition.A + 2))
	p.condition.B <- ((read.counts.for.target.gene.in.condition.B + 1) /
				(read.counts.for.target.gene.and.upper.quartile.in.condition.B + 2))

	## n_g: normalised expression value expressed as odds, n_g = p_g / (1 - p_g)
	ratio.condition.A <- p.condition.A / (1 - p.condition.A)
	ratio.condition.B <- p.condition.B / (1 - p.condition.B)

	## "baseline" n_g: the mean of the two conditions
	ratio.baseline <- (ratio.condition.A + ratio.condition.B) / 2

	## choose n_g(A) and n_g(B) so that their mean is still the baseline
	## while n_g(A) / n_g(B) equals the specified threshold
	ratio.condition.B.given.specified.DEF.threshold <- (2 * ratio.baseline) / (1 + differential.expression.factor.threshold)
	ratio.condition.A.given.specified.DEF.threshold <- ratio.condition.B.given.specified.DEF.threshold * differential.expression.factor.threshold

	## convert the hypothesised odds back to probabilities p_g
	p.condition.B.given.specified.DEF.threshold <- (ratio.condition.B.given.specified.DEF.threshold /
								(1 + ratio.condition.B.given.specified.DEF.threshold))
	p.condition.A.given.specified.DEF.threshold <- (ratio.condition.A.given.specified.DEF.threshold /
								(1 + ratio.condition.A.given.specified.DEF.threshold))

	## simulate target-gene read counts under the hypothesis; condition A is drawn before
	## condition B so the random number stream is consumed in a fixed order
	simulated.read.counts.for.target.gene.in.condition.A.given.specified.DEF.threshold <- rbinom(number.of.points.to.sample,
									read.counts.for.target.gene.and.upper.quartile.in.condition.A,
									p.condition.A.given.specified.DEF.threshold)
	simulated.read.counts.for.target.gene.in.condition.B.given.specified.DEF.threshold <- rbinom(number.of.points.to.sample,
									read.counts.for.target.gene.and.upper.quartile.in.condition.B,
									p.condition.B.given.specified.DEF.threshold)

	## whatever was not assigned to the target gene belongs to the control
	simulated.upper.quartiles.in.condition.A.given.specified.DEF.threshold <- (read.counts.for.target.gene.and.upper.quartile.in.condition.A -
									simulated.read.counts.for.target.gene.in.condition.A.given.specified.DEF.threshold)
	simulated.upper.quartiles.in.condition.B.given.specified.DEF.threshold <- (read.counts.for.target.gene.and.upper.quartile.in.condition.B -
									simulated.read.counts.for.target.gene.in.condition.B.given.specified.DEF.threshold)

	## differential expression value of every simulated draw; the helper is plain elementwise
	## arithmetic, so a single vectorised call replaces a per-draw loop
	simulated.differential.expression.values.given.specified.DEF.threshold <-
		compute.differential.expression.value.from.observed.read.counts(
			simulated.read.counts.for.target.gene.in.condition.A.given.specified.DEF.threshold,
			simulated.read.counts.for.target.gene.in.condition.B.given.specified.DEF.threshold,
			simulated.upper.quartiles.in.condition.A.given.specified.DEF.threshold,
			simulated.upper.quartiles.in.condition.B.given.specified.DEF.threshold)

	get.pvalue.and.direction.from.observed.and.simulated.values(observed.differential.expression.value,
									simulated.differential.expression.values.given.specified.DEF.threshold)
}

## Differential expression value between two samples for one gene.
##
## Each sample's normalised expression is (target read count + 1) / (upper-quartile read count + 1);
## the value returned is the first sample's normalised expression divided by the second's.
## Works elementwise, so the arguments may be scalars or equal-length vectors.
##
## Argument order: target count in sample 1, target count in sample 2,
## upper-quartile count in sample 1, upper-quartile count in sample 2.
"compute.differential.expression.value.from.observed.read.counts" <- function(simulated.read.count.for.target.gene.in.hybrid.given.specified.DEF.threshold,
										simulated.read.count.for.target.gene.in.in.silico.hybrid.given.specified.DEF.threshold,
										simulated.upper.quartile.in.hybrid.given.specified.DEF.threshold,
										simulated.upper.quartile.in.in.silico.hybrid.given.specified.DEF.threshold){
	## add-one smoothed ratio of target reads to control reads
	smoothed.ratio <- function(target.count, control.count){
		(target.count + 1) / (control.count + 1)
	}

	hybrid.expression <- smoothed.ratio(simulated.read.count.for.target.gene.in.hybrid.given.specified.DEF.threshold,
						simulated.upper.quartile.in.hybrid.given.specified.DEF.threshold)
	in.silico.hybrid.expression <- smoothed.ratio(simulated.read.count.for.target.gene.in.in.silico.hybrid.given.specified.DEF.threshold,
						simulated.upper.quartile.in.in.silico.hybrid.given.specified.DEF.threshold)

	hybrid.expression / in.silico.hybrid.expression
}

## Monte Carlo sFCC test for a cis effect at one locus in the hybrid.
##
## The observed add-one smoothed ratio of reference-allele to other-allele read counts is
## compared with ratios simulated under the hypothesis that the true ref/other ratio equals
## differential.expression.factor.threshold. The total allele read count is held fixed and
## the reference-allele count is drawn from a binomial distribution.
##
## locus.index and debug are accepted for interface compatibility and are not used here.
## Returns the list(p.value=, direction=) produced by
## get.pvalue.and.direction.from.observed.and.simulated.values().
"get.pvalue.and.direction.of.cis.effects.using.sFCC" <- function(ref.allele.read.counts.for.target.gene.in.hybrid, other.allele.read.counts.for.target.gene.in.hybrid,
								differential.expression.factor.threshold, number.of.points.to.sample, locus.index = -1, debug=FALSE){
	ref.count <- ref.allele.read.counts.for.target.gene.in.hybrid
	other.count <- other.allele.read.counts.for.target.gene.in.hybrid

	## observed statistic
	observed.ratio <- (ref.count + 1) / (other.count + 1)

	## fraction of reads expected on the reference allele when ref/other equals the threshold
	allele.total <- ref.count + other.count
	null.ref.fraction <- differential.expression.factor.threshold / (differential.expression.factor.threshold + 1)

	## simulated statistic for every draw
	drawn.ref.counts <- rbinom(number.of.points.to.sample, allele.total, null.ref.fraction)
	drawn.other.counts <- allele.total - drawn.ref.counts
	drawn.ratios <- (drawn.ref.counts + 1) / (drawn.other.counts + 1)

	get.pvalue.and.direction.from.observed.and.simulated.values(observed.ratio, drawn.ratios)
}

## Monte Carlo sFCC test for a parental trans effect at one locus.
##
## The statistic is
##   (ref parent normalised expression / other parent normalised expression)
##   / (ref allele / other allele ratio in the hybrid),
## with add-one smoothing on every count. The observed statistic is compared with values
## simulated under the hypothesis that its true value equals
## differential.expression.factor.threshold.
##
## Arguments:
##   ref/other.allele.read.counts.for.target.gene.in.hybrid - allele-specific read counts in the hybrid
##   read.counts.for.target.gene.in.ref/other.parent         - read counts of the locus in each parent
##   read.counts.for.upper.quartile.in.ref/other.parent      - upper-quartile (control) read counts in each parent
##   differential.expression.factor.threshold                - hypothesised parental trans effect ratio
##   number.of.points.to.sample                              - number of binomial draws per simulated quantity
##   locus.index, debug                                      - kept for interface compatibility; not used in this function
##
## Returns the list(p.value=, direction=) produced by
## get.pvalue.and.direction.from.observed.and.simulated.values().
"get.pvalue.and.direction.of.parental.trans.effects.using.sFCC" <- function(ref.allele.read.counts.for.target.gene.in.hybrid, other.allele.read.counts.for.target.gene.in.hybrid,
									read.counts.for.target.gene.in.ref.parent, read.counts.for.target.gene.in.other.parent,
									read.counts.for.upper.quartile.in.ref.parent, read.counts.for.upper.quartile.in.other.parent,
									differential.expression.factor.threshold, number.of.points.to.sample, locus.index = -1, debug=FALSE){

	hypothesized.parental.trans.effect.ratio <- differential.expression.factor.threshold

	## ---- observed statistic ----
	observed.ref.parent.normalized.expression <- (read.counts.for.target.gene.in.ref.parent + 1) / (read.counts.for.upper.quartile.in.ref.parent + 1)
	observed.other.parent.normalized.expression <- (read.counts.for.target.gene.in.other.parent + 1) / (read.counts.for.upper.quartile.in.other.parent + 1)
	observed.ref.parent.to.other.parent.ratio <- observed.ref.parent.normalized.expression / observed.other.parent.normalized.expression

	observed.ref.to.other.ratio.in.hybrid <- (ref.allele.read.counts.for.target.gene.in.hybrid + 1) / (other.allele.read.counts.for.target.gene.in.hybrid + 1)
	observed.parental.trans.effect.ratio <- observed.ref.parent.to.other.parent.ratio / observed.ref.to.other.ratio.in.hybrid

	## ---- hypothesised parameters ----
	## keep the mean of (parental ratio, hybrid allelic ratio) at its observed value while
	## forcing parental ratio / hybrid allelic ratio to equal the hypothesised trans effect
	observed.allelic.ratio.baseline <- (observed.ref.parent.to.other.parent.ratio + observed.ref.to.other.ratio.in.hybrid) / 2
	hypothesized.ref.to.other.ratio.in.hybrid <- (2 * observed.allelic.ratio.baseline) / (1 + hypothesized.parental.trans.effect.ratio)
	hypothesized.ref.parent.to.other.parent.ratio <- hypothesized.ref.to.other.ratio.in.hybrid * hypothesized.parental.trans.effect.ratio

	## same construction for the two parents: keep their mean normalised expression,
	## force their ratio to the hypothesised parental ratio
	observed.parental.baseline.for.normalized.expression <- (observed.ref.parent.normalized.expression + observed.other.parent.normalized.expression) / 2
	hypothesized.other.parent.normalized.expression <- (2 * observed.parental.baseline.for.normalized.expression) / (1 + hypothesized.ref.parent.to.other.parent.ratio)
	hypothesized.ref.parent.normalized.expression <- hypothesized.other.parent.normalized.expression * hypothesized.ref.parent.to.other.parent.ratio

	## normalised expression (odds) -> probability that a read belongs to the target gene
	hypothesized.ref.parent.target.gene.fraction <- hypothesized.ref.parent.normalized.expression / (1 + hypothesized.ref.parent.normalized.expression)
	hypothesized.other.parent.target.gene.fraction <- hypothesized.other.parent.normalized.expression / (1 + hypothesized.other.parent.normalized.expression)

	read.counts.for.target.gene.and.upper.quartile.in.ref.parent <- read.counts.for.target.gene.in.ref.parent + read.counts.for.upper.quartile.in.ref.parent
	read.counts.for.target.gene.and.upper.quartile.in.other.parent <- read.counts.for.target.gene.in.other.parent + read.counts.for.upper.quartile.in.other.parent

	## ---- simulation (draw order: ref parent, other parent, hybrid) ----
	simulated.read.counts.for.target.gene.in.ref.parent.array <-
		rbinom(number.of.points.to.sample, read.counts.for.target.gene.and.upper.quartile.in.ref.parent, hypothesized.ref.parent.target.gene.fraction)
	if(any(is.na(simulated.read.counts.for.target.gene.in.ref.parent.array))){
		## diagnostic for draws that came back NA; the fraction is printed with %g because
		## it is a probability and %.0f would round it to 0 or 1
		cat(sprintf("DEBUG: nps=%.0f, rctot=%.0f, hyp.frac=%g\n", number.of.points.to.sample,
			read.counts.for.target.gene.and.upper.quartile.in.ref.parent, hypothesized.ref.parent.target.gene.fraction))
	}
	simulated.read.counts.for.upper.quartile.in.ref.parent.array <-
			read.counts.for.target.gene.and.upper.quartile.in.ref.parent - simulated.read.counts.for.target.gene.in.ref.parent.array
	simulated.ref.parent.normalized.expression.array <-
			(simulated.read.counts.for.target.gene.in.ref.parent.array + 1) / (simulated.read.counts.for.upper.quartile.in.ref.parent.array + 1)

	simulated.read.counts.for.target.gene.in.other.parent.array <-
		rbinom(number.of.points.to.sample, read.counts.for.target.gene.and.upper.quartile.in.other.parent, hypothesized.other.parent.target.gene.fraction)
	simulated.read.counts.for.upper.quartile.in.other.parent.array <-
			read.counts.for.target.gene.and.upper.quartile.in.other.parent - simulated.read.counts.for.target.gene.in.other.parent.array
	simulated.other.parent.normalized.expression.array <-
			(simulated.read.counts.for.target.gene.in.other.parent.array + 1) / (simulated.read.counts.for.upper.quartile.in.other.parent.array + 1)

	simulated.ref.parent.to.other.parent.ratio.array <- simulated.ref.parent.normalized.expression.array / simulated.other.parent.normalized.expression.array

	total.allele.read.counts.in.hybrid <- ref.allele.read.counts.for.target.gene.in.hybrid + other.allele.read.counts.for.target.gene.in.hybrid
	hypothesized.ref.allele.fraction <- hypothesized.ref.to.other.ratio.in.hybrid / (hypothesized.ref.to.other.ratio.in.hybrid + 1)
	simulated.ref.allele.read.counts.in.hybrid.array <- rbinom(number.of.points.to.sample, total.allele.read.counts.in.hybrid, hypothesized.ref.allele.fraction)
	simulated.other.allele.read.counts.in.hybrid.array <- total.allele.read.counts.in.hybrid - simulated.ref.allele.read.counts.in.hybrid.array
	simulated.ref.to.other.ratios.in.hybrid.array <- (simulated.ref.allele.read.counts.in.hybrid.array + 1) / (simulated.other.allele.read.counts.in.hybrid.array + 1)

	simulated.parental.trans.effect.ratio.array <- simulated.ref.parent.to.other.parent.ratio.array / simulated.ref.to.other.ratios.in.hybrid.array

	get.pvalue.and.direction.from.observed.and.simulated.values(observed.parental.trans.effect.ratio, simulated.parental.trans.effect.ratio.array)
}

## Empirical two-sided p-value and direction of an observed statistic relative to simulated values.
##
## Direction is UP when the observed value exceeds more simulated values than it falls below,
## DOWN in the opposite case, and NONE on a tie. The one-sided p-value is the fraction of
## simulated values that do not support that direction (ties with the observed value, values
## on the opposite side, and invalid values); it is doubled and capped at 1.0.
##
## A missing (NA/NaN) observed value makes every simulated value count as invalid.
## Returns list(p.value=, direction=).
"get.pvalue.and.direction.from.observed.and.simulated.values" <- function(observed.value, simulated.value.array){
	number.of.simulated.values <- length(simulated.value.array)

	## is.na() is TRUE for NaN as well as NA
	if(is.na(observed.value)){
		count.invalid <- number.of.simulated.values
		count.observed.above <- 0
		count.observed.below <- 0
		count.ties <- 0
	}else{
		is.invalid <- is.na(simulated.value.array)
		usable.values <- simulated.value.array[!is.invalid]

		count.invalid <- sum(is.invalid)
		count.observed.above <- sum(observed.value > usable.values)
		count.observed.below <- sum(observed.value < usable.values)
		count.ties <- sum(observed.value == usable.values)
	}

	## pick the direction and the count of simulated values lying on the opposite side
	if(count.observed.above > count.observed.below){
		direction <- UP
		count.opposing <- count.observed.below
	}else if(count.observed.below > count.observed.above){
		direction <- DOWN
		count.opposing <- count.observed.above
	}else{
		direction <- NONE
		count.opposing <- count.observed.above
	}

	## invalid points count against the hypothesis; double for the two-sided test, cap at 1.0
	one.sided.p.value <- (count.ties + count.opposing + count.invalid) / number.of.simulated.values
	p.value <- min(1.0, one.sided.p.value * 2.0)

	list(p.value=p.value, direction=direction)
}

## Fisher's Exact Test of differential expression (hybrid vs. in silico hybrid) for the selected loci.
##
## For every locus i with selected.locus.mask[i] TRUE, a 2x2 table is tested:
##   column 1 = (target read count, upper-quartile read count) in the hybrid
##   column 2 = (target read count, upper-quartile read count) in the in silico hybrid
## Only row 1 of each read-count array and element 1 of each upper-quartile array are used.
##
## The direction is DOWN / UP / NONE according to whether the un-smoothed ratio
## target / upper quartile is smaller / larger / equal in the hybrid compared with the
## in silico hybrid.
##
## Returns list(pvalue.array=, adjusted.pvalue.array=, direction.array=). The arrays hold one
## entry per SELECTED locus, in locus order; pvalue.array and direction.array are NULL when
## no locus is selected. adjusted.pvalue.array is the Benjamini-Hochberg adjustment of
## pvalue.array.
"get.pvalues.of.differential.expression.for.selected.loci.using.FET" <- function(array.of.all.hybrid.read.count.arrays,
											array.of.all.in.silico.hybrid.read.count.arrays,
											array.of.all.hybrid.upper.quartile.read.counts,
											array.of.all.in.silico.hybrid.upper.quartile.read.counts,
											selected.locus.mask, num.loci){
	## the control (upper-quartile) counts are the same for every locus
	read.counts.for.upper.quartile.in.hybrid <- array.of.all.hybrid.upper.quartile.read.counts[1]
	read.counts.for.upper.quartile.in.in.silico.hybrid <- array.of.all.in.silico.hybrid.upper.quartile.read.counts[1]

	## one slot per locus; slots of unselected loci stay NULL and are dropped by unlist(),
	## which avoids re-copying the result vectors on every iteration
	pvalue.per.locus <- vector("list", num.loci)
	direction.per.locus <- vector("list", num.loci)

	for(i in seq_len(num.loci)){
		if(selected.locus.mask[i] == FALSE){
			next
		}

		read.counts.for.target.gene.in.hybrid <- array.of.all.hybrid.read.count.arrays[1,i]
		read.counts.for.target.gene.in.in.silico.hybrid <- array.of.all.in.silico.hybrid.read.count.arrays[1,i]

		GeneTesting <- matrix(c(read.counts.for.target.gene.in.hybrid, read.counts.for.upper.quartile.in.hybrid,
					read.counts.for.target.gene.in.in.silico.hybrid, read.counts.for.upper.quartile.in.in.silico.hybrid), nrow=2)
		result <- fisher.test(GeneTesting, alternative="two.sided")
		pvalue.per.locus[[i]] <- result$p.value

		hybrid.ratio <- read.counts.for.target.gene.in.hybrid / read.counts.for.upper.quartile.in.hybrid
		in.silico.hybrid.ratio <- read.counts.for.target.gene.in.in.silico.hybrid / read.counts.for.upper.quartile.in.in.silico.hybrid
		if(hybrid.ratio < in.silico.hybrid.ratio){
			direction <- DOWN
		}else if(hybrid.ratio > in.silico.hybrid.ratio){
			direction <- UP
		}else{
			direction <- NONE
		}
		direction.per.locus[[i]] <- direction
	}

	pvalue.array <- unlist(pvalue.per.locus)
	direction.array <- unlist(direction.per.locus)

	adjusted.pvalue.array <- p.adjust(pvalue.array, method="BH")
	list(pvalue.array=pvalue.array, adjusted.pvalue.array=adjusted.pvalue.array, direction.array=direction.array)
}

## Order the loci for current.category using the Fold Change (FC) method.
##
## Computes the per-locus fold changes with use.FC.to.get.fold.change.array() and converts
## them into a locus ordering with get.FC.locus.ordering.from.fold.change.array().
## When debug is TRUE the resulting ordering is printed.
##
## Returns the locus ordering (a vector of locus indices).
"use.FC.to.get.locus.ordering.for.category" <- function(method, current.category, 
												differential.expression.factor.threshold, num.replicates, number.of.points.to.sample,
												simulation.type, selected.locus.mask,
												condition.to.read.counts.on.each.replicate.per.locus.array, num.loci,
												debug = FALSE){
	## disabled guard kept from the original code:
	#if(simulation.type != NAE){
	#	stop("Unsupported simulation type for FC")
	#}

	## RETURN - add code to check for one replicate per condition (for case where simulation type is DE)
	fold.changes <- use.FC.to.get.fold.change.array(method, current.category,
							differential.expression.factor.threshold, num.replicates, number.of.points.to.sample,
							simulation.type, selected.locus.mask,
							condition.to.read.counts.on.each.replicate.per.locus.array, num.loci, debug)

	FC.ordering <- get.FC.locus.ordering.from.fold.change.array(current.category, simulation.type, fold.changes)

	if(debug){
		cat("DEBUG: FC locus.ordering:\n")
		print(FC.ordering)
	}

	FC.ordering
}

## Per-locus fold change of upper-quartile-normalised expression.
##
## simulation.type == NAE: fold change of the real hybrid over the in silico hybrid
##                         (replicate 1 of each); stops unless parent A, parent B and the
##                         hybrid each have exactly one replicate.
## simulation.type == DE:  delegated to get.fold.change.array.between.two.conditions().
## anything else:          error.
##
## method, current.category, differential.expression.factor.threshold, num.replicates,
## number.of.points.to.sample and selected.locus.mask are not used by this function.
## When debug is TRUE (NAE only) the upper-quartile counts and fold changes are printed.
"use.FC.to.get.fold.change.array" <- function(method, current.category, 
							differential.expression.factor.threshold, num.replicates, number.of.points.to.sample,
							simulation.type, selected.locus.mask,
							condition.to.read.counts.on.each.replicate.per.locus.array, num.loci, debug = FALSE){
	if(simulation.type != NAE){
		if(simulation.type == DE){
			return(get.fold.change.array.between.two.conditions(condition.to.read.counts.on.each.replicate.per.locus.array, num.loci))
		}
		stop(sprintf("Unrecognized simulation type: %s", simulation.type))
	}

	## ---- NAE: real hybrid vs. in silico hybrid ----
	parent.A.read.counts <- condition.to.read.counts.on.each.replicate.per.locus.array[[PARENT.A]]
	parent.B.read.counts <- condition.to.read.counts.on.each.replicate.per.locus.array[[PARENT.B]]
	hybrid.read.counts <- condition.to.read.counts.on.each.replicate.per.locus.array[[HYBRID]]
	in.silico.hybrid.read.counts <- condition.to.read.counts.on.each.replicate.per.locus.array[[IN.SILICO.HYBRID]]

	## the fold change computation below only looks at replicate 1, so insist on exactly one
	if((nrow(parent.A.read.counts) != 1) || (nrow(parent.B.read.counts) != 1) || (nrow(hybrid.read.counts) != 1)){
		stop("Internal Error: Expected one replicate per condition if we are using Fold Change (current implementation does not allow more)")
	}

	## upper-quartile read counts for the two conditions; the third condition slot is
	## filled with an empty (zero-replicate) array
	no.third.condition <- array(NA, dim=c(0, num.loci))
	upper.quartile.data <- get.upper.quartile.read.count.information.for.three.conditions(hybrid.read.counts, in.silico.hybrid.read.counts,
										no.third.condition, num.loci)
	hybrid.upper.quartile <- upper.quartile.data$array.of.all.condition.1.upper.quartile.read.counts
	in.silico.hybrid.upper.quartile <- upper.quartile.data$array.of.all.condition.2.upper.quartile.read.counts

	## fold change = normalised hybrid expression / normalised in silico hybrid expression
	hybrid.normalized.expression <- hybrid.read.counts[1,] / hybrid.upper.quartile
	in.silico.hybrid.normalized.expression <- in.silico.hybrid.read.counts[1,] / in.silico.hybrid.upper.quartile
	fold.change.array <- hybrid.normalized.expression / in.silico.hybrid.normalized.expression

	if(debug){
		cat("Real hybrid upper quartile read counts:\n")
		print(hybrid.upper.quartile)
		cat("In silico hybrid upper quartile read counts:\n")
		print(in.silico.hybrid.upper.quartile)
		cat("DEBUG: Fold change array:\n")
		print(fold.change.array)
	}

	fold.change.array
}


## Per-locus fold change of condition A over condition B.
##
## Replicate 1 of each condition is divided by that condition's upper-quartile read counts
## (obtained from get.upper.quartile.read.count.information.for.three.conditions(), with an
## empty array in the third condition slot), and the normalised A values are divided by the
## normalised B values.
"get.fold.change.array.between.two.conditions" <- function(condition.to.read.counts.on.each.replicate.per.locus.array, num.loci){
	read.counts.A <- condition.to.read.counts.on.each.replicate.per.locus.array[[CONDITION.A]]
	read.counts.B <- condition.to.read.counts.on.each.replicate.per.locus.array[[CONDITION.B]]

	## upper-quartile read counts of both conditions
	no.third.condition <- array(NA, dim=c(0, num.loci))
	upper.quartile.data <- get.upper.quartile.read.count.information.for.three.conditions(read.counts.A, read.counts.B,
										no.third.condition, num.loci)

	## normalise replicate 1 of each condition, then take the A / B ratio
	normalized.expression.A <- read.counts.A[1,] / upper.quartile.data$array.of.all.condition.1.upper.quartile.read.counts
	normalized.expression.B <- read.counts.B[1,] / upper.quartile.data$array.of.all.condition.2.upper.quartile.read.counts

	normalized.expression.A / normalized.expression.B
}

## Turn a fold change array into a locus ordering for the requested category.
##
##   UP   - largest fold change first
##   DOWN - smallest fold change first
##   NONE - fold change closest to 1 first (values above 1 are replaced by their reciprocal,
##          missing fold changes are treated as exactly 1)
##
## For UP and DOWN, loci whose fold change is NA/NaN are placed last. Ties are broken at
## random (runif), so the function consumes random numbers on every call.
## simulation.type is accepted for interface compatibility and is not used here.
## Stops with an error for any other current.category.
##
## Returns the ordering as a vector of locus indices.
"get.FC.locus.ordering.from.fold.change.array" <- function(current.category, simulation.type, fold.change.array){
	if(current.category == UP){
		ranking.array <- (- fold.change.array)
	}else if(current.category == DOWN){
		ranking.array <- fold.change.array
	}else if(current.category == NONE){
		ranking.array <- fold.change.array
		## RETURN - temp (original author's marker): a missing fold change counts as "no change"
		ranking.array[is.na(ranking.array)] <- 1
		## fold both sides of 1 onto (0, 1] so that 2 and 1/2 rank the same
		above.one.mask <- (ranking.array > 1)
		ranking.array[above.one.mask] <- 1 / ranking.array[above.one.mask]
		ranking.array <- (- ranking.array)
	}else{
		stop(sprintf("bad current.category: %s", current.category))
	}

	## RETURN - temp (original author's marker): missing values go to the end of the ordering;
	## is.na() is TRUE for NaN as well as NA
	ranking.array[is.na(ranking.array)] <- Inf

	order(ranking.array, runif(length(ranking.array)))
}

## Order the loci for current.category by combining Fold Change (FC) and Fisher's Exact Test (FET).
##
## One method splits the loci into a "better" bin and the rest; the other method supplies
## the ordering that is applied inside and across the two bins
## (see apply.locus.ordering.over.two.ordered.bins()).
##
##   first.method == FC: the better bin holds the loci whose fold change matches
##       current.category (UP: above the threshold, DOWN: below 1/threshold, NONE: neither
##       and not missing); the FET ordering is applied on top.
##   otherwise:          the better bin holds the loci whose FET result matches
##       current.category (UP / DOWN: that direction with adjusted p-value below
##       pvalue.threshold, NONE: adjusted p-value not below pvalue.threshold); the FC
##       ordering is applied on top.
##
## Both orderings are always computed, in the same sequence, whichever one ends up being used.
## Stops with an error when current.category is not UP, DOWN or NONE.
## All diagnostic output is printed only when debug is TRUE.
##
## Returns the combined locus ordering (a vector of locus indices).
"use.FC.and.FET.to.get.locus.ordering.for.category" <- function(method, current.category, 
								differential.expression.factor.threshold, pvalue.threshold, num.replicates, number.of.points.to.sample,
								simulation.type, selected.locus.mask,
								condition.to.read.counts.on.each.replicate.per.locus.array, num.loci, first.method, debug = FALSE){
	## RETURN - add code to check for one replicate per condition (for case where simulation type is DE)
	fold.change.array <- use.FC.to.get.fold.change.array(method, current.category, 
							differential.expression.factor.threshold, num.replicates, number.of.points.to.sample,
							simulation.type, selected.locus.mask,
							condition.to.read.counts.on.each.replicate.per.locus.array, num.loci, debug)
	FET.data <- use.FET.to.assign.categories.and.compute.associated.pvalues(method, num.replicates, simulation.type, selected.locus.mask,
								condition.to.read.counts.on.each.replicate.per.locus.array, num.loci)
	pvalue.array <- FET.data$pvalue.array
	adjusted.pvalue.array <- FET.data$adjusted.pvalue.array
	direction.array <- FET.data$direction.array

	FET.locus.ordering <- get.FET.locus.ordering.from.pvalues.and.directions(num.loci, selected.locus.mask, pvalue.array, direction.array, current.category, simulation.type)
	FC.locus.ordering <- get.FC.locus.ordering.from.fold.change.array(current.category, simulation.type, fold.change.array)

	if(first.method == FC){
		## bin by fold change; loci with a missing fold change belong to no bin
		fold.change.na.mask <- is.na(fold.change.array)

		fold.change.up.mask <- array(FALSE, dim=num.loci)
		fold.change.up.mask[!fold.change.na.mask] <- (fold.change.array[!fold.change.na.mask] > differential.expression.factor.threshold)

		fold.change.down.mask <- array(FALSE, dim=num.loci)
		fold.change.down.mask[!fold.change.na.mask] <- (fold.change.array[!fold.change.na.mask] < (1 / differential.expression.factor.threshold))

		fold.change.none.mask <- array(TRUE, dim=num.loci)
		fold.change.none.mask[fold.change.na.mask] <- FALSE
		fold.change.none.mask[fold.change.up.mask] <- FALSE
		fold.change.none.mask[fold.change.down.mask] <- FALSE

		if(current.category == UP){
			better.bin.mask <- fold.change.up.mask
		}else if(current.category == DOWN){
			better.bin.mask <- fold.change.down.mask
		}else if(current.category == NONE){
			better.bin.mask <- fold.change.none.mask
		}else{
			stop(sprintf("bad current.category: %s", current.category))
		}
		combined.locus.ordering <- apply.locus.ordering.over.two.ordered.bins(better.bin.mask, FET.locus.ordering, num.loci)
	}else{
		## bin by FET direction and significance of the adjusted p-value
		direction.up.mask <- (direction.array == UP)
		direction.down.mask <- (direction.array == DOWN)
		significant.pvalue.mask <- (adjusted.pvalue.array < pvalue.threshold)

		up.with.significant.pvalue.mask <- array(TRUE, dim=num.loci)
		up.with.significant.pvalue.mask[!direction.up.mask] <- FALSE
		up.with.significant.pvalue.mask[!significant.pvalue.mask] <- FALSE

		down.with.significant.pvalue.mask <- array(TRUE, dim=num.loci)
		down.with.significant.pvalue.mask[!direction.down.mask] <- FALSE
		down.with.significant.pvalue.mask[!significant.pvalue.mask] <- FALSE

		if(current.category == UP){
			better.bin.mask <- up.with.significant.pvalue.mask
		}else if(current.category == DOWN){
			better.bin.mask <- down.with.significant.pvalue.mask
		}else if(current.category == NONE){
			better.bin.mask <- (!significant.pvalue.mask)
		}else{
			stop(sprintf("bad current.category: %s", current.category))
		}
		if(debug){
			cat(sprintf("DEBUG: size of better bin mask=%.0f\n", length(better.bin.mask[better.bin.mask])))
		}
		combined.locus.ordering <- apply.locus.ordering.over.two.ordered.bins(better.bin.mask, FC.locus.ordering, num.loci)
	}

	if(debug){
		cat(sprintf("DEBUG: sequential combination of FET and FC with first method %s locus.ordering:\n", first.method))
		print(combined.locus.ordering)
	}

	combined.locus.ordering
}

## Re-order the loci so that every locus in the better bin comes before every locus outside it,
## while the relative order given by locus.ordering is kept inside each of the two groups.
##
## Arguments:
##   better.bin.mask - logical mask over loci, TRUE for loci in the better bin
##   locus.ordering  - locus indices, best first
##   num.loci        - number of loci; must not be 0
##
## Returns the new ordering as a vector of locus indices. A random tie-breaker (runif) is
## passed to order(), so the function consumes random numbers on every call.
"apply.locus.ordering.over.two.ordered.bins" <- function(better.bin.mask, locus.ordering, num.loci){
	if(num.loci == 0){
		stop("in apply.locus.ordering.over.two.ordered.bins(): did not expect num.loci to be equal to 0")
	}

	num.loci.in.better.bin <- length(better.bin.mask[better.bin.mask])

	## rank of every locus under the supplied ordering (1 = best)
	locus.ranking <- locus.ordering.to.ranking.value(locus.ordering)
	if(num.loci.in.better.bin != 0){
		## shift every locus outside the better bin behind the worst-ranked locus inside it
		max.ranking.better.bin <- max(locus.ranking[better.bin.mask])
		locus.ranking[!better.bin.mask] <- max.ranking.better.bin + locus.ranking[!better.bin.mask]
	}

	order(locus.ranking, runif(length(locus.ranking)))
}

## Convert a locus ordering into per-locus ranks (the inverse permutation).
##
## locus.ordering[k] is the index of the locus in position k; the result holds, for every
## locus index, the position k at which that locus appears. An empty ordering yields an
## empty result.
"locus.ordering.to.ranking.value" <- function(locus.ordering){
	num.loci <- length(locus.ordering)
	ranking.value.array <- array(NA, dim=num.loci)
	## one vectorised assignment: the locus found at position k receives rank k
	ranking.value.array[locus.ordering] <- seq_len(num.loci)
	ranking.value.array
}


