# assign.expression.patterns.helper.functions.R

library(hash)

source("./DE.study.global.variables.R")
source("./statistics.per.locus.file.helper.functions.R")
source("./DE.study.helper.functions.R")

## Build a lookup table of the loci listed in a file.
##
## Args:
##   file: passed straight to read.table(); read without a header row.
##         Only the first column is used.
##
## Returns:
##   A hash with one key per value found in the first column of `file`.
##   Every key maps to 0; the stored value carries no information here.
"get.locus.hashtable.from.file" <- function(file){
	parsed.rows <- read.table(file, header=FALSE)
	## data.matrix() turns the extracted column into a one-column matrix;
	## taking its first column flattens it back into a plain vector
	locus.names <- data.matrix(parsed.rows[,1])[,1]

	known.loci <- hash()
	for(locus.name in locus.names){
		known.loci[[locus.name]] <- 0
	}
	known.loci
}

## Flag which loci are present in a table of filtered loci.
##
## Args:
##   locus.array: vector of locus names to look up.
##   filtered.locus.hashtable: table indexed by locus name with `[[`. A locus
##       counts as present when its entry has non-zero length (looking up a
##       missing key is expected to yield NULL).
##
## Returns:
##   A one-dimensional logical array parallel to `locus.array`, TRUE where the
##   locus was found in `filtered.locus.hashtable`. An empty `locus.array`
##   gives an empty array.
"get.filtered.locus.mask" <- function(locus.array, filtered.locus.hashtable){
	num.loci <- length(locus.array)
	filtered.locus.mask <- array(FALSE, dim=num.loci)
	## seq_len() instead of 1:num.loci: an empty input must skip the loop
	## rather than iterate over c(1, 0)
	for(locus.index in seq_len(num.loci)){
		current.locus <- locus.array[locus.index]
		if(length(filtered.locus.hashtable[[current.locus]]) != 0){
			filtered.locus.mask[locus.index] <- TRUE
		}
	}
	filtered.locus.mask
}

## Count how many loci fall into each high confidence category.
##
## Args:
##   high.confidence.category.array: category assigned to each locus.
##   array.of.all.possible.high.confidence.categories: categories that must
##       appear in the result even when no locus carries them. The UNC
##       category is always added on top of these.
##   num.loci: number of leading entries of `high.confidence.category.array`
##       to count.
##
## Returns:
##   A hash mapping every known category to its locus count (0 when unused).
##
## Stops with an error when a locus carries a category that is neither listed
## in `array.of.all.possible.high.confidence.categories` nor equal to UNC.
"get.high.confidence.category.to.num.loci.hashtable" <- function(high.confidence.category.array, array.of.all.possible.high.confidence.categories, num.loci){
	high.confidence.category.to.num.loci <- hash()
	for(current.category in array.of.all.possible.high.confidence.categories){
		high.confidence.category.to.num.loci[[current.category]] <- 0
	}
	high.confidence.category.to.num.loci[[UNC]] <- 0

	## seq_len() instead of 1:num.loci: with num.loci == 0 the loop must not
	## run at all (1:0 would visit indices 1 and 0)
	for(locus.index in seq_len(num.loci)){
		high.confidence.category <- high.confidence.category.array[locus.index]
		## a missing key yields a zero-length value: the category is unknown
		if(length(high.confidence.category.to.num.loci[[high.confidence.category]]) == 0){
			stop(sprintf("Did not expect to see high confidence category %s here", high.confidence.category))
		}
		high.confidence.category.to.num.loci[[high.confidence.category]] <- 1 + high.confidence.category.to.num.loci[[high.confidence.category]]
	}
	high.confidence.category.to.num.loci
}

## Print one summary line per high confidence category.
##
## Args:
##   high.confidence.category.to.num.loci: hash mapping a category name to
##       the number of loci assigned to it.
##
## Writes one line per key to standard output, in the order the keys come
## back from as.list.hash().
"print.num.loci.for.each.high.confidence.category" <- function(high.confidence.category.to.num.loci){
	category.names <- names(as.list.hash(high.confidence.category.to.num.loci))
	for(category.name in category.names){
		locus.count <- high.confidence.category.to.num.loci[[category.name]]
		cat(sprintf("Number of loci detected with high confidence category %s is %d\n", category.name, locus.count))
	}
}

## Assign a high confidence expression category to every locus with the
## requested method.
##
## `method` is compared against the constants sFCC, FET, FC, FET.FC and
## FC.FET, which are defined outside this file.
##   - sFCC:            categories and adjusted p-values come from
##                      use.sFCC.to.assign.categories.and.compute.associated.pvalues();
##                      a per-locus report is also written to `output.file`.
##   - FET:             directions and p-values come from
##                      use.FET.to.assign.categories.and.compute.associated.pvalues();
##                      p-values are BH-adjusted across loci.
##   - FC:              categories come from thresholding the fold change
##                      between the two conditions.
##   - FET.FC / FC.FET: both FET and FC are run; a locus keeps a category only
##                      when both agree on UP, DOWN or NONE, otherwise UNC.
##
## Args:
##   method: one of the method constants listed above.
##   fold.change.threshold: threshold used by the sFCC helper and by the FC
##       categorisation.
##   pvalue.threshold: cutoff applied to the adjusted p-values (sFCC, FET).
##   num.replicates, number.of.points.to.sample, simulation.type,
##   selected.locus.mask, effect.type, ref.parent, debug: forwarded unchanged
##       to the sFCC and/or FET helpers.
##   filtered.locus.mask: logical per locus; selects the loci written to
##       `output.file` (sFCC only).
##   sequencing.data: list providing `locus.array` and
##       `condition.to.read.counts.on.each.replicate.per.locus.array`.
##   num.loci: number of loci to process.
##   output.file: report destination; only touched when method == sFCC. It is
##       truncated first, then receives one tab-separated line per filtered
##       locus: locus, adjusted p-value, category.
##
## Returns:
##   A one-dimensional array of length `num.loci` holding the high confidence
##   category of each locus.
##
## Stops with an error when `method` is not one of the supported constants.
"use.method.to.assign.high.confidence.categories" <- function(method, fold.change.threshold, pvalue.threshold,
										num.replicates, number.of.points.to.sample,
										simulation.type, selected.locus.mask, filtered.locus.mask,
										sequencing.data, effect.type, ref.parent, num.loci, debug, output.file){

	## initialize variable
	fold.change.array <- NA

	## apply sFCC if the chosen method requires it
	if(method == sFCC){
		sFCC.data <- use.sFCC.to.assign.categories.and.compute.associated.pvalues(method, fold.change.threshold,
										num.replicates, number.of.points.to.sample,
										simulation.type, selected.locus.mask,
										sequencing.data, effect.type, ref.parent, num.loci, debug)
		adjusted.pvalue.array <- sFCC.data$adjusted.pvalue.for.category.array
		category.array <- sFCC.data$category.array

		## write the per-locus report: truncate the file, then append one line
		## for every locus selected by filtered.locus.mask
		locus.array <- sequencing.data$locus.array
		cat("", append=FALSE, file=output.file)
		## seq_len() instead of 1:num.loci so that num.loci == 0 writes nothing
		for(i in seq_len(num.loci)){
			if(filtered.locus.mask[i]){
				## Low Mapping Bias Locus
				cat(sprintf("%s\t%.3e\t%s\n", locus.array[i], adjusted.pvalue.array[i], category.array[i]), append=TRUE, file=output.file)
			}
		}
	}

	## apply FET if the chosen method requires it
	if((method == FET) || (method == FET.FC) || (method == FC.FET)){
		FET.data <- use.FET.to.assign.categories.and.compute.associated.pvalues(method, num.replicates, simulation.type, selected.locus.mask,
									sequencing.data$condition.to.read.counts.on.each.replicate.per.locus.array, num.loci)
		pvalue.array <- FET.data$pvalue.array
		direction.array <- FET.data$direction.array

		## adjust p-values across loci for multiple testing correction
		adjusted.pvalue.array <- p.adjust(pvalue.array, method="BH")
	}

	## apply FC if the chosen method requires it
	if((method == FC) || (method == FET.FC) || (method == FC.FET)){
		condition.to.read.counts.on.each.replicate.per.locus.array <- sequencing.data$condition.to.read.counts.on.each.replicate.per.locus.array
		fold.change.array <- get.fold.change.array.between.two.conditions(condition.to.read.counts.on.each.replicate.per.locus.array, num.loci)
	}

	## turn the per-method statistics into high confidence categories
	if(method == sFCC){
		high.confidence.category.array <- get.sFCC.high.confidence.category.array(category.array, adjusted.pvalue.array, pvalue.threshold, num.loci)
	}else if(method == FET){
		high.confidence.category.array <- get.FET.high.confidence.category.array(direction.array, adjusted.pvalue.array, pvalue.threshold, num.loci)
	}else if(method == FC){
		high.confidence.category.array <- get.FC.high.confidence.category.array(fold.change.array, fold.change.threshold, num.loci)
	}else if((method == FET.FC) || (method == FC.FET)){
		FET.high.confidence.category.array <- get.FET.high.confidence.category.array(direction.array, adjusted.pvalue.array, pvalue.threshold, num.loci)
		FC.high.confidence.category.array <- get.FC.high.confidence.category.array(fold.change.array, fold.change.threshold, num.loci)
		high.confidence.category.array <- array(NA, dim=num.loci)
		## seq_len() instead of 1:num.loci so that num.loci == 0 skips the loop
		for(locus.index in seq_len(num.loci)){
			FET.high.confidence.category <- FET.high.confidence.category.array[locus.index]
			FC.high.confidence.category <- FC.high.confidence.category.array[locus.index]

			## assign a category as high confidence if both methods agree on it
			## otherwise mark the category as unclear
			if((FET.high.confidence.category == UP) && (FC.high.confidence.category == UP)){
				high.confidence.category <- UP
			}else if((FET.high.confidence.category == DOWN) && (FC.high.confidence.category == DOWN)){
				high.confidence.category <- DOWN
			}else if((FET.high.confidence.category == NONE) && (FC.high.confidence.category == NONE)){
				high.confidence.category <- NONE
			}else{
				high.confidence.category <- UNC
			}
			high.confidence.category.array[locus.index] <- high.confidence.category
		}
	}else{
		stop(sprintf("Unsupported method: %s", method))
	}
	high.confidence.category.array
}

## Keep the sFCC category of a locus only when its adjusted p-value is
## significant.
##
## Args:
##   category.array: category proposed for each locus.
##   adjusted.pvalue.array: adjusted p-value attached to each proposed category.
##   pvalue.threshold: a locus keeps its category when its adjusted p-value is
##       strictly below this value.
##   num.loci: number of loci to process.
##
## Returns:
##   A one-dimensional array of length `num.loci`: the proposed category for
##   significant loci, UNC for all others.
##
## An NA adjusted p-value is not handled and makes the comparison fail.
"get.sFCC.high.confidence.category.array" <- function(category.array, adjusted.pvalue.array, pvalue.threshold, num.loci){
	high.confidence.category.array <- array(NA, dim=num.loci)
	## seq_len() instead of 1:num.loci: with num.loci == 0 the loop must not run
	for(locus.index in seq_len(num.loci)){
		current.category <- category.array[locus.index]
		adjusted.pvalue <- adjusted.pvalue.array[locus.index]
		if(adjusted.pvalue < pvalue.threshold){
			high.confidence.category <- current.category
		}else{
			high.confidence.category <- UNC
		}
		high.confidence.category.array[locus.index] <- high.confidence.category
	}
	high.confidence.category.array
}

## Split the set of loci by p-value threshold: a locus keeps its FET direction
## only when its adjusted p-value is significant.
##
## Args:
##   direction.array: direction reported for each locus.
##   adjusted.pvalue.array: adjusted p-value of each locus.
##   pvalue.threshold: a locus keeps its direction when its adjusted p-value is
##       strictly below this value.
##   num.loci: number of loci to process.
##
## Returns:
##   A one-dimensional array of length `num.loci`: the direction for
##   significant loci, NONE for all others.
##
## An NA adjusted p-value is not handled and makes the comparison fail.
"get.FET.high.confidence.category.array" <- function(direction.array, adjusted.pvalue.array, pvalue.threshold, num.loci){
	high.confidence.category.array <- array(NA, dim=num.loci)
	## seq_len() instead of 1:num.loci: with num.loci == 0 the loop must not run
	for(locus.index in seq_len(num.loci)){
		direction <- direction.array[locus.index]
		adjusted.pvalue <- adjusted.pvalue.array[locus.index]
		if(adjusted.pvalue < pvalue.threshold){
			high.confidence.category <- direction
		}else{
			high.confidence.category <- NONE
		}
		high.confidence.category.array[locus.index] <- high.confidence.category
	}
	high.confidence.category.array
}

## Split loci by fold change.
##
## Args:
##   fold.change.array: fold change of each locus; NA entries are allowed.
##   fold.change.threshold: a locus is UP when its fold change is strictly
##       above this value and DOWN when strictly below its reciprocal.
##   num.loci: number of loci to process.
##
## Returns:
##   A one-dimensional array of length `num.loci` holding, per locus:
##     UNC  when the fold change is NA,
##     DOWN when fold change < 1 / fold.change.threshold,
##     UP   when fold change > fold.change.threshold,
##     NONE otherwise.
"get.FC.high.confidence.category.array" <- function(fold.change.array, fold.change.threshold, num.loci){
	high.confidence.category.array <- array(NA, dim=num.loci)
	## seq_len() instead of 1:num.loci: with num.loci == 0 the loop must not run
	for(locus.index in seq_len(num.loci)){
		fold.change <- fold.change.array[locus.index]
		if(is.na(fold.change)){
			high.confidence.category <- UNC
		}else if(fold.change < (1 / fold.change.threshold)){
			high.confidence.category <- DOWN
		}else if(fold.change > fold.change.threshold){
			high.confidence.category <- UP
		}else{
			high.confidence.category <- NONE
		}
		high.confidence.category.array[locus.index] <- high.confidence.category
	}
	high.confidence.category.array
}

## Load the read counts of every condition and check that all conditions
## report the same loci.
##
## Args:
##   condition.to.statistics.per.locus.file.array: hash keyed by condition.
##       Each value is handed to get.array.of.all.read.count.arrays()
##       (presumably an array of statistics-per-locus files, one per
##       replicate -- confirm against that helper).
##
## Returns:
##   A list with four elements:
##     condition.to.read.counts.on.each.replicate.per.locus.array,
##     condition.to.ref.allele.read.counts.on.each.replicate.per.locus.array,
##     condition.to.other.allele.read.counts.on.each.replicate.per.locus.array:
##         hashes keyed by condition holding what the helper returned as
##         array.of.all.read.count.arrays, array.of.all.ref.allele.read.count.arrays
##         and array.of.all.other.allele.read.count.arrays respectively;
##     locus.array: the locus array of the first condition that reported one.
"get.condition.to.read.counts.on.each.replicate.per.locus.array" <- function(condition.to.statistics.per.locus.file.array){
	total.counts.by.condition <- hash()
	ref.allele.counts.by.condition <- hash()
	other.allele.counts.by.condition <- hash()
	reference.locus.array <- NULL

	condition.names <- names(as.list.hash(condition.to.statistics.per.locus.file.array))
	for(condition.name in condition.names){
		parsed <- get.array.of.all.read.count.arrays(condition.to.statistics.per.locus.file.array[[condition.name]])
		condition.locus.array <- parsed$locus.array

		## every condition after the first must list the same loci
		if(length(reference.locus.array) != 0){
			assert.two.locus.arrays.are.identical(reference.locus.array, condition.locus.array)
		}else{
			reference.locus.array <- condition.locus.array
		}

		total.counts.by.condition[[condition.name]] <- parsed$array.of.all.read.count.arrays
		ref.allele.counts.by.condition[[condition.name]] <- parsed$array.of.all.ref.allele.read.count.arrays
		other.allele.counts.by.condition[[condition.name]] <- parsed$array.of.all.other.allele.read.count.arrays
	}

	list(condition.to.read.counts.on.each.replicate.per.locus.array=total.counts.by.condition,
		condition.to.ref.allele.read.counts.on.each.replicate.per.locus.array=ref.allele.counts.by.condition,
		condition.to.other.allele.read.counts.on.each.replicate.per.locus.array=other.allele.counts.by.condition,
		locus.array=reference.locus.array)
}


