# assign.hybrid.expression.patterns.R

library(hash)

source("./DE.study.global.variables.R")
source("./assign.expression.patterns.helper.functions.R")

## flag handed to assign.hybrid.expression.patterns() as its debug argument by the driver code at the bottom of this script
DEBUG <- FALSE

## assign.hybrid.expression.patterns
##
## For every effect type in ARRAY.OF.ALL.EFFECT.TYPES, assigns a high-confidence category to each locus
## (via use.method.to.assign.high.confidence.categories), keeps only the filtered loci, prints the number of
## loci per category, then
##   * writes one "<locus>\t<ADD|NADD|UNC>" line per filtered locus to
##     <output directory>/additivity_based_category_assignment.txt,
##   * prints summary counts of additive / non-additive loci, and
##   * prints, for each HYBRID.TRANS category, a CIS x PARENTAL.TRANS table of locus counts.
##
## Arguments:
##   ref.parent.statistics.per.locus.file.array    statistics-per-locus file(s) stored under PARENT.A
##   other.parent.statistics.per.locus.file.array  statistics-per-locus file(s) stored under PARENT.B
##   hybrid.statistics.per.locus.file.array        statistics-per-locus file(s) stored under HYBRID
##   filtered.loci.file                            file read by get.locus.hashtable.from.file() to decide which loci are reported
##   fold.change.threshold, pvalue.threshold, method, number.of.points.to.sample, debug
##                                                 forwarded unchanged to use.method.to.assign.high.confidence.categories()
##   hybrid.expression.pattern.output.directory    directory used to build the output file paths; must be a single non-missing value
##
## Returns NULL invisibly; the results are the console output and the additivity file.
## Stops with an error if the output directory is missing or if a category array contains NA.
assign.hybrid.expression.patterns <- function(ref.parent.statistics.per.locus.file.array, other.parent.statistics.per.locus.file.array, hybrid.statistics.per.locus.file.array,
						filtered.loci.file,
						fold.change.threshold = 1.2, pvalue.threshold = 0.05, method = sFCC, 
						number.of.points.to.sample = 1000, hybrid.expression.pattern.output.directory=NA, debug=FALSE){

	## a missing directory would otherwise be pasted silently into paths of the form "NA/..."
	if((length(hybrid.expression.pattern.output.directory) != 1) || is.na(hybrid.expression.pattern.output.directory)){
		stop("hybrid.expression.pattern.output.directory must be a single, non-missing directory path")
	}

	parent.A.statistics.per.locus.file.array <- ref.parent.statistics.per.locus.file.array
	parent.B.statistics.per.locus.file.array <- other.parent.statistics.per.locus.file.array
	ref.parent <- PARENT.A

	## this function is hardcoded to test for non-additive expression
	simulation.type <- NAE

	## determine correct set of expression categories to use
	if(simulation.type == NAE){
		array.of.all.possible.high.confidence.categories <- non.additive.expression.category.array
	}else{
		stop(sprintf("Unsupported simulation type: \'%s\'\n", simulation.type))
	}

	condition.to.statistics.per.locus.file.array <- hash()
	condition.to.statistics.per.locus.file.array[[PARENT.A]] <- parent.A.statistics.per.locus.file.array
	condition.to.statistics.per.locus.file.array[[PARENT.B]] <- parent.B.statistics.per.locus.file.array
	condition.to.statistics.per.locus.file.array[[HYBRID]] <- hybrid.statistics.per.locus.file.array

	sequencing.data <- get.condition.to.read.counts.on.each.replicate.per.locus.array(condition.to.statistics.per.locus.file.array)
	locus.array <- sequencing.data$locus.array

	filtered.locus.hashtable <- get.locus.hashtable.from.file(filtered.loci.file)
	filtered.locus.mask <- get.filtered.locus.mask(locus.array, filtered.locus.hashtable)
	num.filtered.loci <- length(filtered.locus.mask[filtered.locus.mask])
	filtered.locus.array <- locus.array[filtered.locus.mask]

	## the read count arrays are indexed [replicate, locus]: dimension 1 is used as the number of replicates, dimension 2 as the number of loci
	parent.A.read.count.dimensions <- dim(sequencing.data$condition.to.read.counts.on.each.replicate.per.locus.array[[PARENT.A]])
	num.loci <- parent.A.read.count.dimensions[2]
	num.replicates <- parent.A.read.count.dimensions[1]

	## estimate read counts for in silico hybrid from the two parents
	## NOTE(review): the helper's return value is discarded, so it presumably updates the object in place - confirm against the helper
	condition.to.read.counts.on.each.replicate.per.locus.array <- sequencing.data$condition.to.read.counts.on.each.replicate.per.locus.array
	estimate.read.counts.for.in.silico.hybrid(condition.to.read.counts.on.each.replicate.per.locus.array, num.loci)
	sequencing.data$condition.to.read.counts.on.each.replicate.per.locus.array <- condition.to.read.counts.on.each.replicate.per.locus.array

	selected.locus.mask <- array(TRUE, dim=num.loci)

	## detect the presence and absence of each type of cis or trans effect
	effect.type.to.high.confidence.category.array <- hash()
	for(effect.type in ARRAY.OF.ALL.EFFECT.TYPES){
		cat("Effect type is ", effect.type, ":\n")

		output.file <- paste0(hybrid.expression.pattern.output.directory, "/", effect.type, "_effect_category_assignment.txt")

		high.confidence.category.array <- use.method.to.assign.high.confidence.categories(method, fold.change.threshold, pvalue.threshold,
													num.replicates, number.of.points.to.sample,
													simulation.type, selected.locus.mask, filtered.locus.mask,
													sequencing.data, effect.type, ref.parent, num.loci, debug, output.file)

		## filter loci to use only filtered loci
		high.confidence.category.array <- high.confidence.category.array[filtered.locus.mask]

		high.confidence.category.to.num.loci <- get.high.confidence.category.to.num.loci.hashtable(high.confidence.category.array,
														array.of.all.possible.high.confidence.categories,
														num.filtered.loci)
		print.num.loci.for.each.high.confidence.category(high.confidence.category.to.num.loci)

		effect.type.to.high.confidence.category.array[[effect.type]] <- high.confidence.category.array
	}

	## seq_len() yields an empty index when there are no filtered loci (1:0 would yield c(1, 0))
	filtered.locus.index.array <- seq_len(num.filtered.loci)
	hybrid.trans <- effect.type.to.high.confidence.category.array[[HYBRID.TRANS]][filtered.locus.index.array]
	cis <- effect.type.to.high.confidence.category.array[[CIS]][filtered.locus.index.array]
	parental.trans <- effect.type.to.high.confidence.category.array[[PARENTAL.TRANS]][filtered.locus.index.array]

	## the vectorised comparisons below need one non-missing category per filtered locus
	for(category.array in list(hybrid.trans, cis, parental.trans)){
		if((length(category.array) != num.filtered.loci) || anyNA(category.array)){
			stop("Missing high confidence category for at least one filtered locus")
		}
	}

	## detect the presence and absence of additivity:
	## ADD when neither trans effect is detected, NADD when at least one trans effect is DOWN or UP, UNC otherwise
	is.additive <- (hybrid.trans == NONE) & (parental.trans == NONE)
	is.non.additive <- (hybrid.trans == DOWN) | (hybrid.trans == UP) |
			   (parental.trans == DOWN) | (parental.trans == UP)
	additive.category.array <- rep(UNC, num.filtered.loci)
	additive.category.array[is.non.additive] <- NADD
	additive.category.array[is.additive] <- ADD	## assigned last so that ADD takes precedence, as in an if / else-if chain

	## write the whole file at once (an empty file is produced when there are no filtered loci)
	output.file <- paste0(hybrid.expression.pattern.output.directory, "/additivity_based_category_assignment.txt")
	cat(sprintf("%s\t%s\n", filtered.locus.array[filtered.locus.index.array], additive.category.array),
	    sep="", append=FALSE, file=output.file)

	num.loci.for.which.at.least.one.effect.is.additive <- sum((hybrid.trans == NONE) | (parental.trans == NONE) | (cis == NONE))
	num.loci.which.are.strictly.additive <- sum(is.additive)
	num.loci.which.are.strictly.nonadditive <- sum(is.non.additive)
	cat(sprintf("Number of (filtered) loci for which at least one effect is additive: %.0f\n", as.numeric(num.loci.for.which.at.least.one.effect.is.additive)))
	cat(sprintf("Number of (filtered) loci which are strictly additive: %.0f\n", as.numeric(num.loci.which.are.strictly.additive)))
	cat(sprintf("Number of (filtered) loci which are non-additive: %.0f\n", as.numeric(num.loci.which.are.strictly.nonadditive)))

	cat("Table of combinations:\n")

	## one table per hybrid trans-effect category: rows are cis-effect categories, columns are parental trans-effect categories
	down.additive.and.up.array <- c(DOWN, NONE, UP, UNC)
	header.line <- paste0(sprintf("%20s", PARENTAL.TRANS),
			      paste0(sprintf("%20s", down.additive.and.up.array), collapse=""))
	for(hybrid.trans.effect.category in down.additive.and.up.array){
		cat("========================================\n")
		cat(sprintf("%s = %s\n", HYBRID.TRANS, hybrid.trans.effect.category))
		cat("========================================\n")

		cat(header.line, "\n")
		has.hybrid.trans.category <- (hybrid.trans == hybrid.trans.effect.category)
		for(cis.effect.category in down.additive.and.up.array){
			has.hybrid.trans.and.cis.category <- has.hybrid.trans.category & (cis == cis.effect.category)
			output.line <- sprintf("%20s", sprintf("%s %s", CIS, cis.effect.category))
			for(parental.trans.effect.category in down.additive.and.up.array){
				num.loci.with.current.combination <- sum(has.hybrid.trans.and.cis.category &
									 (parental.trans == parental.trans.effect.category))
				output.line <- sprintf("%s%20.0f", output.line, as.numeric(num.loci.with.current.combination))
			}
			cat(output.line, "\n")
		}
		cat("\n")
	}

	invisible(NULL)
}

## Driver code: read the nine run-time arguments from the arguments file in the current working
## directory and run assign.hybrid.expression.patterns() with them.
argument.array <- as.matrix(read.table("assign.hybrid.expression.patterns.arguments.file"))
if(length(argument.array) != 9){
	## the ninth argument is handed to the function as its output DIRECTORY, so it is named accordingly here
	stop(paste0("Expected 9 arguments on separate lines of the arguments file: ref.parent.statistics.per.locus.file.array, ",
			"other.parent.statistics.per.locus.file.array, hybrid.statistics.per.locus.file.array, ",
			"filtered.loci.file, fold.change.threshold, pvalue.threshold, method, ",
			"number.of.points.to.sample, hybrid.expression.pattern.output.directory\n"))
}

ref.parent.statistics.per.locus.file.array <- argument.array[1]
other.parent.statistics.per.locus.file.array <- argument.array[2]
hybrid.statistics.per.locus.file.array <- argument.array[3]
filtered.loci.file <- argument.array[4]
fold.change.threshold <- as.numeric(argument.array[5])
pvalue.threshold <- as.numeric(argument.array[6])
## the method is given by name in the arguments file and resolved to the object bound to that name
method <- get(argument.array[7])
number.of.points.to.sample <- as.numeric(argument.array[8])
## despite its name, this value is passed below as hybrid.expression.pattern.output.directory
output.file <- argument.array[9]

assign.hybrid.expression.patterns(ref.parent.statistics.per.locus.file.array, other.parent.statistics.per.locus.file.array, hybrid.statistics.per.locus.file.array,
					filtered.loci.file, fold.change.threshold, pvalue.threshold, method, 
					number.of.points.to.sample, output.file, debug=DEBUG)
