#statistics.per.locus.file.helper.functions.R

"get.array.of.all.read.count.arrays" <- function(statistics.per.locus.file.array){
	num.files <- length(statistics.per.locus.file.array)
	
	array.of.all.read.count.arrays <- NULL
	array.of.all.ref.allele.read.count.arrays <- NULL
	array.of.all.other.allele.read.count.arrays <- NULL
	locus.array <- NULL
	for(file.index in 1:num.files){
		statistics.per.locus.file <- statistics.per.locus.file.array[file.index]
		file.data <- read.statistics.per.locus.file(statistics.per.locus.file)
		temp.locus.array <- file.data$locus.array
		if(length(locus.array) == 0){
			locus.array <- temp.locus.array
		}else{
			assert.two.locus.arrays.are.identical(locus.array, temp.locus.array)
		}
		num.loci <- length(locus.array)
		if(length(array.of.all.read.count.arrays) == 0){
			array.of.all.read.count.arrays <- matrix(NA, nrow=num.files, ncol=num.loci)
			array.of.all.ref.allele.read.count.arrays <- matrix(NA, nrow=num.files, ncol=num.loci)
			array.of.all.other.allele.read.count.arrays <- matrix(NA, nrow=num.files, ncol=num.loci)
		}
		array.of.all.read.count.arrays[file.index,] <- file.data$read.count.array
		array.of.all.ref.allele.read.count.arrays[file.index,] <- file.data$num.ref.array
		array.of.all.other.allele.read.count.arrays[file.index,] <- file.data$num.other.array
	}
	list(array.of.all.read.count.arrays=array.of.all.read.count.arrays,
		array.of.all.ref.allele.read.count.arrays=array.of.all.ref.allele.read.count.arrays,
		array.of.all.other.allele.read.count.arrays=array.of.all.other.allele.read.count.arrays,
		locus.array=locus.array)
}

"assert.two.locus.arrays.are.identical" <- function(locus.array, temp.locus.array){
	if(length(locus.array) != length(temp.locus.array)){
		stop("Internal Error: locus arrays must be of equal length")
	}
	for(i in 1:length(locus.array)){
		if(locus.array[i] != temp.locus.array[i]){
			stop(sprintf("Internal Error: found two different loci at index %d of two locus arrays", i))
		}
	}
}

"read.statistics.per.locus.file" <- function(statistics.per.locus.file){
	table <- read.table(statistics.per.locus.file)

	## Note: an empty file will not result in an empty table object - there must be a header line for this to work
	empty.table <- FALSE
	if((table[1,1] == "locus") || (table[1,1] == "locus_name")){
		if(dim(table)[1] == 1){
			empty.table <- TRUE
		}else{
			table <- table[2:(dim(table)[1]),]
		}
	}
	if(dim(table)[2] != 5){
		stop('Expected 5 tab-delimited columns in each statistics per locus input file')
	}
	if(empty.table){
		locus.array <- c()
		read.count.array <- c()
		RPKM.array <- c()
		num.ref.array <- c()
		num.other.array <- c()
	}else{
		locus.array <- data.matrix(table[,1])
		read.count.array <- as.numeric(data.matrix(table[,2]))
		RPKM.array <- as.numeric(data.matrix(table[,3]))
		num.ref.array <- as.numeric(data.matrix(table[,4]))
		num.other.array <- as.numeric(data.matrix(table[,5]))
	}
	list(locus.array=locus.array, read.count.array=read.count.array, RPKM.array=RPKM.array, num.ref.array=num.ref.array, num.other.array=num.other.array)
}

"stochastically.discard.reads" <- function(file.data, kept.fraction){
	num.loci <- length(file.data$locus.array)
	new.read.count.array <- array(NA, dim=num.loci)
	new.RPKM.array <- array(NA, dim=num.loci)
	new.num.ref.array <- array(NA, dim=num.loci)
	new.num.other.array <- array(NA, dim=num.loci)

	for(i in 1:num.loci){
		read.count <- file.data$read.count.array[i]
		RPKM <- file.data$RPKM.array[i]
		num.ref <- file.data$num.ref.array[i]
		num.other <- file.data$num.other.array[i]
		
		num.allele.independent.reads <- read.count - num.ref - num.other
		new.num.ref <- rbinom(1, num.ref, kept.fraction)
		new.num.other <- rbinom(1, num.other, kept.fraction)
		new.num.allele.independent.reads <- rbinom(1, num.allele.independent.reads, kept.fraction)
		new.read.count <- new.num.ref + new.num.other + new.num.allele.independent.reads
		new.RPKM <- RPKM * (new.read.count / read.count)
		
		new.read.count.array[i] <- new.read.count
		new.RPKM.array[i] <- new.RPKM
		new.num.ref.array[i] <- new.num.ref
		new.num.other.array[i] <- new.num.other
	}
	list(locus.array=file.data$locus.array, read.count.array=new.read.count.array, RPKM.array=new.RPKM.array, num.ref.array=new.num.ref.array, num.other.array=new.num.other.array)
}
	
"print.file.data.to.output.file" <- function(file.data, output.file){
	cat('locus_name\tnum_reads\tRPKM\tNumRef\tNumOth\n', file=output.file, append=FALSE)
	
	num.loci <- length(file.data$locus.array)
	for(i in 1:num.loci){
		locus <- file.data$locus.array[i]
		read.count <- file.data$read.count.array[i]
		RPKM <- file.data$RPKM.array[i]
		num.ref <- file.data$num.ref.array[i]
		num.other <- file.data$num.other.array[i]
		if(is.na(RPKM)){
			RPKM.string <- 'NA'
		}else{
			RPKM.string <- sprintf('%.6f', RPKM)
		}
		cat(sprintf('%s\t%d\t%s\t%d\t%d\n', locus, read.count, RPKM.string, num.ref, num.other), file=output.file, append=TRUE)
	}
}


