#!/usr/bin/env python
import sys

LIBRARY_LIST = ['R1', 'R2', 'R3', 'R4', 'R5', 'R6', 'R7', 'R8', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12']


# Usage check: the script expects exactly one command-line argument (the
# mutation candidates file). The message raised is the usage line.
if len(sys.argv) != 2:
	# Call-style raise behaves like the old "raise IOError, msg" form on
	# Python 2 and is also valid syntax on Python 3.
	raise IOError('./get_orphan_statistics_from_mutation_candidates_file.py mutation_candidates_file')

def get_orphan_statistics_from_mutation_candidates_file(mutation_candidates_filename):
	"""Print well/orphan candidate statistics for a mutation candidates file.

	The file is read as tab-separated text. Columns used (0-based):
	0 = species, 1 = gene, 2 = a position string whose first character is
	taken as the reference base and whose last character is taken as the
	mutant base, 5 = row library, 6 = column library. The literal 'NA' in a
	library column is treated as "no library on that axis".

	Lines whose species is "Species" or whose gene is "Gene" are skipped as
	header lines; blank lines are skipped as well.

	A line with both a row and a column library is counted as a "well"
	candidate; any other line is an "orphan" candidate and is additionally
	tallied per library ('R' + row library, or 'C' + column library). Both
	kinds are split into "standard" (G->A or C->T) and "non-standard"
	mutations.

	Printed to stdout, in this order: a per-gene table, the orphan count of
	every library in LIBRARY_LIST, and the four standard/non-standard totals.

	Args:
		mutation_candidates_filename: path of the tab-separated input file.

	Raises:
		IOError: if the file cannot be opened, if a well candidate has a
			non-numeric row or column library, or if an orphan candidate
			does not map to a library in LIBRARY_LIST (this includes
			lines where both libraries are 'NA').
		IndexError: if a non-blank line has fewer than 7 fields or an
			empty position field.
	"""
	gene_to_num_orphan_candidates = {}
	gene_to_num_row_orphan_candidates = {}
	gene_to_num_column_orphan_candidates = {}
	gene_to_num_well_candidates = {}
	per_gene_counters = (
		gene_to_num_well_candidates,
		gene_to_num_orphan_candidates,
		gene_to_num_column_orphan_candidates,
		gene_to_num_row_orphan_candidates,
	)

	num_standard_orphan_candidates = 0
	num_non_standard_orphan_candidates = 0
	num_standard_well_candidates = 0
	num_non_standard_well_candidates = 0

	library_to_num_orphan_candidates = {}
	for library in LIBRARY_LIST:
		library_to_num_orphan_candidates[library] = 0

	# "with" guarantees the file is closed, even when a line is rejected.
	with open(mutation_candidates_filename, 'r') as mutation_candidates_file:
		for line in mutation_candidates_file:
			line = line.strip()
			if not line:
				# blank line (e.g. a trailing newline): nothing to count
				continue

			field_list = line.split('\t')
			species = field_list[0]
			gene = field_list[1]
			if (species == "Species") or (gene == "Gene"):
				# header
				continue

			row_library = field_list[5]
			column_library = field_list[6]

			# Every gene that appears gets an entry in all four per-gene
			# tables so the report can index them unconditionally.
			for counter in per_gene_counters:
				counter.setdefault(gene, 0)

			pos_cds = field_list[2]
			refbase, mutbase = pos_cds[0], pos_cds[-1]
			is_standard = (refbase, mutbase) in (('G', 'A'), ('C', 'T'))

			if (row_library != 'NA') and (column_library != 'NA'):
				# well candidate
				gene_to_num_well_candidates[gene] += 1

				if (not row_library.isdigit()) or (not column_library.isdigit()):
					raise IOError("Invalid row or column library name (%s,%s)" % (row_library, column_library))

				if is_standard:
					num_standard_well_candidates += 1
				else:
					num_non_standard_well_candidates += 1
			else:
				# orphan candidate
				if is_standard:
					num_standard_orphan_candidates += 1
				else:
					num_non_standard_orphan_candidates += 1
				gene_to_num_orphan_candidates[gene] += 1

				if (row_library == 'NA') and (column_library == 'NA'):
					# No library on either axis: there is no library to
					# credit, so reject the line explicitly instead of
					# failing later on a made-up 'RNA' key.
					raise IOError("Invalid row or column library name (%s,%s)" % (row_library, column_library))

				if row_library == 'NA':
					# only the column library is present
					gene_to_num_column_orphan_candidates[gene] += 1
					library = 'C' + column_library
				else:
					# only the row library is present
					gene_to_num_row_orphan_candidates[gene] += 1
					library = 'R' + row_library

				if library not in library_to_num_orphan_candidates:
					# Same error as for malformed well candidates, rather
					# than an unexplained KeyError.
					raise IOError("Invalid row or column library name (%s,%s)" % (row_library, column_library))
				library_to_num_orphan_candidates[library] += 1

	# Single-argument print(...) produces identical output on Python 2 and 3.
	print('Gene\t\tWell\t\tOrphan\t\tRow\t\tColumn')
	for gene in gene_to_num_well_candidates:
		# NOTE(review): 8 and 12 presumably are the numbers of row (R1-R8)
		# and column (C1-C12) libraries in LIBRARY_LIST -- confirm.
		print('%s\t\t%d\t\t%d\t\t%.3f\t\t%.3f' % (
			gene,
			gene_to_num_well_candidates[gene],
			gene_to_num_orphan_candidates[gene],
			gene_to_num_row_orphan_candidates[gene] / 8.0,
			gene_to_num_column_orphan_candidates[gene] / 12.0))
	for library in LIBRARY_LIST:
		print('%s\t%d' % (library, library_to_num_orphan_candidates[library]))
	print('Num standard orphan candidates: %d' % num_standard_orphan_candidates)
	print('Num non-standard orphan candidates: %d' % num_non_standard_orphan_candidates)
	print('Num standard well candidates: %d' % num_standard_well_candidates)
	print('Num non-standard well candidates: %d' % num_non_standard_well_candidates)

#def is_library_name(name):
#	if (name[0:2] == 'T3') and ((name[2] == 'C') or (name[2] == 'R')) and name[3].isdigit():
#		return True
#	else:
#		return False

# Script entry point: run the analysis on the file named by the first
# command-line argument; the report is printed to stdout.
get_orphan_statistics_from_mutation_candidates_file(sys.argv[1])

