#!/bin/sh
# process_mutation_candidates_helper_functions.py

import sys

def process_mutation_candidates_file(mutation_candidates_filename, use_3D_pooling):
	"""Parse a tab-delimited mutation candidates file into threshold lookup tables.

	Every line must have exactly 23 tab-separated fields (28 when
	use_3D_pooling is true).  The fields read from each line are:

		0  organism
		1  gene
		3  base change (TIL numbering)
		5  row library
		6  column library
		7  third-dimension library (only read when use_3D_pooling is true)
		second-to-last  threshold at which the candidate is lost
		last            threshold at which the candidate is gained

	A threshold spelled exactly 'NA' or 'nan' is treated as missing.  A line
	whose gain threshold is missing is skipped entirely; a line whose lose
	threshold is missing is still recorded, but contributes no lose threshold.

	Candidates are identified by a tab-joined key of organism, gene, TIL base
	change, row library and column library (plus the third-dimension library
	when use_3D_pooling is true).  If the same key occurs on several lines,
	the last line wins in mutation_to_line_dict.

	Args:
		mutation_candidates_filename: path of the file to read.
		use_3D_pooling: true if lines use the 28-field 3D-pooling layout.

	Returns:
		A 4-tuple of dicts:
		threshold_dict                  {threshold (float): 0} for every gain
		                                or lose threshold seen
		mutation_to_line_dict           {candidate key: line without its
		                                trailing newline characters}
		thresh_to_gained_mutation_dict  {gain threshold: {candidate key: 0}}
		thresh_to_lost_mutation_dict    {lose threshold: {candidate key: 0}}

	Raises:
		IOError: if the file cannot be opened, or a line does not have the
			expected number of fields.
		ValueError: if a non-missing threshold field is not a valid float.
	"""
	# Only these exact spellings mark a missing threshold value.
	missing_threshold_values = ('NA', 'nan')

	# The 3D-pooling layout shifts the two threshold columns right by 5.
	if use_3D_pooling:
		index_correction_for_gained_and_lost_thresholds = 5
	else:
		index_correction_for_gained_and_lost_thresholds = 0
	expected_number_of_fields = 23 + index_correction_for_gained_and_lost_thresholds
	lose_threshold_index = 21 + index_correction_for_gained_and_lost_thresholds
	gain_threshold_index = 22 + index_correction_for_gained_and_lost_thresholds

	threshold_dict = {}
	mutation_to_line_dict = {}
	thresh_to_gained_mutation_dict = {}
	thresh_to_lost_mutation_dict = {}

	# 'with' guarantees the file is closed even when a malformed line raises.
	with open(mutation_candidates_filename, 'r') as mutation_candidates_file:
		for line in mutation_candidates_file:
			line = line.rstrip('\n\r')
			field_list = line.split('\t')
			if len(field_list) != expected_number_of_fields:
				raise IOError('expected exactly %d fields, where last two fields comprise the FrNn_threshold range for which the given well mutation candidate will be predicted' % expected_number_of_fields)

			# The lose threshold is converted before the gain threshold is
			# inspected, so a malformed lose value is reported even on lines
			# that would otherwise be skipped.
			threshold_to_lose_mutation = field_list[lose_threshold_index]
			has_lose_threshold = threshold_to_lose_mutation not in missing_threshold_values
			if has_lose_threshold:
				threshold_to_lose_mutation = float(threshold_to_lose_mutation)

			threshold_to_gain_mutation = field_list[gain_threshold_index]
			if threshold_to_gain_mutation in missing_threshold_values:
				# skip mutation candidate if top scoring well has score 'nan'
				continue
			threshold_to_gain_mutation = float(threshold_to_gain_mutation)

			## associate each mutation candidate with its line
			key_fields = [field_list[0], field_list[1], field_list[3], field_list[5], field_list[6]]
			if use_3D_pooling:
				key_fields.append(field_list[7])
			mutation_candidate_string = '\t'.join(key_fields)
			mutation_to_line_dict[mutation_candidate_string] = line

			## record the gain threshold and the candidate gained at it
			threshold_dict[threshold_to_gain_mutation] = 0
			thresh_to_gained_mutation_dict.setdefault(threshold_to_gain_mutation, {})[mutation_candidate_string] = 0

			## record the lose threshold (when present) and the candidate lost at it
			if has_lose_threshold:
				threshold_dict[threshold_to_lose_mutation] = 0
				thresh_to_lost_mutation_dict.setdefault(threshold_to_lose_mutation, {})[mutation_candidate_string] = 0

	return threshold_dict, mutation_to_line_dict, thresh_to_gained_mutation_dict, thresh_to_lost_mutation_dict


