#!/usr/bin/env python

# Helper module for both
# get_average_frequency_of_each_base_change_from_parsed_pileup.py
# and find_mutations.py

import sys

def initialize_HQ_base_change_frequency_statistics_dictionaries(refbase_list):
	"""Build zero-initialised counters for base-change statistics.

	Args:
		refbase_list: the bases to track (e.g. ['A', 'C', 'G', 'T']).

	Returns:
		A 2-tuple ``(totals, changes)``:
		  * ``totals[refbase]`` is 0 for every base in ``refbase_list``.
		  * ``changes[refbase][newbase]`` is 0 for every other entry of
		    ``refbase_list`` (the list with one occurrence of ``refbase``
		    removed).  Each ``changes[refbase]`` is its own dict object.
	"""
	total_reads_by_refbase = {}
	change_reads_by_refbase = {}

	for refbase in refbase_list:
		total_reads_by_refbase[refbase] = 0
		# Copy the list and drop a single occurrence of the reference base;
		# whatever remains are the candidate "new" bases.
		other_bases = list(refbase_list)
		other_bases.remove(refbase)
		change_reads_by_refbase[refbase] = dict.fromkeys(other_bases, 0)

	return total_reads_by_refbase, change_reads_by_refbase

def update_HQ_base_change_frequency_statistics_by_information_for_gene(line_dictionary_list_for_each_position,
		refbase_to_total_number_of_HQ_base_reads,
		refbase_to_number_of_HQ_base_reads_for_newbase_dict,
		bool_eliminate_outliers,
		prob_refbase_read_as_newbase_dict,
		outlier_elimination_constant,
		gene):
	"""Accumulate per-refbase read counts for one gene into the running totals.

	Both counter dictionaries are updated in place; nothing is returned.

	Args:
		line_dictionary_list_for_each_position: container indexed by the
			integers 1..len(container) (presumably a dict keyed by 1-based
			position -- confirm against caller).  Each entry is a non-empty
			list of per-library dictionaries with the keys 'refbase',
			'coverage', and an upper- and lowercase key for every base
			other than the reference base; the count values must be
			convertible with int().
		refbase_to_total_number_of_HQ_base_reads: dict refbase -> running
			total, incremented by each library's 'coverage'.
		refbase_to_number_of_HQ_base_reads_for_newbase_dict: dict
			refbase -> {newbase -> running count}, incremented by the sum
			of the upper- and lowercase counts for each newbase.
		bool_eliminate_outliers: must be falsy; outlier elimination is
			not implemented.
		prob_refbase_read_as_newbase_dict: unused by this function.
		outlier_elimination_constant: unused by this function.
		gene: unused by this function.

	Raises:
		IOError: if ``bool_eliminate_outliers`` is truthy.
	"""
	if bool_eliminate_outliers:
		# Call form of raise: valid on both Python 2 and Python 3, whereas
		# the old "raise IOError, msg" statement is a SyntaxError on Python 3.
		raise IOError('This option is not implemented')

	refbase_list = ['A', 'C', 'G', 'T']
	num_positions_in_gene = len(line_dictionary_list_for_each_position)
	for position in range(1, num_positions_in_gene + 1):
		line_dictionary_list = line_dictionary_list_for_each_position[position]
		# The reference base is read from the first library's record.
		refbase = line_dictionary_list[0]['refbase']
		if refbase not in refbase_list:
			# Unexpected refbase at this position: silently skip it.
			continue
		newbase_list = [base for base in refbase_list if base != refbase]

		for line_dictionary in line_dictionary_list:
			# One record per library.
			refbase_to_total_number_of_HQ_base_reads[refbase] += int(line_dictionary['coverage'])

			newbase_counts = refbase_to_number_of_HQ_base_reads_for_newbase_dict[refbase]
			for newbase in newbase_list:
				# Upper- and lowercase counts are pooled (presumably the two
				# strands of a pileup -- confirm against the parser).
				newbase_counts[newbase] += (int(line_dictionary[newbase])
					+ int(line_dictionary[newbase.lower()]))


