#!/usr/bin/env /usr/local/bin/python2.6
# find_mutations.py

########################################
## Script encoding the CAMBa Algorithm #
##   implemented by Victor Missirian   #
########################################

# 2/24/2009 - removed code for Laplace's Rule of Succession

import sys, math, scipy
import scipy.stats
import scipy.stats.distributions

# these modules are part of the pipeline:
from homolog_alignment import *
from input_output import *
from HQ_base_change_frequency import *

# Module-wide debug switch.
# NOTE(review): not referenced in the visible part of this file -- confirm where it is read.
GLOBAL_DEBUG = False

# Note: This script assumes that EMS (applied to seeds) was used as the mutagenizing agent.
#       The expected overall rate of mutation for several different organisms is stored in this script.
#       The expected fraction of all mutations that are G/C -> A/T is also stored.

# Note: The script has an option to apply the tilling offset to the position of each candidate mutation
#       before printing the candidate mutation.

# Path, relative to the working directory, of the restriction enzyme recognition sequence table.
# Judging by its name the table comes from REBASE (type 2, commercially available enzymes) -- TODO confirm.
global_restriction_enzyme_recognition_sequence_filename = "../REBASE/type_2_restriction_enzyme_prototype_to_recognition_sequence-commercially_available"

## Global parameters:

# Feasible single-library candidates are only printed when global_well_only is set to 'False'.
global_well_only = True 


# THIS OPTION IS ONLY IMPLEMENTED FOR THE 'z_scores' METHOD:

global_print_all_feasible_candidates = False

# NOTE: 'restricion' is misspelled in this name; it is left unchanged because other code may reference it.
global_mutation_factor_threshold_for_computing_restricion_enzymes_gained_and_lost = 0.999999

## global_potential_mutation_pvalue_threshold is now set through the 'threshold_for_method'
## command-line parameter (used when method_to_use is 'bayesian').
#global_potential_mutation_pvalue_threshold = 0.9999
global_pvalue_threshold_for_potential_parent_of_orphan = 9.99999e-1

# Passed to update_HQ_base_change_frequency_statistics_by_information_for_gene when the
# base change frequencies are estimated.  Is this option deprecated?
OUTLIER_ELIMINATION_CONSTANT_FOR_DETERMINING_BASE_CHANGE_FREQUENCIES = 2

# FrHQ thresholds for candidates; both are currently 0.  The previously used values are kept for reference:
#global_FrHQ_threshold_standard_EMS_mutation_candidate = 0.5
#global_FrHQ_threshold_not_standard_EMS_mutation_candidate = 0.7
global_FrHQ_threshold_standard_EMS_mutation_candidate =  0
global_FrHQ_threshold_not_standard_EMS_mutation_candidate =  0

global_distance_threshold_from_ends_of_sequences_for_homolog_dilution = 100

# global_print_extra_statistics: when True, find_mutations prints one 'STATISTICS:' line per
#                                number of candidate wells (standard_output only).
# global_verbose: find_mutations prints the candidates for the gene only while this is False.
global_print_extra_statistics = False
global_verbose = False





## This will be set to 'True' if the current experiment is T7 tomato and rice.
## When it is True, the module builds global_well_dict (the wells that are valid for that
## experiment); otherwise global_well_dict is set to 'NA'.

T7_tomato_and_rice=False



## Change these global parameters as input file changes:


## 2-D pooling scheme:
# The number of pools per row / column library (together with global_num_individuals_per_pool)
# determines the heterozygous and homozygous mutation fractions computed further down.
# The d library values are only read when global_use_3D_pooling is True.
global_use_3D_pooling = False
global_num_pools_per_row_library = 12
global_num_pools_per_column_library = 8
global_num_pools_per_d_library = 'NA'

## 3-D pooling_scheme
#global_use_3D_pooling = True
#global_num_pools_per_row_library = 48
#global_num_pools_per_column_library =  48
#global_num_pools_per_d_library = 64

# Tilling2
#global_num_individuals_per_pool = 8
#global_number_of_libraries = 18
#global_tilling_run_prefix = "T2"
#global_row_library_list = ["T2R1", "T2R2", "T2R3", "T2R4", "T2R5", "T2R6", "T2R7", "T2R8"]
#global_column_library_list = ["T2C1", "T2C2", "T2C3", "T2C6", "T2C7", "T2C8", "T2C9", "T2C10", "T2C11", "T2C12"]
#global_extra_library_name_modifiers_list = []
#global_read_length = 35



# Tilling3
#global_num_individuals_per_pool = 8
#global_number_of_libraries = 20
#global_tilling_run_prefix = "T3"
#global_row_library_list = ["T3R1", "T3R2", "T3R3", "T3R4", "T3R5", "T3R6", "T3R7Dprime", "T3R8Dprime"]
#global_column_library_list = ["T3C1prime", "T3C2prime", "T3C3prime", "T3C4prime", "T3C5prime", "T3C6", "T3C7", "T3C8", "T3C9", "T3C10", "T3C11", "T3C12"]
#global_extra_library_name_modifiers_list = ['Dprime', 'prime']
#global_read_length = 35

# Tilling3 - Row vs. Row
#global_num_individuals_per_pool = 8
#global_number_of_libraries = 8
#global_tilling_run_prefix = "T3"
#global_row_library_list = ["T3R1", "T3R2", "T3R3", "T3R4"]
#global_column_library_list = ["T3R5", "T3R6", "T3R7Dprime", "T3R8Dprime"]
#global_extra_library_name_modifiers_list = ['Dprime', 'prime']
#global_read_length = 35

# Tilling3 - Column vs. Column
#global_num_individuals_per_pool = 8
#global_number_of_libraries = 12
#global_tilling_run_prefix = "T3"
#global_row_library_list = ["T3C1prime", "T3C2prime", "T3C3prime", "T3C4prime", "T3C5prime", "T3C6"]
#global_column_library_list = ["T3C7", "T3C8", "T3C9", "T3C10", "T3C11", "T3C12"]
#global_extra_library_name_modifiers_list = ['Dprime', 'prime']
#global_read_length = 35
#


# Tilling4
#global_num_individuals_per_pool = 8
#global_number_of_libraries = 20
#global_tilling_run_prefix = "T4"
#global_row_library_list = ["T4R1", "T4R2", "T4R3", "T4R4", "T4R5", "T4R6", "T4R7", "T4R8"]
#global_column_library_list = ["T4C1", "T4C2", "T4C3", "T4C4", "T4C5", "T4C6", "T4C7", "T4C8", "T4C9", "T4C10", "T4C11", "T4C12"]
#global_extra_library_name_modifiers_list = []
#global_read_length = 35



# Tilling5 (the currently active library configuration)
#
# global_num_individuals_per_pool: enters the mutation fraction computation for each library.
# global_row_library_list / global_column_library_list: names of the row and column libraries.
# global_extra_library_name_modifiers_list: library name modifiers (e.g. 'prime' in the T3 runs);
#     find_mutations sorts this list so that the longest modifiers come first.
# global_read_length: passed to the read mapping probability computations in find_mutations.
#
# NOTE(review): this configuration does not define global_d_library_list, which find_mutations
#     reads when global_use_3D_pooling is True -- confirm it is defined before enabling 3-D pooling.
global_num_individuals_per_pool = 8
global_number_of_libraries = 20
global_tilling_run_prefix = "T5"
global_row_library_list = ["T5R1", "T5R2", "T5R3", "T5R4", "T5R5", "T5R6", "T5R7", "T5R8"]
global_column_library_list = ["T5C1", "T5C2", "T5C3", "T5C4", "T5C5", "T5C6", "T5C7", "T5C8", "T5C9", "T5C10", "T5C11", "T5C12"]
global_extra_library_name_modifiers_list = []
global_read_length = 35

# Tilling5 Row vs. Row
#global_num_individuals_per_pool = 8
#global_number_of_libraries = 8
#global_tilling_run_prefix = "T5"
#global_row_library_list = ["T5R1", "T5R2", "T5R3", "T5R4"]
#global_column_library_list = ["T5R5", "T5R6", "T5R7", "T5R8"]
#global_extra_library_name_modifiers_list = []
#global_read_length = 35

# Tilling5 Column vs. Column 
#global_num_individuals_per_pool = 8
#global_number_of_libraries = 12
#global_tilling_run_prefix = "T5"
#global_row_library_list = ["T5C1", "T5C2", "T5C3", "T5C4", "T5C5", "T5C6"]
#global_column_library_list = ["T5C7", "T5C8", "T5C9", "T5C10", "T5C11", "T5C12"]
#global_extra_library_name_modifiers_list = []
#global_read_length = 35



#Tilling6
#global_num_individuals_per_pool = 8
#global_number_of_libraries = 20
#global_tilling_run_prefix = "T6"
#global_row_library_list = ["T6R1", "T6R2", "T6R3", "T6R4", "T6R5", "T6R6", "T6R7", "T6R8"]
#global_column_library_list = ["T6C1", "T6C2", "T6C3", "T6C4", "T6C5", "T6C6", "T6C7", "T6C8", "T6C9", "T6C10", "T6C11", "T6C12"]
#global_extra_library_name_modifiers_list = []
#global_read_length = 35


## Original Test for False Positives:
#global_num_individuals_per_pool = 8
#global_number_of_libraries = 12
#global_tilling_run_prefix = "T3"
#global_row_library_list = ["T3C1prime", "T3C2prime", "T3C3prime", "T3C4prime", "T3C5prime", "T3C6"]
#global_column_library_list = ["T3C7", "T3C8", "T3C9", "T3C10", "T3C11", "T3C12"]
#global_extra_library_name_modifiers_list = ['Dprime', 'prime']
#global_read_length = 35


# Tilling7 - Tomato and Rice
#T7_tomato_and_rice=True
#global_num_individuals_per_pool = 1
#global_number_of_libraries = 15
#global_tilling_run_prefix = "T7"
#global_row_library_list = ["T7R1", "T7R2", "T7R3", "T7R4"]
#global_column_library_list = ["T7C9", "T7C10", "T7C11", "T7C12", 'T7C13', 'T7C14', 'T7C15', 'T7C16']
#global_d_library_list = ["T7D7", 'T7D8', 'T7D9']
#global_extra_library_name_modifiers_list = []
#global_read_length = 35

# Tilling7 - Wheat
#global_num_individuals_per_pool = 8
#global_number_of_libraries = 11
#global_tilling_run_prefix = "T7"
#global_row_library_list = ["T7R1", "T7R2", "T7R3", "T7R4", 'T7R5', 'T7R6', 'T7R7', 'T7R8']
#global_column_library_list = ['T7C7', 'T7C8', 'T7C9']
#global_d_library_list = 'NA'
#global_extra_library_name_modifiers_list = []
#global_read_length = 35



# Build the set of valid wells for the T7 tomato and rice experiment.
# global_well_dict maps 'row<TAB>col<TAB>d' strings (library numbers) to 0; only the keys matter.
# For every other experiment global_well_dict is the string 'NA'.
if T7_tomato_and_rice:
	# Each entry is (row numbers, column numbers, d numbers); every combination taken
	# from one entry is a valid well.
	well_combination_tuple_list = [
		(range(1, 9), range(1, 5), range(1, 7)),
		(range(9, 17), range(5, 9), range(1, 7)),
		(range(1, 9), range(9, 13), range(7, 13)),
		(range(9, 17), range(13, 17), range(7, 13)),
	]

	global_well_dict = {}
	# Unpack in the loop target rather than through a variable named 'tuple': a module-level
	# loop variable stays bound after the loop and would shadow the builtin for the whole module.
	for row_num_list, col_num_list, d_num_list in well_combination_tuple_list:
		for row_num in row_num_list:
			for col_num in col_num_list:
				for d_num in d_num_list:
					well_string = '%d\t%d\t%d' % (row_num, col_num, d_num)
					global_well_dict[well_string] = 0
else:
	global_well_dict = 'NA'


def well_is_in_global_well_dict(row_library_name, col_library_name, d_library_name):
	"""Tell whether the well given by a row, column and d library is listed in global_well_dict.

	The number of each library is obtained with get_number_from_library_name and the three
	numbers are joined by tabs to form the lookup key.

	Returns True when the key is present, False otherwise.
	Raises IOError when global_well_dict has not been built (it is still 'NA').
	"""
	if global_well_dict == 'NA':
		raise IOError('global_well_dict should be specified if this function is being called')
	well_key = '%d\t%d\t%d' % (
		get_number_from_library_name(row_library_name),
		get_number_from_library_name(col_library_name),
		get_number_from_library_name(d_library_name))
	return well_key in global_well_dict


## These global variables will be reset
# global_tilling_sequence and global_gene_name are assigned by find_mutations;
# global_organism and global_organism_mutation_rate are assigned from the command line below.
global_tilling_sequence = []

global_organism = ''
global_organism_mutation_rate = 'NA'

global_gene_name = ""


## Global constants
# splice = splicing error
# PSM = possibly severe (missense) mutation
# NSM = (probably) not severe mutation

# Maps the organism names accepted on the command line to their two-letter abbreviations.
global_organism_name_to_abbreviation_dict = {"Camelina_sativa":"Cs", "Oryza_sativa":"Os", "Triticum_aestivum":"Ta", "Triticum_durum":"Td", "arabidopsis_tetraploid":"At", "Solanum_lycopersicum":"Le"}

# Column names of a parsed pileup line, in order.
# find_mutations no longer prints this header (its header printing is commented out).
global_header_field_list = ["refseq", "position", "library", "refbase", "A", "a", "T", "t", "C", "c", "G", "g", "comma", "dot", "coverage", \
				"FrAa", "FrTt", "FrCc", "FrGg", "SkewA", "SkewT", "SkewC", "SkewG", "MQ(Aa)", "MQ(Tt)", "MQ(Cc)", "MQ(Gg)", \
				"MQ(CommaDot)", "MQ(all)", "deltaQ-A", "deltaQ-T", "deltaQ-C", "deltaQ-G", \
				"Aa_HQ_and_LQ",	"Tt_HQ_and_LQ",	"Cc_HQ_and_LQ",	"Gg_HQ_and_LQ",	"ref_HQ_and_LQ", \
				"FrHQ_for_Aa", "FrHQ_for_Tt", "FrHQ_for_Cc", "FrHQ_for_Gg", "FrHQ_for_ref", "quality_cutoff"]

# Expected fraction of mutant base reads in a library that contains one mutant individual.
# Heterozygous: 1 / (pools per library * individuals per pool * 2); the factor 2 is presumably
# the two chromosome copies per individual.  Homozygous: twice the heterozygous fraction.
global_heterozygous_row_mutation_fraction = 1.0 / float(2 * global_num_individuals_per_pool * global_num_pools_per_row_library)
global_homozygous_row_mutation_fraction = global_heterozygous_row_mutation_fraction * 2.0

global_heterozygous_column_mutation_fraction = 1.0 / float(2 * global_num_individuals_per_pool * global_num_pools_per_column_library)
global_homozygous_column_mutation_fraction = global_heterozygous_column_mutation_fraction * 2.0

# The d library fractions only exist for a 3-D pooling scheme.
if global_use_3D_pooling:
	global_heterozygous_d_library_mutation_fraction = 1.0 / float(2 * global_num_individuals_per_pool * global_num_pools_per_d_library)
	global_homozygous_d_library_mutation_fraction = global_heterozygous_d_library_mutation_fraction * 2.0


# The script name plus ten mandatory arguments must be present; homologs are optional extras.
if len(sys.argv) <= 10:
	raise IOError('Usage: ./find_mutations.py parsed_pileup_file lines_to_skip organism '
			'("standard_output" | "parsed_pileup_subset") tilling_sequences_file '
			'gene_name clustal_multiple_alignment_file contamination_test method_to_use threshold_for_method [ homolog1 homolog2 ... ]')


# Select the expected mutation rate and GC fraction for the organism named on the command line.
# 'NA' means that this script does not store a value for the organism.
global_organism = sys.argv[3]
if global_organism in ("Camelina_sativa", "arabidopsis_tetraploid"):
	global_organism_mutation_rate = (1.0 / 50000.0)
	# the gc content fraction is from refseq project for A. thaliana on NCBI
	# (for arabidopsis_tetraploid: probably not 4x At)
	global_organism_gc_fraction = 0.35
elif global_organism == "Triticum_aestivum":
	global_organism_mutation_rate = (1.0 / 25000.0)
	# the gc content fraction is from information for annotated wheat mRNAs at link
	# <http://www.tigr.org/tdb/e2k1/tae1/info.shtml#Genes>
	global_organism_gc_fraction = 0.489
elif global_organism in ("Oryza_sativa", "Triticum_durum", "Solanum_lycopersicum"):
	# Values that were used previously, kept for reference:
	#
	#   Oryza_sativa:         mutation rate (1.0 / 350000.0), gc fraction 0.43
	#                         (gc fraction from refseq project for Oryza sativa ssp japonica on NCBI);
	#                         replaced after new results from the Comai (and other authors) Rice
	#                         TILLING paper (using CEL-1 to identify mutations)
	#   Triticum_durum:       computed using information from the Uauy et al. 2009 paper on
	#                         TILLING Triticum durum:
	#                           mutation rate 1.176999e-05 (OLD), 1.471249e-05 (OLD #2, MULTIPLY BY 25%),
	#                           gc fraction 0.373959 (OLD and OLD #2);
	#                           mutation rate 1.420340e-05, gc fraction 0.380265
	#                           (NEWEST, don't consider first & last ~10% of TILLING sequences);
	#                         before that: mutation rate (1.0 / 50000.0), gc fraction 0.489
	#                         (annotated wheat mRNAs, <http://www.tigr.org/tdb/e2k1/tae1/info.shtml#Genes>)
	#   Solanum_lycopersicum: mutation rate (1.0 / 300000.0), gc fraction 0.362
	#                         (BAC end sequence data, Table 1 in Zhu et al. BMC Genomics 2008 9:286)
	global_organism_mutation_rate = 'NA'
	global_organism_gc_fraction = 'NA'
else:
	raise IOError("Script does not have mutation rate for organism: " + global_organism)


# Translate the 'contamination_test' command-line argument ('true' / 'false') into a boolean.
contamination_test_string = sys.argv[8]
global_is_contamination_test = 'NA'
if contamination_test_string not in ('true', 'false'):
	raise IOError('Usage error: parameter \'contamination_test\' must be set to either \'true\' or \'false\'')
global_is_contamination_test = (contamination_test_string == 'true')

# Read the detection method and its threshold from the command line.
# The threshold is converted to a float unless it is the literal string 'NA', and is then
# stored under the method-specific global name.
global_method_to_use = sys.argv[9]
global_threshold_for_method = sys.argv[10]
if global_threshold_for_method != 'NA':
	global_threshold_for_method = float(global_threshold_for_method)

if global_method_to_use == 'bayesian':
	global_potential_mutation_pvalue_threshold = global_threshold_for_method
elif global_method_to_use == 'z_scores':
	global_z_score_threshold = global_threshold_for_method
elif global_method_to_use == 'alternate':
	global_FrNn_threshold = global_threshold_for_method
elif global_method_to_use == 'outlier':
	global_outlier_pvalue_threshold = global_threshold_for_method
elif global_method_to_use == 'poisson':
	global_poisson_pvalue_threshold = global_threshold_for_method
else:
	# The message names every method accepted by the chain above.
	raise IOError('Usage error: parameter \'method_to_use\' must be set to one of ' \
			+ '\'bayesian,\' \'z_scores,\' \'alternate,\' \'outlier,\' or \'poisson\'')

# Put in a helper function module?
def probability_of_union_of_N_independent_events_of_equal_probability(N, prob):
	"""Return the probability that at least one of N independent events occurs.

	Every event has probability 'prob'.  The union is accumulated one event at a time with
	probability_of_union_of_two_independent_events, starting from 0.0, so the result is 0.0
	when N is not positive.
	"""
	union_probability = 0.0
	for _event_index in range(N):
		union_probability = probability_of_union_of_two_independent_events(union_probability, prob)
	return union_probability

def probability_of_union_of_two_independent_events(p_x, p_y):
	"""Return P(X or Y) for two independent events with probabilities p_x and p_y."""
	# For independent events, P(X and Y) is the product of the two probabilities.
	probability_of_both = p_x * p_y
	return p_x + p_y - probability_of_both

def find_mutations(parsed_pileup_filename, lines_to_skip, output_option, \
		tilling_sequences_filename, gene_name, clustal_multiple_alignment_filename, homolog_list):
	"""Evaluate every position of one gene for candidate mutations and print the result.

	Parameters:
	    parsed_pileup_filename, lines_to_skip:
	        forwarded to read_parsed_pileup_file, for the gene and for each of its homologs.
	    output_option:
	        "standard_output" (print the candidates found for the gene) or
	        "parsed_pileup_subset" (print the parsed pileup lines of every candidate position).
	    tilling_sequences_filename:
	        file from which the tilling sequence of the gene and of each homolog is read.
	    gene_name:
	        the gene to evaluate; it is also stored in global_gene_name.
	    clustal_multiple_alignment_filename:
	        multiple alignment of the gene and its homologs; only read when homolog_list is not empty.
	    homolog_list:
	        names of the homologs of the gene (may be empty).  It must not contain duplicates
	        or gene_name.  The real coverage estimation called here currently raises unless
	        exactly two homologs are given.

	Side effects:
	    Sets global_gene_name and global_tilling_sequence, and re-orders
	    global_extra_library_name_modifiers_list (longest modifier first).
	    Opens intermediate_files/native_fraction_of_base_reads_log for appending, and, when homologs
	    are given, intermediate_files/estimated_real_coverage_log as well.
	    Prints the result to standard output.

	Raises:
	    IOError on inconsistent input or on an invalid output_option.  The output option is only
	    checked after all positions have been evaluated.
	"""
	global global_tilling_sequence, \
			global_extra_library_name_modifiers_list, \
			global_gene_name

	# TEMPORARY: get_estimated_prob_refbase_read_as_not_refbase_dict reads the gene name from this global.
	global_gene_name = gene_name

	## in the implementation for a 2D pooling scheme, when considering a given well,
	## we set the value of the chosen d library to be 'NA'
	if global_use_3D_pooling and ('NA' in global_d_library_list):
		raise IOError('Error: should not name any d libraries as \'NA\'')

	# The sorting of the modifier list makes sure that we remove the largest applicable modifiers first,
	# when determining the row/column/d library number.
	#
	# We do not want this to happen:
	#
	#	'R8Dprime' ==> 'R8D'
	#
	# in the case where both 'Dprime' and 'prime' are applicable modifiers, but 'D' is not
	global_extra_library_name_modifiers_list = sort_string_list_by_length(global_extra_library_name_modifiers_list)
	global_extra_library_name_modifiers_list.reverse()

	global_tilling_sequence = get_sequence_from_file(gene_name, tilling_sequences_filename)

	line_dictionary_list_for_each_position = read_parsed_pileup_file(parsed_pileup_filename, lines_to_skip, gene_name)

	# The largest position key is taken as the number of positions in the gene.
	list_of_positions = sorted(line_dictionary_list_for_each_position.keys())
	num_positions_in_gene = list_of_positions[-1]

	## Estimate probability that each base is read as a different base
	estimated_prob_refbase_read_as_not_refbase_dict, prob_of_each_base_change_on_refbase_looks_reasonable_dict = \
			get_estimated_prob_refbase_read_as_not_refbase_dict(line_dictionary_list_for_each_position)

	# check homolog list for errors
	if list_contains_duplicate_elements(homolog_list):
		raise IOError('A homolog was entered multiple times')
	if gene_name in homolog_list:
		raise IOError('Entered the gene name (' + gene_name + ') as a homolog')

	gene_to_tilling_sequence_dict = {}
	for homolog in homolog_list:
		gene_to_tilling_sequence_dict[homolog] = get_sequence_from_file(homolog, tilling_sequences_filename)
	gene_to_tilling_sequence_dict[gene_name] = get_sequence_from_file(gene_name, tilling_sequences_filename)
	if gene_to_tilling_sequence_dict[gene_name] != global_tilling_sequence:
		raise IOError('Error: read inconsistent tilling sequences for gene: \'%s\'' % gene_name)

	has_homologs = (len(homolog_list) != 0)

	if has_homologs:
		homolog_alignment_dict = get_homolog_alignment_dictionary(clustal_multiple_alignment_filename, homolog_list, gene_name)
		probability_read_maps_to_gene_dict = get_probability_read_maps_to_gene_dictionary(
				gene_name, homolog_list, homolog_alignment_dict, gene_to_tilling_sequence_dict, global_read_length)
		probability_read_at_position_maps_to_gene_dict = \
				get_probability_read_at_position_maps_to_gene_dictionary(
						gene_name, homolog_list, probability_read_maps_to_gene_dict,
						gene_to_tilling_sequence_dict, global_read_length)

		# compute read mapping probability distribution for each gene
		gene_to_homolog_alignment_dict, probability_read_from_gene_at_position_maps_to_gene_dict = \
				compute_homolog_alignment_and_read_mapping_probability_distribution_for_each_gene(
						(homolog_list + [gene_name]),
						clustal_multiple_alignment_filename,
						gene_to_tilling_sequence_dict, global_read_length)

		## This is simply double-checking: both computations must agree for the specified gene.
		if probability_read_at_position_maps_to_gene_dict != probability_read_from_gene_at_position_maps_to_gene_dict[gene_name]:
			raise IOError('Internal error: Inconsistent computation of read mapping probability distribution for reads from gene: \'%s\'' % gene_name)
		if homolog_alignment_dict != gene_to_homolog_alignment_dict[gene_name]:
			raise IOError('Internal error: Inconsistent homolog alignment for gene \'%s\'' % gene_name)

		# get line dictionary list for each position, for the specified gene and each of its homologs
		gene_to_line_dictionary_list_for_each_position = {}
		gene_to_line_dictionary_list_for_each_position[gene_name] = line_dictionary_list_for_each_position
		for homolog in homolog_list:
			gene_to_line_dictionary_list_for_each_position[homolog] = \
					read_parsed_pileup_file(parsed_pileup_filename, lines_to_skip, homolog)

		# 'with' closes the log even when the estimation raises.
		with open("intermediate_files/estimated_real_coverage_log", 'a') as estimated_real_coverage_file:
			library_and_position_to_estimated_real_coverage_for_specified_gene_dict = \
					get_library_and_position_to_estimated_real_coverage_for_specified_gene_dict(
							gene_name, homolog_list,
							gene_to_line_dictionary_list_for_each_position,
							probability_read_from_gene_at_position_maps_to_gene_dict,
							gene_to_homolog_alignment_dict,
							gene_to_tilling_sequence_dict,
							estimated_real_coverage_file)
	else:
		# Without homologs every library is given a native read fraction of 1.0 (see the loop below).
		if global_use_3D_pooling:
			list_of_all_libraries = (global_row_library_list + global_column_library_list + global_d_library_list)
		else:
			list_of_all_libraries = (global_row_library_list + global_column_library_list)

	candidate_positions_list = []
	candidate_list_for_gene = []
	number_of_candidate_mutations_per_position_to_frequency_dict = {}

	# only consider EMS-type mutations
	# The log is opened even when there are no homologs, and is closed even when a position raises.
	with open("intermediate_files/native_fraction_of_base_reads_log", 'a') as native_fraction_of_base_reads_file:
		for position in range(1, (num_positions_in_gene + 1)):
			if has_homologs:
				read_mapping_probability_at_position_dict = \
						probability_read_from_gene_at_position_maps_to_gene_dict[gene_name][position]

				# NOTE(review): the two orderings and the per-homolog dictionary below are only consumed
				# by the disabled homolog dilution check; they are still computed so that a gene or
				# homolog missing from the read mapping distribution keeps failing here.
				read_mapping_probability_for_gene_list = sorted(
						[(read_mapping_probability_at_position_dict[gene], gene) \
								for gene in read_mapping_probability_at_position_dict],
						reverse=True)
				gene_list_ordered_by_read_mapping_probability = \
						[probability_and_gene[1] for probability_and_gene in read_mapping_probability_for_gene_list]
				homolog_list_ordered_by_read_mapping_probability = list(gene_list_ordered_by_read_mapping_probability)
				homolog_list_ordered_by_read_mapping_probability.remove(gene_name)

				homolog_to_read_mapping_probability_dict = {}
				for homolog in homolog_list:
					homolog_to_read_mapping_probability_dict[homolog] = read_mapping_probability_at_position_dict[homolog]

				library_to_fraction_of_base_reads_mapping_to_global_gene_that_come_from_global_gene_dict \
						= estimate_fraction_of_base_reads_mapping_to_specified_gene_that_come_from_specified_gene_at_position_for_each_library(
								gene_name, position,
								library_and_position_to_estimated_real_coverage_for_specified_gene_dict,
								gene_to_line_dictionary_list_for_each_position,
								probability_read_from_gene_at_position_maps_to_gene_dict,
								native_fraction_of_base_reads_file)
			else:
				# If there are no homologs for the global gene, then assume 100% accurate read mapping
				library_to_fraction_of_base_reads_mapping_to_global_gene_that_come_from_global_gene_dict = \
						dict((library, 1.0) for library in list_of_all_libraries)

			predict_mutation_at_position = evaluate_position_in_gene(
					line_dictionary_list_for_each_position[position], position,
					library_to_fraction_of_base_reads_mapping_to_global_gene_that_come_from_global_gene_dict,
					candidate_list_for_gene,
					number_of_candidate_mutations_per_position_to_frequency_dict,
					estimated_prob_refbase_read_as_not_refbase_dict,
					prob_of_each_base_change_on_refbase_looks_reasonable_dict, output_option)
			if predict_mutation_at_position:
				candidate_positions_list.append(position)
			# Disabled: when no mutation is predicted and homologs exist, the position used to be
			# re-checked with check_for_a_mutation_signal_spread_out_over_homologs (a mutation signal
			# watered down across gene homologs).

	if output_option == "parsed_pileup_subset":
		# The header line (global_header_field_list joined by tabs) is deliberately not printed,
		# so that the script can be run multiple times automatically.
		for position in candidate_positions_list:
			print_parsed_pileup_lines_for_line_dictionary_list(line_dictionary_list_for_each_position[position])
	elif output_option == "standard_output":
		# EXTRA STATISTICS:
		if global_print_extra_statistics:
			num_candidate_mutations_list = sorted(number_of_candidate_mutations_per_position_to_frequency_dict.keys())
			for num_candidate_mutations in num_candidate_mutations_list:
				print('STATISTICS: \t' + str(num_candidate_mutations) + ' wells - ' \
						+ str(number_of_candidate_mutations_per_position_to_frequency_dict[num_candidate_mutations]))

		if not global_verbose:
			print_candidates_for_gene__z_scores_method_or_alternate_method(candidate_list_for_gene)
	else:
		raise IOError('Invalid output option: ' + output_option)

	return

def get_estimated_prob_refbase_read_as_not_refbase_dict(line_dictionary_list_for_each_position):
	"""Estimate, for each reference base, the probability that it is read as another base.

	The HQ base change statistics of the gene are collected in a single pass, without
	outlier elimination, by update_HQ_base_change_frequency_statistics_by_information_for_gene.

	Returns a pair of dictionaries keyed by reference base ('A', 'C', 'G', 'T'):
	    1. (HQ base change reads + 1) / (total HQ base reads + 2)
	    2. True unless (reads of one new base + 1) / (total HQ base reads + 4) exceeds 0.01
	       for at least one of the three other bases.
	"""
	bases = ['A', 'C', 'G', 'T']
	hq_total_reads_by_refbase, hq_newbase_reads_by_refbase \
		= initialize_HQ_base_change_frequency_statistics_dictionaries(bases)

	# Single pass: no outliers are eliminated and no earlier probability estimate is supplied ('NA').
	eliminate_outliers = False
	earlier_probability_estimate = 'NA'
	update_HQ_base_change_frequency_statistics_by_information_for_gene(
			line_dictionary_list_for_each_position,
			hq_total_reads_by_refbase,
			hq_newbase_reads_by_refbase,
			eliminate_outliers,
			earlier_probability_estimate,
			OUTLIER_ELIMINATION_CONSTANT_FOR_DETERMINING_BASE_CHANGE_FREQUENCIES,
			global_gene_name)

	## Determine probability that each refbase is read incorrectly
	misread_probability_by_refbase = {}
	looks_reasonable_by_refbase = {}

	for refbase in bases:
		total_hq_reads = hq_total_reads_by_refbase[refbase]
		change_read_counts = [hq_newbase_reads_by_refbase[refbase][newbase] \
					for newbase in bases if newbase != refbase]

		# A single base change seen in more than 1% of the HQ reads is flagged as unreasonable.
		some_change_is_too_frequent = any(
				((float(count) + 1) / (float(total_hq_reads) + 4)) > 0.01
				for count in change_read_counts)
		looks_reasonable_by_refbase[refbase] = not some_change_is_too_frequent

		misread_probability_by_refbase[refbase] = \
				float(sum(change_read_counts) + 1) / float(total_hq_reads + 2)

	return misread_probability_by_refbase, looks_reasonable_by_refbase

def check_for_a_mutation_signal_spread_out_over_homologs(gene_name, homolog_list, position,
							gene_to_line_dictionary_list_for_each_position,
							homolog_list_ordered_by_read_mapping_probability,
							candidate_list_for_gene, estimated_prob_refbase_read_as_not_refbase_dict,
							prob_of_each_base_change_on_refbase_looks_reasonable_dict,
							output_option, homolog_alignment_dict,
							homolog_to_read_mapping_probability_dict):
	"""Look for a mutation signal at 'position' after adding in the data of the gene's homologs.

	Starting from a copy of the gene's own line dictionaries at the position, the data of one
	homolog at a time (in the given order) is added at the aligned homolog position, and the
	combined data is re-evaluated with evaluate_position_in_gene after each addition.  The
	search stops at the first homolog whose read mapping probability is 0.0 or as soon as a
	mutation is predicted.

	Returns the result of the last evaluation, or False when nothing was evaluated.
	Raises IOError when the ordered homolog list does not have the length of homolog_list, or
	when the position has no counterpart in a homolog's alignment.
	"""
	if len(homolog_list) != len(homolog_list_ordered_by_read_mapping_probability):
		raise IOError('Internal error in constructing \'homolog_list_ordered_by_read_mapping_probability\'')

	pooled_line_dictionary_list = get_copy_of_line_dictionary_list(
			gene_to_line_dictionary_list_for_each_position[gene_name][position])

	mutation_predicted = False
	for homolog in homolog_list_ordered_by_read_mapping_probability:
		if homolog_to_read_mapping_probability_dict[homolog] == 0.0:
			break

		gene_position_to_homolog_position = homolog_alignment_dict[homolog]
		if position not in gene_position_to_homolog_position:
			raise IOError('Position %d in the specified gene should correspond to a position in the homolog' % position)
		homolog_line_dictionary_list = \
				gene_to_line_dictionary_list_for_each_position[homolog][gene_position_to_homolog_position[position]]

		# add data for this homolog to the "combined" line dictionaries, for each library at this position
		add_line_dictionary_list_to_combined_line_dictionary_list(
				pooled_line_dictionary_list, homolog_line_dictionary_list, homolog)

		# TO DO: how to deal with the same homolog combination for different starting genes?
		#
		# Two fresh, empty dictionaries are passed in:
		#   - the per-library fraction of base reads coming from the homolog (temporarily empty);
		#   - a throw-away candidate frequency dictionary, because the main
		#     'number_of_candidate_mutations_per_position_to_frequency_dict' is not updated when
		#     looking at combinations of data from different homologs.
		mutation_predicted = evaluate_position_in_gene(
				pooled_line_dictionary_list, position,
				{},
				candidate_list_for_gene,
				{}, estimated_prob_refbase_read_as_not_refbase_dict,
				prob_of_each_base_change_on_refbase_looks_reasonable_dict, output_option)
		if mutation_predicted:
			break

	return mutation_predicted

def get_library_and_position_to_estimated_real_coverage_for_specified_gene_dict(specified_gene, homolog_list, gene_to_line_dictionary_list_for_each_position,
		probability_read_from_gene_at_position_maps_to_gene_dict, gene_to_homolog_alignment_dict, gene_to_tilling_sequence_dict,
		estimated_real_coverage_file):
	"""Estimate the real coverage of specified_gene for every library and position.

	For each library in (global_row_library_list + global_column_library_list)
	and each position 1..len(tilling sequence of specified_gene):
	  * if P(read from specified_gene maps to specified_gene) == 1.0 at the
	    position, the observed coverage is used as the real coverage;
	  * else, if the position is too close to an end of the specified gene or
	    of an aligned homolog, the estimate is the string 'NA';
	  * otherwise the total coverage over the specified gene and the homologs
	    aligned at the position is multiplied by the library-wide fraction of
	    coverage attributed to the specified gene (renormalised over the
	    specified gene and the aligned homologs).  A report for each such
	    position is written to estimated_real_coverage_file.

	Parameters:
	  specified_gene -- name of the gene whose real coverage is estimated
	  homolog_list -- names of its homologs; exactly two are required
	  gene_to_line_dictionary_list_for_each_position -- passed through to the
	      coverage helpers
	  probability_read_from_gene_at_position_maps_to_gene_dict --
	      indexed as [gene][position][gene]
	  gene_to_homolog_alignment_dict -- indexed as [gene][homolog][position]
	  gene_to_tilling_sequence_dict -- gene name -> tilling sequence
	  estimated_real_coverage_file -- object with a write() method

	Returns:
	  dict: library -> {position -> estimated real coverage, or 'NA'}

	Raises:
	  IOError -- if len(homolog_list) != 2, if a position has an unexpected
	      number of aligned homologs, or if the per-gene overall coverage
	      fractions sum to a value further than 0.1 from 1.0.
	"""

	print('DEBUG|Estimating real coverage:')
	if len(homolog_list) != 2:
		raise IOError('The mutation dilution factor code currently only works in the case of three homologs (including the specified gene)')

	length_of_tilling_sequence_for_specified_gene = len(gene_to_tilling_sequence_dict[specified_gene])

	library_and_position_to_estimated_real_coverage_for_specified_gene_dict = {}

	# Bucket the positions of the specified gene by the number of homologs aligned there.
	# The inner dictionaries are used as sets of positions (the value 0 is a placeholder).
	number_of_aligned_homologs_to_list_of_positions_in_specified_gene_dict = {0: {}, 1: {}, 2: {}}

	for position in range(1, length_of_tilling_sequence_for_specified_gene + 1):
		num_aligned_homologs = get_number_of_aligned_homologs_for_position_of_specified_gene(specified_gene, homolog_list, position, gene_to_homolog_alignment_dict)
		if not (num_aligned_homologs in number_of_aligned_homologs_to_list_of_positions_in_specified_gene_dict):
			raise IOError('Internal error: unexpected number of aligned homologs')
		number_of_aligned_homologs_to_list_of_positions_in_specified_gene_dict[num_aligned_homologs][position] = 0

	###########################################################    DEBUG:    ##########################################################
	#
	# Compute the ( specified gene coverage : total coverage ) ratio, for each library, at every position where the real
	# coverage is known, both homologs are aligned, and no homolog end is near.
	# The per-position print is disabled, so the ratios are currently discarded; the pass is kept because the
	# coverage helpers it calls still raise on inconsistent input.

	for library in (global_row_library_list + global_column_library_list):

		position_to_fraction_of_coverage_for_specified_gene_vs_all_genes_dict = {}

		start_position = global_distance_threshold_from_ends_of_sequences_for_homolog_dilution + 1
		stop_position = length_of_tilling_sequence_for_specified_gene - global_distance_threshold_from_ends_of_sequences_for_homolog_dilution
		for position in range(start_position, stop_position + 1):
			bool_we_know_real_coverage_for_specified_gene = (probability_read_from_gene_at_position_maps_to_gene_dict[specified_gene][position][specified_gene] == 1.0)
			aligned_homolog_list_for_position = get_list_of_aligned_homologs_for_position_of_specified_gene(specified_gene, homolog_list, position, gene_to_homolog_alignment_dict)
			bool_two_aligned_homologs_at_position = (len(aligned_homolog_list_for_position) == 2)

			if not (bool_we_know_real_coverage_for_specified_gene and bool_two_aligned_homologs_at_position):
				continue

			# check if we are too close to one of the ends of one of the two aligned homologs
			too_close_to_one_end_of_one_of_the_homologs = \
					position_is_too_close_to_one_end_of_one_homolog(position, specified_gene, aligned_homolog_list_for_position,
											gene_to_homolog_alignment_dict, gene_to_tilling_sequence_dict)
			if too_close_to_one_end_of_one_of_the_homologs:
				continue

			coverage_of_specified_gene = \
					get_coverage_of_library_at_position_of_gene(specified_gene, position, library, gene_to_line_dictionary_list_for_each_position)
			total_coverage = get_total_coverage_at_position_for_library(specified_gene, aligned_homolog_list_for_position, position, library,
											gene_to_line_dictionary_list_for_each_position,
											gene_to_homolog_alignment_dict)
			position_to_fraction_of_coverage_for_specified_gene_vs_all_genes_dict[position] = \
					float(coverage_of_specified_gene) / float(total_coverage)
			#print '%s\t%d\t%.4f' % (library, position, position_to_fraction_of_coverage_for_specified_gene_vs_all_genes_dict[position])

	print('DEBUG|\tFinished with debugging code to print fraction of real coverage for specified gene at all positions where we know it:')
	###################################################################################################################################

	## Estimate real coverage
	print('DEBUG|\tEstimating real coverage for %s' % specified_gene)

	## Open items carried over from the original to-do list:
	#  - case where the (gA:gB) ratio serves us better than the (gA:total) ratio
	#  - case where we have an estimate of the (geneA:total cov) ratio on only one side of the position
	#  - extend the code to handle a total of only 2 homologs, including the specified gene
	#
	## Note: an earlier version interpolated the ratio between the nearest lower/higher positions of known coverage
	##       (see the find_low_and_high_position_in_*_case helpers).  That code path is disabled; the library-wide
	##       ratio is always used instead.

	gene_list = ([specified_gene] + homolog_list)

	# Maximum tolerated deviation from 1.0 of the sum of the un-normalised per-gene fractions
	ERROR_THRESH = 0.1

	for library in (global_row_library_list + global_column_library_list):
		print('DEBUG|\tLibrary %s' % library)
		library_and_position_to_estimated_real_coverage_for_specified_gene_dict[library] = {}

		# Library-wide fraction of total coverage due to each gene, each computed
		# with that gene playing the role of the specified gene.
		overall_fraction_of_total_coverage_due_to_gene_dict = {}
		sum_of_estimated_overall_fraction_of_total_coverage_due_to_all_genes = 0
		for current_gene in gene_list:
			new_homolog_list = [gene for gene in gene_list]
			new_homolog_list.remove(current_gene)
			overall_fraction_of_total_coverage_due_to_gene_dict[current_gene] = \
					get_overall_fraction_of_total_coverage_due_to_specified_gene_for_library(current_gene, new_homolog_list, library,
													  gene_to_line_dictionary_list_for_each_position,
													  gene_to_homolog_alignment_dict,
													  probability_read_from_gene_at_position_maps_to_gene_dict,
													  number_of_aligned_homologs_to_list_of_positions_in_specified_gene_dict,
													  gene_to_tilling_sequence_dict)
			sum_of_estimated_overall_fraction_of_total_coverage_due_to_all_genes += overall_fraction_of_total_coverage_due_to_gene_dict[current_gene]
			print('DEBUG|\t\tOverall fraction of total coverage due to %s is %f' % (current_gene, overall_fraction_of_total_coverage_due_to_gene_dict[current_gene]))

		# Normalize the estimated overall fraction of total coverage due to each gene
		# so that the sum over the specified gene and its homologs will be 1
		for current_gene in gene_list:
			overall_fraction_of_total_coverage_due_to_gene_dict[current_gene] /= float(sum_of_estimated_overall_fraction_of_total_coverage_due_to_all_genes)

		# Check that the normalization factor was not very large
		if abs(sum_of_estimated_overall_fraction_of_total_coverage_due_to_all_genes - 1.0) > ERROR_THRESH:
			print('------------------------------------------------------------------------')
			for current_gene in gene_list:
				print('DEBUG|\t\tEstimated overall fraction of total coverage due to %s is %f' % \
						(current_gene, overall_fraction_of_total_coverage_due_to_gene_dict[current_gene]))
			print('------------------------------------------------------------------------')
			raise IOError('Error: Estimates of overall fraction of total coverage due to the specified gene and its homologs are inconsistent')

		for position in range(1, length_of_tilling_sequence_for_specified_gene + 1):
			aligned_homolog_list_for_position = get_list_of_aligned_homologs_for_position_of_specified_gene(specified_gene, homolog_list, position, gene_to_homolog_alignment_dict)

			if probability_read_from_gene_at_position_maps_to_gene_dict[specified_gene][position][specified_gene] == 1.0:
				# At this position, gA maps only to gA and no other gene maps to gA,
				# so real coverage == observed coverage
				library_and_position_to_estimated_real_coverage_for_specified_gene_dict[library][position] = \
						get_coverage_of_library_at_position_of_gene(specified_gene, position, library, gene_to_line_dictionary_list_for_each_position)
				continue

			## We must estimate the real coverage

			# Can we estimate the real coverage accurately for this position?
			too_close_to_ends_of_specified_gene_or_an_aligned_homolog = \
					position_is_too_close_to_ends_of_specified_gene_or_homolog(position, specified_gene, aligned_homolog_list_for_position,
											gene_to_homolog_alignment_dict, gene_to_tilling_sequence_dict)
			if too_close_to_ends_of_specified_gene_or_an_aligned_homolog:
				# cannot get an accurate estimate of real coverage at this position
				library_and_position_to_estimated_real_coverage_for_specified_gene_dict[library][position] = 'NA'
				continue

			# use the overall fraction of total coverage due to each gene, to predict the real coverage at this position:
			# renormalise the specified gene's fraction over the genes that actually contribute reads here
			overall_fraction_of_total_coverage_due_to_specified_gene_and_aligned_homologs = 0.0
			overall_fraction_of_total_coverage_due_to_specified_gene_and_aligned_homologs += \
					overall_fraction_of_total_coverage_due_to_gene_dict[specified_gene]
			for homolog in aligned_homolog_list_for_position:
				overall_fraction_of_total_coverage_due_to_specified_gene_and_aligned_homologs += \
						overall_fraction_of_total_coverage_due_to_gene_dict[homolog]

			predicted_fraction_of_total_coverage_due_to_specified_gene = \
					(float(overall_fraction_of_total_coverage_due_to_gene_dict[specified_gene]) / \
					 float(overall_fraction_of_total_coverage_due_to_specified_gene_and_aligned_homologs))

			# find total coverage at current position
			total_coverage_at_current_position = get_total_coverage_at_position_for_library(specified_gene, aligned_homolog_list_for_position,
													position, library,
													gene_to_line_dictionary_list_for_each_position,
													gene_to_homolog_alignment_dict)

			# estimate real coverage for specified gene at current position
			estimated_real_coverage_for_specified_gene = total_coverage_at_current_position * predicted_fraction_of_total_coverage_due_to_specified_gene

			library_and_position_to_estimated_real_coverage_for_specified_gene_dict[library][position] = estimated_real_coverage_for_specified_gene

			###############################################     DEBUG:   ###################################################################
			# The estimate is always numeric on this path, so no 'NA' handling is needed here.
			probability_gene_maps_to_itself = probability_read_from_gene_at_position_maps_to_gene_dict[specified_gene][position][specified_gene]
			DEBUG_estimated_native_coverage_of_specified_gene = estimated_real_coverage_for_specified_gene * probability_gene_maps_to_itself

			# write out final estimates of real coverage at current position of specified gene, for the current library, plus other statistics
			estimated_real_coverage_file.write(('Gene: %s,\tLibrary: %s,\tPosition: %d,\n' + \
							    'Estimated real frac of total coverage due to gene: %f\n' + \
							    'Estimated real coverage of gene: %s\n' + \
							    'P(gene -> gene) = %f\n' + \
							    'Estimated native coverage of gene: %f\n' + \
							    'Observed coverage of gene: %d\n') % \
					(specified_gene, library, position,
					 predicted_fraction_of_total_coverage_due_to_specified_gene,
					 str(estimated_real_coverage_for_specified_gene),
					 probability_gene_maps_to_itself,
					 DEBUG_estimated_native_coverage_of_specified_gene,
					 get_coverage_of_library_at_position_of_gene(specified_gene, position, library, gene_to_line_dictionary_list_for_each_position)))
			################################################################################################################################

		# Fixed: the gene name is now substituted into the message (previously it was appended after a literal '%s').
		print('DEBUG|\tFinished estimating real coverage for %s' % specified_gene)

	return library_and_position_to_estimated_real_coverage_for_specified_gene_dict



def position_is_too_close_to_ends_of_specified_gene_or_homolog(position, specified_gene, homolog_list,
								gene_to_homolog_alignment_dict, gene_to_tilling_sequence_dict):
	"""Return True if position lies near an end of the specified gene or of a homolog.

	"Near" means within global_distance_threshold_from_ends_of_sequences_for_homolog_dilution
	positions of either end of the tilling sequence.  The homolog part of the
	test is delegated to position_is_too_close_to_one_end_of_one_homolog().
	"""
	gene_length = len(gene_to_tilling_sequence_dict[specified_gene])
	margin = global_distance_threshold_from_ends_of_sequences_for_homolog_dilution

	# positions margin+1 .. gene_length-margin are far enough from both ends
	near_an_end_of_specified_gene = not (margin < position <= (gene_length - margin))

	# the homolog check is always performed, whatever the result above
	near_an_end_of_a_homolog = position_is_too_close_to_one_end_of_one_homolog(
			position, specified_gene, homolog_list,
			gene_to_homolog_alignment_dict, gene_to_tilling_sequence_dict)

	return bool(near_an_end_of_specified_gene or near_an_end_of_a_homolog)

def position_is_too_close_to_one_end_of_one_homolog(position, specified_gene, homolog_list, 
						    gene_to_homolog_alignment_dict, gene_to_tilling_sequence_dict):
	"""Return True if position maps near an end of at least one homolog in homolog_list.

	position is a position in specified_gene; it is translated to each homolog
	through gene_to_homolog_alignment_dict[specified_gene][homolog], so it must
	be aligned to every homolog in homolog_list (a KeyError is raised otherwise).
	"""
	margin = global_distance_threshold_from_ends_of_sequences_for_homolog_dilution

	def maps_near_an_end_of(homolog):
		mapped_position = gene_to_homolog_alignment_dict[specified_gene][homolog][position]
		return ((mapped_position <= margin) or
			(mapped_position > (len(gene_to_tilling_sequence_dict[homolog]) - margin)))

	# Build the complete list before reducing it, so that every homolog is examined
	verdict_for_each_homolog = [maps_near_an_end_of(homolog) for homolog in homolog_list]
	return any(verdict_for_each_homolog)

def get_number_of_aligned_homologs_for_position_of_specified_gene(specified_gene, homolog_list, position, gene_to_homolog_alignment_dict):
	"""Return how many homologs in homolog_list are aligned to position of specified_gene."""
	aligned_homologs = get_list_of_aligned_homologs_for_position_of_specified_gene(
			specified_gene, homolog_list, position, gene_to_homolog_alignment_dict)
	return len(aligned_homologs)

def get_list_of_aligned_homologs_for_position_of_specified_gene(specified_gene, homolog_list, position, gene_to_homolog_alignment_dict):
	"""Return the homologs (in homolog_list order) whose alignment to specified_gene contains position."""
	return [candidate for candidate in homolog_list
		if position in gene_to_homolog_alignment_dict[specified_gene][candidate]]

def determine_previous_and_next_positions_where_we_expect_to_have_a_better_estimate_of_real_coverage(specified_gene, homolog_list, position, library,
										gene_to_homolog_alignment_dict,
										probability_read_from_gene_at_position_maps_to_gene_dict,
										library_and_position_to_estimated_real_coverage_for_specified_gene_dict,
										length_of_tilling_sequence_for_specified_gene):
	"""Find the nearest positions at or around position with a usable real-coverage estimate.

	A position is usable when either
	  (a) P(read from specified_gene maps to specified_gene) == 1.0 there, or
	  (b) two homologs are aligned there and the stored estimate for library
	      at that position is not 'NA'.

	The search starts at position itself and walks down to 1 (for prev_pos)
	and up to length_of_tilling_sequence_for_specified_gene (for next_pos).

	Returns:
	  (prev_pos, next_pos); each is a position, or the string 'NA' when no
	  usable position exists in that direction.

	Fixed: the original wrote "prev_pos == 'NA'" / "next_pos == 'NA'"
	(a comparison, not an assignment), so a failed search returned 0 or
	length + 1 instead of 'NA'.
	"""

	def estimate_is_usable_at(candidate_position):
		number_of_aligned_homologs_for_position = \
				get_number_of_aligned_homologs_for_position_of_specified_gene(specified_gene, homolog_list, candidate_position, gene_to_homolog_alignment_dict)
		bool_already_know_real_coverage = \
				(probability_read_from_gene_at_position_maps_to_gene_dict[specified_gene][candidate_position][specified_gene] == 1.0)
		bool_real_coverage_estimate_is_not_NA = \
				(library_and_position_to_estimated_real_coverage_for_specified_gene_dict[library][candidate_position] != 'NA')
		return bool_already_know_real_coverage or \
				((number_of_aligned_homologs_for_position == 2) and bool_real_coverage_estimate_is_not_NA)

	# walk downwards: position, position - 1, ..., 1
	prev_pos = 'NA'
	for candidate_position in range(position, 0, -1):
		if estimate_is_usable_at(candidate_position):
			prev_pos = candidate_position
			break

	# walk upwards: position, position + 1, ..., length of the tilling sequence
	next_pos = 'NA'
	for candidate_position in range(position, length_of_tilling_sequence_for_specified_gene + 1):
		if estimate_is_usable_at(candidate_position):
			next_pos = candidate_position
			break

	return prev_pos, next_pos

def find_low_and_high_position_in_two_aligned_homologs_case(specified_gene, position, library,
							gene_to_line_dictionary_list_for_each_position,
							probability_read_from_gene_at_position_maps_to_gene_dict,
							list_of_positions_with_two_aligned_homologs_dict,
							length_of_tilling_sequence_for_specified_gene):
	"""Find the nearest positions of known coverage at which two homologs are aligned.

	A position qualifies when P(read from specified_gene maps to specified_gene)
	is 1.0 there and it is a key of list_of_positions_with_two_aligned_homologs_dict.
	The search starts at position and goes down to 1 (low) and up to
	length_of_tilling_sequence_for_specified_gene (high).

	library and gene_to_line_dictionary_list_for_each_position are not used.

	Returns (low_position, high_position); either is 'NA' if nothing qualifies
	in that direction.
	"""

	def qualifies(candidate):
		return (probability_read_from_gene_at_position_maps_to_gene_dict[specified_gene][candidate][specified_gene] == 1.0) and \
				(candidate in list_of_positions_with_two_aligned_homologs_dict)

	nearest_below = 'NA'
	for candidate in range(position, 0, -1):
		if qualifies(candidate):
			nearest_below = candidate
			break

	nearest_above = 'NA'
	for candidate in range(position, length_of_tilling_sequence_for_specified_gene + 1):
		if qualifies(candidate):
			nearest_above = candidate
			break

	return nearest_below, nearest_above

def find_low_and_high_position_in_one_aligned_homolog_case(specified_gene, homolog, position, library,
							gene_to_line_dictionary_list_for_each_position,
							probability_read_from_gene_at_position_maps_to_gene_dict,
							number_of_aligned_homologs_to_list_of_positions_in_specified_gene_dict,
							length_of_tilling_sequence_for_specified_gene,
							gene_to_homolog_alignment_dict):
	"""Find the nearest positions where coverage is known for both specified_gene and homolog.

	A position of specified_gene qualifies when it is aligned to homolog and
	the self-mapping probability is 1.0 both for specified_gene at that
	position and for homolog at the aligned position.  The search starts at
	position and goes down to 1 (low) and up to
	length_of_tilling_sequence_for_specified_gene (high).

	library and gene_to_line_dictionary_list_for_each_position are not used.

	Returns (low_position, high_position); either is 'NA' if nothing qualifies
	in that direction.
	"""

	# These two values are not used below; the lookups are retained so that a
	# dictionary lacking key 1 or key 2 still raises KeyError here.
	number_of_aligned_homologs_to_list_of_positions_in_specified_gene_dict[1]
	number_of_aligned_homologs_to_list_of_positions_in_specified_gene_dict[2]

	def coverage_is_known_for_both_genes(candidate):
		if candidate not in gene_to_homolog_alignment_dict[specified_gene][homolog]:
			# candidate position of specified gene does not align to homolog
			return False
		candidate_in_homolog = gene_to_homolog_alignment_dict[specified_gene][homolog][candidate]
		return ((probability_read_from_gene_at_position_maps_to_gene_dict[specified_gene][candidate][specified_gene] == 1.0) and \
			(probability_read_from_gene_at_position_maps_to_gene_dict[homolog][candidate_in_homolog][homolog] == 1.0))

	nearest_below = 'NA'
	for candidate in range(position, 0, -1):
		if coverage_is_known_for_both_genes(candidate):
			nearest_below = candidate
			break

	nearest_above = 'NA'
	for candidate in range(position, length_of_tilling_sequence_for_specified_gene + 1):
		if coverage_is_known_for_both_genes(candidate):
			nearest_above = candidate
			break

	return nearest_below, nearest_above

def get_gene_to_list_of_regions_of_known_coverage_dict(gene_list, probability_read_from_gene_at_position_maps_to_gene_dict, gene_to_tilling_sequence_dict):
	"""Map each gene in gene_list to its list of regions of known coverage.

	The regions for a gene are whatever
	get_list_of_regions_of_known_coverage_for_specified_gene() returns for it,
	given the length of the gene's tilling sequence.
	"""
	return dict(
		(current_gene,
		 get_list_of_regions_of_known_coverage_for_specified_gene(
				current_gene,
				probability_read_from_gene_at_position_maps_to_gene_dict,
				len(gene_to_tilling_sequence_dict[current_gene])))
		for current_gene in gene_list)

def get_list_of_regions_of_known_coverage_for_specified_gene(specified_gene, probability_read_from_gene_at_position_maps_to_gene_dict, length_of_tilling_sequence_for_specified_gene):
	"""Return the maximal runs of positions where real coverage equals observed coverage.

	A position belongs to a run when P(read from specified_gene maps to
	specified_gene) is 1.0 there.  Positions are 1-based and run up to
	length_of_tilling_sequence_for_specified_gene.

	Returns a list of [first_position, last_position] pairs (both inclusive),
	in increasing order.
	"""
	regions = []
	# first position of the run currently being extended, or None outside a run
	run_start = None

	for position in range(1, length_of_tilling_sequence_for_specified_gene + 1):
		coverage_is_known = (probability_read_from_gene_at_position_maps_to_gene_dict[specified_gene][position][specified_gene] == 1.0)
		if coverage_is_known:
			if run_start is None:
				run_start = position
		elif run_start is not None:
			# the run ended at the previous position
			regions.append([run_start, position - 1])
			run_start = None

	# a run that reaches the end of the sequence is closed there
	if run_start is not None:
		regions.append([run_start, length_of_tilling_sequence_for_specified_gene])

	return regions

def get_fraction_of_total_coverage_due_to_specified_gene(specified_gene, homolog_list, position, library, gene_to_line_dictionary_list_for_each_position, gene_to_homolog_alignment_dict):
	"""Return (coverage of specified_gene) / (coverage of specified_gene plus aligned homologs).

	Both coverages are taken for the given library at the given position of
	the specified gene.

	Raises IOError when no coverage entry exists for the library at this
	position of the specified gene (the lookup helper returns 'NA').
	"""
	numerator = get_coverage_of_library_at_position_of_gene(
			specified_gene, position, library, gene_to_line_dictionary_list_for_each_position)
	if numerator == 'NA':
		raise IOError('Internal error in get_fraction_of_total_coverage_due_to_specified_gene(): coverage = NA')

	denominator = get_total_coverage_at_position_for_library(
			specified_gene, homolog_list, position, library,
			gene_to_line_dictionary_list_for_each_position, gene_to_homolog_alignment_dict)

	return float(numerator) / float(denominator)

def get_total_coverage_at_position_for_library(specified_gene, homolog_list, position, library, gene_to_line_dictionary_list_for_each_position, gene_to_homolog_alignment_dict):
	"""Sum the library's coverage of the specified gene and of every aligned homolog.

	The specified gene contributes its coverage at 'position'.  Each homolog in
	homolog_list contributes its coverage at the homolog position that
	'position' is aligned to; homologs without an alignment entry for
	'position' are skipped.

	Raises IOError if any of the needed coverage lookups returns 'NA'.
	"""
	missing_coverage_message = 'Internal error in get_total_coverage_at_position_for_library(): coverage = NA'

	# start with the coverage of the specified gene itself
	gene_coverage = get_coverage_of_library_at_position_of_gene(
			specified_gene, position, library, gene_to_line_dictionary_list_for_each_position)
	if gene_coverage == 'NA':
		raise IOError(missing_coverage_message)
	running_total = 0
	running_total += gene_coverage

	# then add each homolog that aligns at this position
	for homolog in homolog_list:
		if not (position in gene_to_homolog_alignment_dict[specified_gene][homolog]):
			continue
		aligned_position = gene_to_homolog_alignment_dict[specified_gene][homolog][position]
		homolog_coverage = get_coverage_of_library_at_position_of_gene(
				homolog, aligned_position, library, gene_to_line_dictionary_list_for_each_position)
		if homolog_coverage == 'NA':
			raise IOError(missing_coverage_message)
		running_total += homolog_coverage

	return running_total


def _record_coverage_fraction_subsection(section_to_list_of_subsection_tuples_dict, section, subsection_start, subsection_end,
					  subsection_total_score, subsection_size_threshold):
	# Store the (mean coverage fraction, weight) tuple of a finished subsection in the list of its section.
	subsection_size = subsection_end - subsection_start + 1
	mean_coverage_fraction_for_subsection = float(subsection_total_score) / float(subsection_size)

	# short subsections all get weight 1.0; longer ones are weighted by their size
	subsection_weight = max(1.0, (float(subsection_size) / float(subsection_size_threshold + 1)))

	section_to_list_of_subsection_tuples_dict[section].append((mean_coverage_fraction_for_subsection, subsection_weight))

	print('DEBUG|\t\t\t%.3f\t%.1f\t%d' % (mean_coverage_fraction_for_subsection, subsection_weight, section))


def get_overall_fraction_of_total_coverage_due_to_specified_gene_for_library(specified_gene, homolog_list, library,
								      gene_to_line_dictionary_list_for_each_position, 
								      gene_to_homolog_alignment_dict,
								      probability_read_from_gene_at_position_maps_to_gene_dict,
								      number_of_aligned_homologs_to_list_of_positions_in_specified_gene_dict,
								      gene_to_tilling_sequence_dict):
	"""Predict the mean fraction of total coverage that is due to specified_gene, for one library.

	Method:
	  * The 'good region' of the specified gene is obtained from
	    find_start_and_end_positions_of_good_region() and split into sections of
	    roughly 200 positions (the last section absorbs the remainder).
	  * A position is a sample point when the probability that a read from the
	    specified gene maps to the specified gene is exactly 1.0 and every
	    homolog in homolog_list aligns there.  At a sample point the coverage
	    fraction is coverage(specified gene) / coverage(specified gene + homologs).
	  * Consecutive sample points that are in the same section and less than
	    global_read_length apart are grouped into a subsection.  The gaps
	    between the points of a subsection are scored with the mean of the two
	    neighbouring coverage fractions.  A subsection that would cross a section
	    barrier is split at the barrier, and the gap is shared between the two
	    parts by linear interpolation.
	  * Each subsection yields a (mean coverage fraction, weight) tuple.

	Returns the average of
	  (a) the unweighted mean, over sections, of the weighted section means, and
	  (b) the weighted mean over all subsections.

	Raises IOError if no position qualifies as a sample point.

	Note: number_of_aligned_homologs_to_list_of_positions_in_specified_gene_dict
	is accepted for interface compatibility but is not used here.
	"""
	print('DEBUG|\t\tComputing mean fraction of real coverage due to %s . . .' % specified_gene)

	ideal_section_size = 200
	# NOTE: open question left by the original author -- should this be exactly the
	# global read length?  What happens if it over/underestimates?
	subsection_size_threshold = global_read_length

	start_of_good_region, end_of_good_region = find_start_and_end_positions_of_good_region(specified_gene, homolog_list,
												gene_to_homolog_alignment_dict, gene_to_tilling_sequence_dict)

	## determine the size of the large sections
	size_of_good_region = (end_of_good_region - start_of_good_region + 1)
	num_sections = int(math.ceil(float(size_of_good_region) / float(ideal_section_size)))

	# the last section will be bigger than the others
	section_size = int(math.floor(float(size_of_good_region) / float(num_sections)))

	## initialize variables for large sections
	section_to_list_of_subsection_tuples_dict = {}
	for section in range(0, num_sections):
		section_to_list_of_subsection_tuples_dict[section] = []

	## initialize variables for subsections
	subsection_start = 'NA'
	in_subsection = False
	subsection_total_score = 'NA'

	previous_point_in_subsection = 'NA'
	previous_coverage_fraction_in_subsection = 'NA'
	section_for_previous_point = 'NA'

	print('DEBUG|\t\t\tCovFrac\tWeight\tSection')
	for position in range(start_of_good_region, end_of_good_region + 1):
		bool_we_know_real_coverage_for_specified_gene = (probability_read_from_gene_at_position_maps_to_gene_dict[specified_gene][position][specified_gene] == 1.0)
		aligned_homolog_list_for_position = get_list_of_aligned_homologs_for_position_of_specified_gene(specified_gene, homolog_list, position, gene_to_homolog_alignment_dict)
		bool_all_homologs_align_at_this_position = (len(aligned_homolog_list_for_position) == len(homolog_list))

		if not (bool_we_know_real_coverage_for_specified_gene and bool_all_homologs_align_at_this_position):
			# the real coverage fraction is not known at this position
			continue

		## we know the real fraction of total coverage due to the specified gene, at this position

		coverage_of_specified_gene = \
				get_coverage_of_library_at_position_of_gene(specified_gene, position, library, gene_to_line_dictionary_list_for_each_position)
		total_coverage = get_total_coverage_at_position_for_library(specified_gene, aligned_homolog_list_for_position, position, library, 
												gene_to_line_dictionary_list_for_each_position, 
												gene_to_homolog_alignment_dict)
		current_coverage_fraction = \
				float(coverage_of_specified_gene) / float(total_coverage)

		## get section for current point
		# adjusted_position is the one-indexed position with respect to the start of the 'good region'
		adjusted_position = (position - start_of_good_region + 1)
		current_section = int(math.ceil(float(adjusted_position) / float(section_size))) - 1
		# When the size of the good region is not a multiple of num_sections, the
		# leftover positions at the end belong to the (bigger) last section.
		# Without this clamp they would get a section index that has no entry in
		# section_to_list_of_subsection_tuples_dict.
		current_section = min(current_section, num_sections - 1)

		bool_first_point, bool_point_is_after_large_gap, bool_first_point_in_new_section, bool_new_subsection = \
				get_boolean_values_for_subsection_point(previous_point_in_subsection, position, section_for_previous_point, 
									current_section, in_subsection, subsection_size_threshold)

		if bool_new_subsection:
			## new subsection

			# True when the open subsection continues (without a large gap) into the next section
			bool_subsection_crosses_section_barrier = \
					(in_subsection == True) and (not bool_point_is_after_large_gap) and bool_first_point_in_new_section

			if in_subsection == True:
				if bool_subsection_crosses_section_barrier:
					# subsection stretches across barrier between sections

					end_of_previous_section = (current_section * section_size) - 1 + start_of_good_region
					start_of_current_section = (end_of_previous_section + 1)

					size_of_first_part_of_gap = (end_of_previous_section - previous_point_in_subsection)
					size_of_second_part_of_gap = (position - start_of_current_section)

					middle_of_first_part_of_gap = previous_point_in_subsection + (float(size_of_first_part_of_gap + 1) / float(2.0))
					middle_of_second_part_of_gap = position - (float(size_of_second_part_of_gap + 1) / float(2.0))

					# each part of the gap is scored as (its size) * (coverage fraction linearly
					# interpolated, between the previous point and the current point, at its middle)
					first_gap_score = (size_of_first_part_of_gap * \
						     ((((middle_of_first_part_of_gap - previous_point_in_subsection) * current_coverage_fraction) + \
						       ((position - middle_of_first_part_of_gap) * previous_coverage_fraction_in_subsection)) / \
						      (position - previous_point_in_subsection)))

					second_gap_score = (size_of_second_part_of_gap * \
						     ((((middle_of_second_part_of_gap - previous_point_in_subsection) * current_coverage_fraction) + \
						       ((position - middle_of_second_part_of_gap) * previous_coverage_fraction_in_subsection)) / \
						      (position - previous_point_in_subsection)))

					# add the part of the gap that is in the previous section to the old subsection
					subsection_total_score += first_gap_score
					subsection_end = end_of_previous_section
					print('DEBUG|\t\t\tLast point for previous subsection: %d, End prev section: %d, First point for new subsection: %d' % \
							(previous_point_in_subsection, end_of_previous_section, position))
					print('DEBUG|\t\t\tSplitting subsection that stretches across barrier between sections %d and %d' % \
							(current_section - 1, current_section))
					print('DEBUG|\t\t\tSize of gap #1: %d, Size of gap #2: %d' % (size_of_first_part_of_gap, size_of_second_part_of_gap))
					print('DEBUG|\t\t\tFirst gap score=%f, Second gap score=%f' % (first_gap_score, second_gap_score))
				else:
					subsection_end = previous_point_in_subsection

				## record old subsection
				_record_coverage_fraction_subsection(section_to_list_of_subsection_tuples_dict, section_for_previous_point,
								      subsection_start, subsection_end,
								      subsection_total_score, subsection_size_threshold)

			## construct new subsection

			if bool_subsection_crosses_section_barrier:
				# Have new subsection begin at the barrier between sections
				# Add the part of the gap that is in the current section to the new subsection
				subsection_total_score = second_gap_score
				subsection_start = start_of_current_section
			else:
				subsection_total_score = 0.0
				subsection_start = position

			# the new subsection has no previous point yet, so no gap is scored for the current point below
			previous_point_in_subsection = 'NA'
			previous_coverage_fraction_in_subsection = 'NA'
			section_for_previous_point = 'NA'
			in_subsection = True

		## update subsection for current point

		subsection_total_score += current_coverage_fraction
		if previous_coverage_fraction_in_subsection != 'NA':
			# this is not the first point in the subsection, 
			# so update the subsection with information from the gap to the left of the current position
			size_of_gap_before_current_position = (position - previous_point_in_subsection - 1)
			gap_score = size_of_gap_before_current_position * (float(current_coverage_fraction + previous_coverage_fraction_in_subsection) / float(2.0))
			subsection_total_score += gap_score

		## store variables for current point

		previous_point_in_subsection = position
		previous_coverage_fraction_in_subsection = current_coverage_fraction
		section_for_previous_point = current_section

	## record the subsection that is still open after the last sample point
	# (without this step the final subsection would be silently dropped, and a
	# region that consists of one single subsection would yield no data at all)
	if in_subsection == True:
		_record_coverage_fraction_subsection(section_to_list_of_subsection_tuples_dict, section_for_previous_point,
						      subsection_start, previous_point_in_subsection,
						      subsection_total_score, subsection_size_threshold)

	sum_of_mean_coverage_fraction_over_sections = 0.0
	number_of_sections_that_contain_subsections = 0

	weighted_sum_of_mean_coverage_fraction_over_subsections = 0.0
	sum_of_subsection_weights = 0.0

	for section in section_to_list_of_subsection_tuples_dict:
		weighted_sum_of_subsection_scores_for_section = 0.0
		sum_subsection_weights_for_section = 0.0
		subsection_tuple_list_for_section = section_to_list_of_subsection_tuples_dict[section]

		# if there are no subsections in this section, then move on
		if len(subsection_tuple_list_for_section) == 0:
			continue
		number_of_sections_that_contain_subsections += 1

		for subsection_tuple in subsection_tuple_list_for_section:
			(mean_coverage_fraction_for_subsection, subsection_weight) = subsection_tuple
			weighted_sum_of_subsection_scores_for_section += (mean_coverage_fraction_for_subsection * subsection_weight)
			sum_subsection_weights_for_section += subsection_weight
		mean_coverage_fraction_for_section = float(weighted_sum_of_subsection_scores_for_section) / float(sum_subsection_weights_for_section)

		sum_of_mean_coverage_fraction_over_sections += mean_coverage_fraction_for_section

		weighted_sum_of_mean_coverage_fraction_over_subsections += weighted_sum_of_subsection_scores_for_section
		sum_of_subsection_weights += sum_subsection_weights_for_section

	if number_of_sections_that_contain_subsections == 0:
		# no sample point was found, so there is nothing to average
		raise IOError('Error: could not find any positions of %s where the real coverage is known and all homologs align' % specified_gene)

	mean_coverage_fraction_by_sections = float(sum_of_mean_coverage_fraction_over_sections) / float(number_of_sections_that_contain_subsections)
	mean_coverage_fraction_by_subsections = float(weighted_sum_of_mean_coverage_fraction_over_subsections) / float(sum_of_subsection_weights)

	# Note: 
	# The following prediction of mean coverage fraction is a compromise between considering only large-scale 
	# or only small-scale variations in coverage.
	#
	# There is probably a more accurate estimation method:
	# A better way might be to model the score for subsection j, in section i, by a linear model with random effects:
	#	s_ij = mu + a_i + e_ij
	# where a_i models large-scale changes between sections,
	# e_ij handles smaller changes between subsections (or parts of subsections, where weight==1 for each part),
	# and "mu" would be the estimate of the mean coverage fraction for the specified gene.
	#
	# One issue is that the a_i's and e_ij's may not be normally-distributed.
	predicted_mean_coverage_fraction = float(mean_coverage_fraction_by_sections + mean_coverage_fraction_by_subsections) / float(2)

	return predicted_mean_coverage_fraction

def find_start_and_end_positions_of_good_region(specified_gene, homolog_list, 
		gene_to_homolog_alignment_dict, gene_to_tilling_sequence_dict):
	"""Locate the first 'good region' of the specified gene.

	Candidate positions exclude
	global_distance_threshold_from_ends_of_sequences_for_homolog_dilution
	positions at each end of the specified gene's tilling sequence.  Only
	positions where every homolog aligns are examined; such a position is
	'good' when position_is_too_close_to_one_end_of_one_homolog() is false.

	The region starts at the first good position and ends just before the next
	examined position that is too close to an end of a homolog (or at the last
	candidate position when there is none).

	Returns (start_of_good_region, end_of_good_region), both inclusive.
	Raises IOError when no good position exists.
	"""
	end_margin = global_distance_threshold_from_ends_of_sequences_for_homolog_dilution
	first_candidate = end_margin + 1
	last_candidate = len(gene_to_tilling_sequence_dict[specified_gene]) - end_margin

	region_start = None
	region_end = None

	for position in range(first_candidate, last_candidate + 1):
		aligned_homologs = get_list_of_aligned_homologs_for_position_of_specified_gene(
				specified_gene, homolog_list, position, gene_to_homolog_alignment_dict)
		if len(aligned_homologs) != len(homolog_list):
			# positions where some homolog does not align are ignored
			continue

		too_close_to_an_end = position_is_too_close_to_one_end_of_one_homolog(
				position, specified_gene, homolog_list,
				gene_to_homolog_alignment_dict, gene_to_tilling_sequence_dict)

		if not too_close_to_an_end:
			if region_start is None:
				# we have just entered the 'good region'
				region_start = position
		elif region_start is not None:
			# we have just left the 'good region'
			region_end = position - 1
			break

	if region_start is None:
		raise IOError('Error: could not find any positions where all homologs align and we are not too close to the ends of any homolog')
	if region_end is None:
		# the good region extends to the last candidate position
		region_end = last_candidate

	return region_start, region_end


def get_boolean_values_for_subsection_point(previous_point_in_subsection, position, section_for_previous_point, current_section, in_subsection, subsection_size_threshold):
	"""Classify a sample point relative to the subsection that is currently open.

	Returns the tuple
	  (bool_first_point, bool_point_is_after_large_gap,
	   bool_first_point_in_new_section, bool_new_subsection)

	When no subsection is open the point is the first point, and the result is
	(True, 'NA', True, True).  Otherwise:
	  * the point is 'after a large gap' when it lies at least
	    subsection_size_threshold positions after the previous point,
	  * it is the 'first point in a new section' when its section differs from
	    the section of the previous point,
	  * it starts a new subsection when either of the two holds.
	"""
	if in_subsection == False:
		# very first point: the gap flag is undefined
		return True, 'NA', True, True

	entered_new_section = bool(current_section != section_for_previous_point)
	follows_large_gap = bool((position - previous_point_in_subsection) >= subsection_size_threshold)
	starts_new_subsection = (follows_large_gap or entered_new_section)

	return False, follows_large_gap, entered_new_section, starts_new_subsection


def estimate_fraction_of_base_reads_mapping_to_specified_gene_that_come_from_specified_gene_at_position_for_each_library(specified_gene, position,
														library_and_position_to_estimated_real_coverage_for_specified_gene_dict,
														gene_to_line_dictionary_list_for_each_position, 
														probability_read_from_gene_at_position_maps_to_gene_dict,
														native_fraction_of_base_reads_file):
	"""Estimate, per library, the 'native fraction' of base reads at one position.

	The native fraction is the fraction of the base reads mapping to the
	specified gene at 'position' that really come from the specified gene.  It
	is computed for every library in
	global_row_library_list + global_column_library_list:

	  * 1.0 when reads from the specified gene map back to it with probability 1.0,
	  * 1.0 when no estimate of the real coverage is available ('NA'),
	  * otherwise (estimated real coverage * self-mapping probability) / observed coverage.

	The value is written to native_fraction_of_base_reads_file (tab-separated:
	gene, position, library, fraction) before it is range-checked.  Values
	above 1.0 are then reset to 1.0 with a warning.

	Returns a dictionary: library -> native fraction.
	Raises IOError when the observed coverage is 'NA' or the fraction is negative.
	"""
	self_mapping_probability = probability_read_from_gene_at_position_maps_to_gene_dict[specified_gene][position][specified_gene]

	native_fraction_by_library = {}
	for library in (global_row_library_list + global_column_library_list):
		if self_mapping_probability == 1.0:
			# the real coverage equals the observed coverage
			native_fraction = 1.0
		else:
			estimated_real_coverage = library_and_position_to_estimated_real_coverage_for_specified_gene_dict[library][position]
			if estimated_real_coverage == 'NA':
				# no estimate of the real coverage is available, so default to 1.0
				native_fraction = 1.0
			else:
				observed_coverage = get_coverage_of_library_at_position_of_gene(
						specified_gene, position, library, gene_to_line_dictionary_list_for_each_position)
				if observed_coverage == 'NA':
					raise IOError('Internal error in estimate_<native_fraction_of_base_reads_for_specified_gene>_at_position_for_each_library(): coverage = NA')
				native_fraction = \
						(float(estimated_real_coverage * self_mapping_probability) / \
						 float(observed_coverage))

		# record estimated native fraction of base reads (the value as estimated, before any reset)
		output_fields = [specified_gene, str(position), library, str(native_fraction)]
		native_fraction_of_base_reads_file.write('\t'.join(output_fields) + '\n')

		if native_fraction > 1.0:
			print('Warning:  native fraction of base reads exceeded 1.0, so it is being reset to 1.0 (%s, %d, %s)' % (specified_gene, position, library))
			native_fraction = 1.0
		elif native_fraction < 0:
			raise IOError('Internal error: native fraction of base reads should not be less than 0')

		native_fraction_by_library[library] = native_fraction

	return native_fraction_by_library

def estimate_fraction_of_base_reads_mapping_to_specified_gene_that_come_from_specified_gene_at_position_for_each_library_old(position, specified_gene, homolog_list, 
									gene_to_line_dictionary_list_for_each_position, probability_read_from_gene_at_position_maps_to_gene_dict,
									gene_to_homolog_alignment_dict, length_of_tilling_sequence_for_specified_gene,
									native_fraction_of_base_reads_file):
	"""Older estimator of the per-library 'native fraction' of base reads at one position.

	Steps:
	  1. Determine the genes that are directly or indirectly associated with
	     the specified gene through read mapping at this position.
	  2. Build the read-mapping probability matrix: entry [row][col] is the
	     probability that a read from gene 'col' maps to gene 'row'.  One extra
	     column per row holds the observed coverage of gene 'row'.
	  3. For each library, plug in the observed coverages and estimate the real
	     coverage of the specified gene:
	       - only one associated gene: the native fraction is 1.0 ('known');
	       - symmetric matrix: solve the linear system by Gaussian elimination
	         ('system_of_linear_equations'), falling back to the backup method
	         when the solver reports failure;
	       - non-symmetric matrix: use the backup method ('backup').
	     The native fraction is then
	     (real coverage * self-mapping probability) / observed coverage.
	  4. Write one tab-separated line per library to native_fraction_of_base_reads_file.

	Returns a dictionary: library -> native fraction.
	Raises IOError when genes disagree on their library lists, or when the real
	coverage of the specified gene could not be determined.
	"""
	# Only consider genes that the specified gene could map to,
	# or the genes that <those> genes could map to,
	# or the genes that <those> genes could map to,
	# etc.
	gene_list = determine_list_of_genes_associated_with_specified_gene_including_specified_gene(specified_gene, homolog_list,
												    probability_read_from_gene_at_position_maps_to_gene_dict,
												    position)

	number_of_genes = len(gene_list)

	print('DEBUG: Gene list: ' + str(gene_list) + '\n')
	print('DEBUG: position: %d\n' % position)

	read_mapping_probability_matrix = []
	for row in range(0, number_of_genes):
		to_gene = gene_list[row]
		read_mapping_probability_matrix.append([])
		for col in range(0, number_of_genes):
			from_gene = gene_list[col]
			if from_gene in homolog_list:
				position_in_from_gene = gene_to_homolog_alignment_dict[specified_gene][from_gene][position]
			else:
				position_in_from_gene = position
			# get the probability that a read from the from_gene at this position maps to the to_gene
			mapping_probability = probability_read_from_gene_at_position_maps_to_gene_dict[from_gene][position_in_from_gene][to_gene]
			read_mapping_probability_matrix[row].append(mapping_probability)
			print('DEBUG: There is %.3f%% probability that a read from %s (col index %d) maps to %s (row index %d)\n' % \
					(mapping_probability * 100.0,
					 from_gene, col, to_gene, row))
		# placeholder for the observed coverage, which is filled in per library below
		read_mapping_probability_matrix[row].append('NA')

	gene_to_coverage_for_each_library_dict = {}
	first_iteration = True
	for gene in gene_list:
		library_list = []
		gene_to_coverage_for_each_library_dict[gene] = {}
		if gene in homolog_list:
			position_in_gene = gene_to_homolog_alignment_dict[specified_gene][gene][position]
		else:
			position_in_gene = position
		for line_dictionary in gene_to_line_dictionary_list_for_each_position[gene][position_in_gene]:
			library = line_dictionary["library"]
			coverage = int(line_dictionary["coverage"])
			gene_to_coverage_for_each_library_dict[gene][library] = coverage
			library_list.append(library)
		library_list.sort()
		# every gene must report exactly the same set of libraries
		if first_iteration:
			first_library_list = list(library_list)
			first_iteration = False
		else:
			if library_list != first_library_list:
				raise IOError('Error: found different library list for genes %s and %s' % (gene_list[0], gene))

	library_to_native_fraction_of_base_reads_mapping_to_specified_gene_dict = {}
	for library in library_list:
		# plug in the coverage of each gene, in the current library:
		for i, gene in enumerate(gene_list):
			coverage_for_current_gene_in_current_library = gene_to_coverage_for_each_library_dict[gene][library]
			if gene == specified_gene:
				coverage_for_specified_gene_in_current_library = coverage_for_current_gene_in_current_library

			read_mapping_probability_matrix[i][number_of_genes] = coverage_for_current_gene_in_current_library

		print('DEBUG: read_mapping probability_matrix (lib=%s)' % library)
		DEBUG_print_matrix(read_mapping_probability_matrix)

		bool_matrix_is_symmetric = matrix_is_symmetric(read_mapping_probability_matrix, number_of_genes)

		method_of_determining_native_fraction_of_base_reads = 'NA'
		bool_computation_successful = 'NA'

		if len(gene_list) == 1:
			# reads covering this position of the specified gene do not map to any of the homologs
			library_to_native_fraction_of_base_reads_mapping_to_specified_gene_dict[library] = 1.0
			method_of_determining_native_fraction_of_base_reads = 'known'

		else:
			if bool_matrix_is_symmetric == False:
				real_coverage_for_specified_gene = \
						estimate_real_coverage_for_library_at_position_of_specified_gene_using_backup_method(
												specified_gene, position, library,
												probability_read_from_gene_at_position_maps_to_gene_dict,
												gene_to_line_dictionary_list_for_each_position,
												length_of_tilling_sequence_for_specified_gene)
				method_of_determining_native_fraction_of_base_reads = 'backup'

			else:
				# solve linear system of equations for the current library
				num_equations = number_of_genes
				num_variables = number_of_genes

				# computation may fail if the linear equations are not independent, or if there is no solution
				# (the solver gets a copy, so the matrix can be reused for the next library)
				bool_computation_successful, real_coverage_for_gene_list = \
						solve_NxN_system_of_independent_linear_equations_by_gaussian_elimination(copy_2D_matrix(read_mapping_probability_matrix), num_equations, num_variables)

				if bool_computation_successful == False:
					real_coverage_for_specified_gene = \
							estimate_real_coverage_for_library_at_position_of_specified_gene_using_backup_method(
													specified_gene, position, library,
													probability_read_from_gene_at_position_maps_to_gene_dict,
													gene_to_line_dictionary_list_for_each_position,
													length_of_tilling_sequence_for_specified_gene)
					method_of_determining_native_fraction_of_base_reads = 'backup'

				else:
					print('Estimated real coverage for genes (linear eq solver): ')
					for i in range(0, len(gene_list)):
						print('%s: %f' % (gene_list[i], real_coverage_for_gene_list[i]))

					# get estimated real number of base reads that come from specified gene
					real_coverage_for_specified_gene = 'NA'
					for i in range(0, len(gene_list)):
						gene = gene_list[i]
						if gene == specified_gene:
							real_coverage_for_specified_gene = real_coverage_for_gene_list[i]
					method_of_determining_native_fraction_of_base_reads = 'system_of_linear_equations'

			# This check has to come before the '%d' print below: formatting 'NA'
			# with '%d' would raise a TypeError and hide the intended error.
			if real_coverage_for_specified_gene == 'NA':
				raise IOError('Internal error: real_coverage_for_specified_gene was not determined')

			print('real coverage for specified gene: %d' % real_coverage_for_specified_gene)

			prob_read_from_specified_gene_maps_to_specified_gene = probability_read_from_gene_at_position_maps_to_gene_dict[specified_gene][position][specified_gene]
			print('P(to spec gene | from spec gene) = %f' % prob_read_from_specified_gene_maps_to_specified_gene)

			library_to_native_fraction_of_base_reads_mapping_to_specified_gene_dict[library] = \
					(float(real_coverage_for_specified_gene * prob_read_from_specified_gene_maps_to_specified_gene) / \
					 float(coverage_for_specified_gene_in_current_library))
			print('Native fraction=%f' % library_to_native_fraction_of_base_reads_mapping_to_specified_gene_dict[library])

		native_fraction_of_base_reads_file.write(specified_gene + '\t' + str(position) + '\t' + library + '\t' + \
				str(library_to_native_fraction_of_base_reads_mapping_to_specified_gene_dict[library]) + '\t' + \
				method_of_determining_native_fraction_of_base_reads + '\t' + \
				'Sym=' + str(bool_matrix_is_symmetric) + '\t' + 'Success=' + str(bool_computation_successful) + '\n')

	return library_to_native_fraction_of_base_reads_mapping_to_specified_gene_dict

# Miscellaneous functions:
def copy_2D_matrix(matrix):
	"""Return a copy of a 2D matrix: a new outer list holding a new list for each row.

	The elements themselves are not copied.
	"""
	duplicate = []
	for row in matrix:
		duplicate.append(list(row))
	return duplicate

def get_coverage_of_library_at_position_of_gene(gene, position, library, gene_to_line_dictionary_list_for_each_position):
	"""Look up the coverage of one library at one position of a gene.

	The line dictionaries stored for (gene, position) are scanned for entries
	whose "library" value equals the requested library.

	Returns the "coverage" value converted to int (the last matching entry wins
	when there are several), or the string 'NA' when no entry matches.
	"""
	matching_coverages = [int(line_dictionary["coverage"])
			      for line_dictionary in gene_to_line_dictionary_list_for_each_position[gene][position]
			      if line_dictionary["library"] == library]
	if not matching_coverages:
		return 'NA'
	return matching_coverages[-1]


# Get a list of genes containing:
#	(1) the specified gene
#	(2) all homologs that are directly or indirectly associated with the specified gene,
#	via mapping of reads covering the current position
def determine_list_of_genes_associated_with_specified_gene_including_specified_gene(specified_gene, homolog_list, probability_read_from_gene_at_position_maps_to_gene_dict, position):
	"""Collect the specified gene plus every homolog reachable from it through read mapping.

	A homolog is reachable from a gene when the probability that a read from
	that gene (looked up at 'position') maps to the homolog is not 0.0.  The
	search is breadth-first, so the result lists the specified gene first and
	the homologs in the order in which they were discovered.
	"""
	associated_genes = [specified_gene]
	unexplored_genes = [specified_gene]	# genes whose outgoing mappings were not examined yet

	while unexplored_genes:
		current_gene = unexplored_genes.pop(0)
		for homolog in homolog_list:
			reads_can_map_to_homolog = \
					(probability_read_from_gene_at_position_maps_to_gene_dict[current_gene][position][homolog] != 0.0)
			if reads_can_map_to_homolog and (homolog not in associated_genes):
				associated_genes.append(homolog)
				unexplored_genes.append(homolog)

	return associated_genes

# do not consider the "observed coverage" column
def matrix_is_symmetric(rmp_matrix, number_of_genes):
	"""Report whether the leading number_of_genes x number_of_genes part of rmp_matrix is symmetric.

	Columns beyond number_of_genes are not compared.  On the first mismatch
	the whole matrix is printed for debugging and False is returned.
	"""
	upper_triangle_index_pairs = ((row, col)
				      for row in range(0, number_of_genes)
				      for col in range(row, number_of_genes))
	for row, col in upper_triangle_index_pairs:
		if rmp_matrix[row][col] != rmp_matrix[col][row]:
			print('Found not-symmetric matrix (here we are including the "observed coverage" column in the printout): ')
			DEBUG_print_matrix(rmp_matrix)
			return False
	return True


def estimate_real_coverage_for_library_at_position_of_specified_gene_using_backup_method(specified_gene, position, library,
												probability_read_from_gene_at_position_maps_to_gene_dict,
												gene_to_line_dictionary_list_for_each_position,
												length_of_tilling_sequence_for_specified_gene):
	"""Estimate the real coverage at a position from the nearest positions of known coverage.

	Starting at 'position', the nearest position at or below it and the nearest
	position at or above it are located where reads from the specified gene
	map back to it with probability 1.0.  The estimate is the mean of the
	library's coverage at those two positions.  When either search fails, a
	warning is printed and the observed coverage at 'position' is returned.
	"""
	def real_coverage_is_known(candidate_position):
		return probability_read_from_gene_at_position_maps_to_gene_dict[specified_gene][candidate_position][specified_gene] == 1.0

	# walk downwards to the nearest position of known coverage
	low_position = position
	while (low_position >= 1) and (not real_coverage_is_known(low_position)):
		low_position -= 1
	found_low_position = (low_position >= 1)

	# walk upwards to the nearest position of known coverage
	high_position = position
	while (high_position <= length_of_tilling_sequence_for_specified_gene) and (not real_coverage_is_known(high_position)):
		high_position += 1
	found_high_position = (high_position <= length_of_tilling_sequence_for_specified_gene)

	observed_coverage = get_coverage_of_library_at_position_of_gene(specified_gene, position, library, gene_to_line_dictionary_list_for_each_position)

	if not (found_low_position and found_high_position):
		print('Warning: our estimation failed for (%s,%d,%s) so we assume that the real coverage is equal to the observed coverage' % (specified_gene, position, library))
		return observed_coverage

	coverage_at_low_position = get_coverage_of_library_at_position_of_gene(specified_gene, low_position, library, gene_to_line_dictionary_list_for_each_position)
	coverage_at_high_position = get_coverage_of_library_at_position_of_gene(specified_gene, high_position, library, gene_to_line_dictionary_list_for_each_position)
	estimated_coverage = float(coverage_at_low_position + coverage_at_high_position) / float(2.0)
	print('rCovLow=%d, rCovHigh=%d, rCovEst=%f, obsCov=%d' % (coverage_at_low_position, coverage_at_high_position, estimated_coverage, \
			observed_coverage))

	return estimated_coverage

# Old function:
def correct_for_perfect_homolog_match(rmp_matrix, gene_list):
	""" Detect homologs whose entries in rmp_matrix are identical and
	pre-process the matrix accordingly.  Only 2 or 3 genes are supported.

	Returns (all_homologs_match, new_rmp_matrix, new_gene_list, new_number_of_genes):
	  * every homolog matches  -> (True, 'NA', 'NA', 'NA')
	  * exactly one pair matches (3 genes only) -> False plus the merged
	    matrix, gene list and gene count produced by
	    combine_homologs_x1_and_x2_in_the_case_of_three_homologs()
	  * nothing matches        -> (False, rmp_matrix, gene_list, len(gene_list))

	Raises IOError when gene_list has fewer than 2 or more than 3 entries.
	"""
	number_of_genes = len(gene_list)

	if number_of_genes in (0, 1):
		# this should not happen
		raise IOError('Internal error: gene list should not have length 1 or 0 at this point')
	if number_of_genes > 3:
		raise IOError('Internal error: correct_for_perfect_homolog_match() does not work for more than three genes')

	everything_matches = (True, 'NA', 'NA', 'NA')
	nothing_to_do = (False, rmp_matrix, gene_list, number_of_genes)

	if number_of_genes == 2:
		if rmp_matrix[0][0] == rmp_matrix[0][1]:
			# the two homologs match perfectly
			return everything_matches
		return nothing_to_do

	# three homologs: decide which pair (if any) has to be merged
	if rmp_matrix[0][0] == rmp_matrix[0][1]:
		if rmp_matrix[0][1] == rmp_matrix[0][2]:
			# all three homologs match in this region
			return everything_matches
		x1_x2_y_indices = (0, 1, 2)
	elif rmp_matrix[0][0] == rmp_matrix[0][2]:
		x1_x2_y_indices = (0, 2, 1)
	elif rmp_matrix[1][1] == rmp_matrix[1][2]:
		x1_x2_y_indices = (1, 2, 0)
	else:
		# no pre-processing is needed
		return nothing_to_do

	merged_rmp_matrix, merged_gene_list, merged_number_of_genes = \
			combine_homologs_x1_and_x2_in_the_case_of_three_homologs(rmp_matrix, gene_list, *x1_x2_y_indices)
	return False, merged_rmp_matrix, merged_gene_list, merged_number_of_genes

# Old function:
def combine_homologs_x1_and_x2_in_the_case_of_three_homologs(rmp_matrix, gene_list, gene_x1_idx, gene_x2_idx, gene_y_idx):
	""" Collapse homologs x1 and x2 into one merged entry, leaving homolog y separate.

	rmp_matrix is read at [x1][x1], [y][y] and, for the coverage values,
	at column 3 of rows x1, x2 and y.

	Returns (new_rmp_matrix, new_gene_list, new_number_of_genes) where
	  * new_gene_list is ['<x1>\\t<x2>', '<y>'] (merged names joined by a tab),
	  * new_number_of_genes is 2,
	  * new_rmp_matrix is a 2 x 3 matrix: row 0 belongs to {x1, x2}, row 1 to y,
	    and the last column holds the coverage.
	"""
	merged_gene_name = gene_list[gene_x1_idx] + '\t' + gene_list[gene_x2_idx]
	new_gene_list = [merged_gene_name, gene_list[gene_y_idx]]

	# probability that read from {x1, x2} maps to {x1, x2}
	merged_maps_to_merged = 2 * rmp_matrix[gene_x1_idx][gene_x1_idx]
	# probability that read from y maps to y
	y_maps_to_y = rmp_matrix[gene_y_idx][gene_y_idx]

	# combined coverage for {x1, x2}, and coverage for y
	merged_coverage = rmp_matrix[gene_x1_idx][3] + rmp_matrix[gene_x2_idx][3]
	y_coverage = rmp_matrix[gene_y_idx][3]

	new_rmp_matrix = [
		# [ {x1,x2} -> {x1,x2},        y -> {x1,x2},    coverage of {x1,x2} ]
		[merged_maps_to_merged, 1 - y_maps_to_y, merged_coverage],
		# [ {x1,x2} -> y,              y -> y,          coverage of y ]
		[1 - merged_maps_to_merged, y_maps_to_y, y_coverage],
	]

	return new_rmp_matrix, new_gene_list, len(new_gene_list)

# Old function:
def correct_real_coverage_for_gene_list_for_a_single_perfect_homolog_match(real_coverage_for_gene_list_after_merge, gene_list_after_merge):
	""" Undo a two-gene merge: every gene name of the form 'geneA\\tgeneB' is
	split back into its two genes and its coverage is divided evenly
	(as floats) between them.  Unmerged genes are passed through untouched.

	Returns (real_coverage_for_final_gene_list, final_gene_list), two lists
	of equal length in the same order as the input.

	Raises IOError if a merged name does not consist of exactly two
	tab-separated parts.
	"""
	expanded_gene_names = []
	expanded_coverages = []

	for index, gene_name in enumerate(gene_list_after_merge):
		coverage = real_coverage_for_gene_list_after_merge[index]

		if '\t' not in gene_name:
			# ordinary (unmerged) gene
			expanded_gene_names.append(gene_name)
			expanded_coverages.append(coverage)
			continue

		merged_parts = gene_name.split('\t')
		if len(merged_parts) != 2:
			raise IOError('Found instance where more than two genes have been merged: \'%s\'' % gene_name)

		# each of the two merged genes receives half of the merged coverage
		expanded_gene_names.extend(merged_parts)
		for _ in merged_parts:
			expanded_coverages.append(float(coverage) / 2.0)

	return expanded_coverages, expanded_gene_names

def solve_NxN_system_of_independent_linear_equations_by_gaussian_elimination(matrix, num_equations, num_variables):
	""" Solve a square system of linear equations by Gaussian elimination
	with partial pivoting (largest absolute value in the column).

	Matrix is of type:
	   a b c q
	[[ 2 3 4 0]	eq1
	 [ 2 1 4 1]	eq2
	 [ 2 1 2 -1]]	eq3
	
	where for each equation:
		ax + by + cz = q
	
	---------------------------------------------------------------------------
	Limitations:
		The number of unknowns must be equal to the number of equations,
	and the equations must be independent of each other.
	---------------------------------------------------------------------------

	Note: 'matrix' is modified in place (rows are swapped and reduced).

	Returns (bool_computation_successful, value_of_variable_list).  When the
	elimination fails (zero pivot, incomplete cancellation, or a degenerate
	last equation) the flag is False, the list is empty, and the reason is
	printed.

	Raises IOError (from test_parameters_for_system_of_linear_equations_solver)
	when the parameters are inconsistent with the shape of 'matrix'.
	"""

	# do error checking on parameters
	test_parameters_for_system_of_linear_equations_solver(matrix, num_equations, num_variables)
	
	print('In linear equation solver code: ')
	DEBUG_print_matrix(matrix)

	# forward substitution
	bool_found_error = False
	error_message = ''
	for equation_index in range(0, (num_equations - 1)):
		best_row_index = find_row_with_maximum_absolute_value_for_column(equation_index, num_equations - 1, equation_index, matrix)
		# swap the best row with the current row
		swap_rows(best_row_index, equation_index, matrix)

		# work on a copy of the pivot row so that it is not affected by the updates below
		pivot_equation = list(matrix[equation_index])
		pivot_element = pivot_equation[equation_index]
		if pivot_element == 0.0:
			bool_found_error = True
			error_message = ('Internal error: pivot_element of equation %d is 0.0 -- cannot determine value of variable %d' % (equation_index, equation_index))
			break
		for row_index in range(equation_index + 1, num_equations):
			# eliminate the variable at equation_index
			leftmost_nonzero_element_in_current_row = float(matrix[row_index][equation_index])
			for i in range(0, len(pivot_equation)):
				value_to_subtract = float(pivot_equation[i]) / float(pivot_element) * leftmost_nonzero_element_in_current_row
				print('DEBUG: pivot elt normalization: ' + str(leftmost_nonzero_element_in_current_row / float(pivot_element)) \
						+ '\tassociated value from pivot equation: ' + str(float(pivot_equation[i])) \
						+ '\tvalue to subtract: ' + str(value_to_subtract))
				matrix[row_index][i] -= value_to_subtract

			# check that the constant for the variable at equation_index has been zeroed out
			if matrix[row_index][equation_index] != 0.0:
				bool_found_error = True
				error_message = ('Internal error: the variable at index %d did not cancel correctly in equation %d(zero_indexed)' % (equation_index, row_index))
				break
		#DEBUG_print_matrix(matrix)
		if bool_found_error:
			break
	
	# Only inspect the last equation when the elimination itself succeeded, so that
	# the diagnostic of an earlier failure is not overwritten.
	if (not bool_found_error) and (matrix[num_equations - 1][num_equations - 1] == 0.0):
		bool_found_error = True
		if matrix[num_equations - 1][num_equations] != 0.0:
			error_message = ('Internal error: found a contradiction [ %f * X_n == %f ] on the last equation' % \
					 (float(matrix[num_equations - 1][num_equations - 1]), float(matrix[num_equations - 1][num_equations])))
		else:
			error_message = 'Internal error: the last equation cancelled out -- cannot determine value of the last variable'
	
	if bool_found_error:
		# report why the system could not be solved (previously this message was built but never shown)
		print(error_message)
		value_of_variable_list = []
	else:
		# backward substitution
		value_of_variable_list = [0 for i in range(0, num_variables)]

		right_hand_side_index = num_variables

		# solve from the last equation up to the first
		for equation_index in range(num_equations - 1, -1, -1):
			# solve for unknown variable
			right_hand_side_constant = matrix[equation_index][right_hand_side_index]
			weight_of_current_variable = matrix[equation_index][equation_index]
			weighted_sum_of_remaining_variables = 0.0
			for variable_index in range(equation_index + 1, num_variables):
				weighted_sum_of_remaining_variables += matrix[equation_index][variable_index] * value_of_variable_list[variable_index]

			value_of_variable_list[equation_index] = float(right_hand_side_constant - weighted_sum_of_remaining_variables) / float(weight_of_current_variable)
	
	# fixed: the original concatenated instead of formatting, so a literal '%s' was printed
	print('value_of_variable_list: %s' % str(value_of_variable_list))
	bool_computation_successful = (not bool_found_error)
	print('Found error: %s, Success: %s' % (str(bool_found_error), str(bool_computation_successful)))
	return bool_computation_successful, value_of_variable_list

def test_parameters_for_system_of_linear_equations_solver(matrix, num_equations, num_variables):
	""" Validate the arguments of the linear equation solver.

	Checks, in this order, that
	  1. num_equations equals num_variables,
	  2. num_equations is positive,
	  3. matrix has exactly num_equations rows,
	  4. every row has num_variables + 1 entries (coefficients plus right-hand side).

	Returns None; raises IOError on the first check that fails.
	"""
	if num_equations != num_variables:
		raise IOError('Error in system of linear equations solver: number of equations must equal number of variables')
	if num_equations <= 0:
		raise IOError('A positive, non-zero number of linear equations must be submitted')

	# test number of equations
	number_of_rows = len(matrix)
	if number_of_rows != num_equations:
		raise IOError('The number of equations in the matrix (%d) does not match the specified value of the variable \'num_equations\' (%d)' % \
				(number_of_rows, num_equations))

	# test number of variables
	required_row_length = num_variables + 1
	for equation_index in range(0, num_equations):
		if len(matrix[equation_index]) != required_row_length:
			raise IOError('The length of the list for equation %d must be one plus the number of unknown variables in the system of linear equations' % equation_index)

def find_row_with_maximum_absolute_value_for_column(start_row, stop_row, column, matrix):
	""" Return the index of the row in start_row..stop_row (inclusive) whose
	entry in 'column' has the largest absolute value.  On ties the first
	such row wins.  Returns 'NA' if there are no rows in the row range.
	"""
	best_row = 'NA'
	largest_magnitude = None
	for candidate_row in range(start_row, stop_row + 1):
		magnitude = abs(matrix[candidate_row][column])
		nothing_chosen_yet = (best_row == 'NA')
		# strict '>' keeps the earliest row when magnitudes are equal
		if nothing_chosen_yet or (magnitude > largest_magnitude):
			best_row, largest_magnitude = candidate_row, magnitude
	return best_row

def swap_rows(i, j, matrix):
	""" Exchange rows i and j of 'matrix' in place.  Returns None. """
	matrix[i], matrix[j] = matrix[j], matrix[i]

def DEBUG_print_matrix(matrix):
	""" Write 'matrix' to standard output: a header line, then one line per
	row in which every element is followed by a tab.  Returns None.
	"""
	print('DEBUG: Printing matrix:')
	for row in matrix:
		cells = [str(element) + '\t' for element in row]
		sys.stdout.write(''.join(cells) + '\n')

	# ----- MUST ------ test for linear independence

	

#	CONST = 100
#	low_position = position
#	while probability_read_at_position_maps_to_gene_dict[low_position][gene_name] != 1.0:
#		low_position -= 1
#		if (low_position == 0) or ((position - low_position) > CONST):
#			# we ran off the end, or we have gone too far
#			low_position = 'NA'
#			break
#
#	high_position = position
#	while probability_read_at_position_maps_to_gene_dict[high_position][gene_name] != 1.0:
#		high_position += 1
#		if (high_position > len(global_tilling_sequence)) or ((high_position - position) > CONST):
#			# we ran off the end, or we have gone too far
#			high_position = 'NA'
#			break
#	
#	if (low_position == 'NA') or (high_position == 'NA'):
#		raise IOError, 'Found position at which real number of base reads for gene %s may be difficult to determine'
#	else:

	

def get_copy_of_line_dictionary_list(line_dictionary_list):
	""" Return a new list holding a copy (made by get_copy_of_line_dictionary())
	of every line dictionary in 'line_dictionary_list', in the same order.
	"""
	copied_dictionaries = []
	for original_dictionary in line_dictionary_list:
		copied_dictionaries.append(get_copy_of_line_dictionary(original_dictionary))
	return copied_dictionaries

def get_copy_of_line_dictionary(line_dictionary):
	""" Return a new dictionary with the same keys and values as 'line_dictionary'.

	A shallow copy is sufficient because every value is required to be a str
	(and is therefore immutable).

	Raises IOError if any value is not of type str.
	"""
	# (the unused local alias of global_header_field_list was removed: it was never
	# read and only made this function depend on a module-level global)
	copy_of_line_dictionary = {}
	for key in line_dictionary:
		value_string = line_dictionary[key]
		if not isinstance(value_string, str):
			raise IOError('The code in the function get_copy_of_line_dictionary() assumes that all the values in the line dictionary ' + \
					'are of type str (and are therefore immutable).')
		copy_of_line_dictionary[key] = value_string
	return copy_of_line_dictionary

def add_line_dictionary_list_to_combined_line_dictionary_list(combined_line_dictionary_list_for_current_position,
								line_dictionary_list_for_next_homolog_at_current_position, homolog):
	""" Merge the per-library line dictionaries of one more homolog into the
	combined per-library line dictionaries for the current position.

	For each of the first global_number_of_libraries combined dictionaries,
	the first dictionary of the homolog with the same "library" value is
	located and folded in with add_line_dictionary_to_combined_line_dictionary()
	(the combined dictionary is updated in place).

	Raises IOError if the homolog has no line dictionary for a library;
	'homolog' is only used in that error message.
	"""
	for library_index in range(0, global_number_of_libraries):
		combined_line_dictionary = combined_line_dictionary_list_for_current_position[library_index]
		wanted_library = combined_line_dictionary["library"]

		for homolog_line_dictionary in line_dictionary_list_for_next_homolog_at_current_position:
			if homolog_line_dictionary["library"] == wanted_library:
				break
		else:
			# the inner loop finished without finding a matching library
			raise IOError('Error: could not find line dictionary for library %s of homolog %s' % (wanted_library, homolog))

		add_line_dictionary_to_combined_line_dictionary(combined_line_dictionary, homolog_line_dictionary)
	return

def add_line_dictionary_to_combined_line_dictionary(combined_line_dictionary, line_dictionary_to_add):
	""" Fold 'line_dictionary_to_add' into 'combined_line_dictionary' (in place).

	Combining each field of the two line dictionaries is structured into three runs,
	where each run updates a different set of fields.

	The first run updates fields in combined_line_dictionary that depend on
	the original values of other fields in both combined_line_dictionary and
	line_dictionary_to_add, that are updated in the second run.
	The fields updated in the first run are called "primary" fields.
	They are updated in the first run because the original values of the
	secondary fields that they are based on are guaranteed to be intact.

	The second run updates fields in combined_line_dictionary that depend on
	the original values of the same field in both combined_line_dictionary and
	line_dictionary_to_add, or just on the original value of the same field in
	combined_line_dictionary.

	The third run updates fields in combined_line_dictionary that depend on 
	the updated values of other fields in combined_line_dictionary that were
	updated in the second or first runs.

	Raises IOError if global_header_field_list no longer has the layout this
	function was written for, if the two dictionaries belong to different
	libraries, or if a field is not recognized.
	"""

	if global_header_field_list != ["refseq", "position", "library", "refbase", "A", "a", "T", "t", "C", "c", "G", "g", "comma", "dot", "coverage", \
					"FrAa", "FrTt", "FrCc", "FrGg", "SkewA", "SkewT", "SkewC", "SkewG", "MQ(Aa)", "MQ(Tt)", "MQ(Cc)", "MQ(Gg)", \
					"MQ(CommaDot)", "MQ(all)", "deltaQ-A", "deltaQ-T", "deltaQ-C", "deltaQ-G", \
					"Aa_HQ_and_LQ",	"Tt_HQ_and_LQ",	"Cc_HQ_and_LQ",	"Gg_HQ_and_LQ",	"ref_HQ_and_LQ", \
					"FrHQ_for_Aa", "FrHQ_for_Tt", "FrHQ_for_Cc", "FrHQ_for_Gg", "FrHQ_for_ref", "quality_cutoff"]:
		raise IOError("global_header_field_list was modified: it is recommended that you change the function\n" + \
				"add_line_dictionary_to_combined_line_dictionary() to adjust to the change.\n" + \
				"Then, please adjust the condition for this error accordingly.")

	bases_uppercase = ['A', 'T', 'C', 'G']
	bases_lowercase = ['a', 't', 'c', 'g']
	bases_upper_and_lowercase = ['Aa', 'Tt', 'Cc', 'Gg']

	# These fields depend on secondary fields from both "combined_line_dictionary" and "line_dictionary_to_add"
	# so they must be updated before the secondary fields for "combined_line_dictionary" are updated:
	primary_fields_list = ["MQ(Aa)", "MQ(Tt)", "MQ(Cc)", "MQ(Gg)", "MQ(CommaDot)", "MQ(all)"]

	# These fields depend directly on updated primary and secondary fields
	tertiary_fields_list = ["FrAa", "FrTt", "FrCc", "FrGg", "SkewA", "SkewT", "SkewC", "SkewG", "FrHQ_for_Aa", "FrHQ_for_Tt", "FrHQ_for_Cc", "FrHQ_for_Gg", "FrHQ_for_ref", \
					"deltaQ-A", "deltaQ-T", "deltaQ-C", "deltaQ-G"]

	# first run: compute fields that depend on secondary fields from both "combined_line_dictionary" and "line_dictionary_to_add"
	for field in primary_fields_list:
		if (len(field) >= 4) and (field[0:3] == "MQ(") and (field[len(field)-1] == ')'):
			middle_term = field[3:(len(field)-1)]
			if middle_term in bases_upper_and_lowercase:
				base = middle_term[0]
				base_lc = middle_term[1]
				update_MQNn_for_combined_line_dictionary(combined_line_dictionary, line_dictionary_to_add, base, base_lc, field)
			elif middle_term == "CommaDot":
				update_MQref_for_combined_line_dictionary(combined_line_dictionary, line_dictionary_to_add, field)
			elif middle_term == "all":
				update_MQall_for_combined_line_dictionary(combined_line_dictionary, line_dictionary_to_add, field)
			else:
				raise IOError('Unrecognized middle term in field: \'%s\'' % field)
		else:
			# fixed: the field name is now actually interpolated into the message
			raise IOError('Unrecognized field: \'%s\'; should be of the form \'MQ(<something>)\'' % field)

	# second run: update secondary fields for "combined_line_dictionary"
	for field in global_header_field_list:
		if (field in tertiary_fields_list) or (field in primary_fields_list):
			continue
		if field in ["refseq", "quality_cutoff"]:
			# keep both values, joined by '+'
			combined_line_dictionary[field] += '+' + line_dictionary_to_add[field]
		elif field in ["position", "refbase"]:
			# the value of the combined dictionary is kept as is
			pass
		elif field == "library":
			if combined_line_dictionary[field] != line_dictionary_to_add[field]:
				raise IOError('Tried to combine two line dictionaries with different ' + field)
		elif is_integer_field_to_add(field, bases_uppercase, bases_lowercase):	# calls function is_integer_field_to_add()
			# counts are summed and stored back as strings
			combined_line_dictionary[field] = str(int(combined_line_dictionary[field]) + int(line_dictionary_to_add[field]))
		else:
			raise IOError('Unrecognized field: \'' + field + '\'')
	
	# third run: compute fields that depend on updated primary and secondary fields
	for field in tertiary_fields_list:
		if (len(field) == 4) and (field[0:2] == "Fr"):
			# update frequency of base change
			base = field[2]
			update_FrNn_for_combined_line_dictionary(combined_line_dictionary, base, field)
		elif (len(field) == 5) and (field[0:4] == "Skew"):
			# update Skew
			base = field[4]
			update_Skew_for_combined_line_dictionary(combined_line_dictionary, base, field)
		elif (len(field) >= 11) and (field[0:9] == "FrHQ_for_"):
			# update FrHQ
			suffix = field[9:]
			if suffix in bases_upper_and_lowercase:
				base = suffix[0]
				update_FrHQ_for_combined_line_dictionary(combined_line_dictionary, base, field)
			elif suffix == "ref":
				update_FrHQ_for_reference_in_combined_line_dictionary(combined_line_dictionary, field)
			else:
				raise IOError('Unrecognized suffix after \'FrHQ_for_\' in field: \'%s\'' % field)
		elif (len(field) == 8) and (field[0:7] == "deltaQ-"):
			# update deltaQ-N (the helper's name mentions FrHQ, but it writes the deltaQ field)
			base = field[7]
			update_FrHQ_for_base_in_combined_line_dictionary(combined_line_dictionary, base, field)
		else:
			raise IOError('Unrecognized field: \'' + field + '\'')
	return

def is_integer_field_to_add(field, bases_uppercase, bases_lowercase):
	""" Return True if 'field' is one of the integer count fields: a base
	listed in bases_uppercase or bases_lowercase, "comma", "dot", "coverage",
	or a name of at least 12 characters ending in "_HQ_and_LQ".
	"""
	ends_with_HQ_and_LQ = (len(field) >= 12) and (field[-10:] == "_HQ_and_LQ")
	if (field in bases_uppercase) or (field in bases_lowercase):
		return True
	if field in ["comma", "dot", "coverage"]:
		return True
	return ends_with_HQ_and_LQ

def update_MQNn_for_combined_line_dictionary(combined_line_dictionary, line_dictionary_to_add, base, base_lc, field):
	""" Replace "MQ(<base><base_lc>)" in combined_line_dictionary by the
	read-count-weighted mean of that field in the two dictionaries.

	The weights are the counts stored under 'base' plus 'base_lc' in each
	dictionary, so this must be called BEFORE those counts are summed into
	combined_line_dictionary.  'field' is accepted for signature
	compatibility only; the key is rebuilt from base and base_lc.

	Fix: the original stored the weighted SUM (it never divided by the total
	count), which is not on the same scale as the input values and compounds
	when more than two dictionaries are merged.  When the total count is 0
	the stored value is 0.0, as before.
	"""
	MQ_key = "MQ(" + base + base_lc + ")"
	Nn_combined = int(combined_line_dictionary[base]) + int(combined_line_dictionary[base_lc])
	MQNn_combined = float(combined_line_dictionary[MQ_key])
	Nn_dict_to_add = int(line_dictionary_to_add[base]) + int(line_dictionary_to_add[base_lc])
	MQNn_dict_to_add = float(line_dictionary_to_add[MQ_key])

	Nn_total = Nn_combined + Nn_dict_to_add
	if Nn_total == 0:
		# no reads at all: nothing to average
		MQNn = 0.0
	else:
		MQNn = ((MQNn_combined * Nn_combined) + (MQNn_dict_to_add * Nn_dict_to_add)) / float(Nn_total)
	combined_line_dictionary[MQ_key] = str(MQNn)
	return

def update_MQall_for_combined_line_dictionary(combined_line_dictionary, line_dictionary_to_add, field):
	""" Replace "MQ(all)" in combined_line_dictionary by the coverage-weighted
	mean of "MQ(all)" in the two dictionaries.

	The weights are the "coverage" values of each dictionary, so this must be
	called BEFORE the coverages are summed into combined_line_dictionary.
	'field' is accepted for signature compatibility only.

	Fix: the original stored the weighted SUM (it never divided by the total
	coverage).  When the total coverage is 0 the stored value is 0.0, as before.
	"""
	all_combined = int(combined_line_dictionary["coverage"])
	MQall_combined = float(combined_line_dictionary["MQ(all)"])
	all_dict_to_add = int(line_dictionary_to_add["coverage"])
	MQall_dict_to_add = float(line_dictionary_to_add["MQ(all)"])

	all_total = all_combined + all_dict_to_add
	if all_total == 0:
		# no reads at all: nothing to average
		MQall = 0.0
	else:
		MQall = ((MQall_combined * all_combined) + (MQall_dict_to_add * all_dict_to_add)) / float(all_total)
	combined_line_dictionary["MQ(all)"] = str(MQall)
	return

def update_MQref_for_combined_line_dictionary(combined_line_dictionary, line_dictionary_to_add, field):
	""" Replace "MQ(CommaDot)" in combined_line_dictionary by the
	read-count-weighted mean of "MQ(CommaDot)" in the two dictionaries.

	The weights are the "comma" plus "dot" counts of each dictionary, so this
	must be called BEFORE those counts are summed into
	combined_line_dictionary.  'field' is accepted for signature
	compatibility only.

	Fix: the original stored the weighted SUM (it never divided by the total
	count).  When the total count is 0 the stored value is 0.0, as before.
	"""
	ref_combined = int(combined_line_dictionary["comma"]) + int(combined_line_dictionary["dot"])
	MQref_combined = float(combined_line_dictionary["MQ(CommaDot)"])
	ref_dict_to_add = int(line_dictionary_to_add["comma"]) + int(line_dictionary_to_add["dot"])
	MQref_dict_to_add = float(line_dictionary_to_add["MQ(CommaDot)"])

	ref_total = ref_combined + ref_dict_to_add
	if ref_total == 0:
		# no reference reads at all: nothing to average
		MQref = 0.0
	else:
		MQref = ((MQref_combined * ref_combined) + (MQref_dict_to_add * ref_dict_to_add)) / float(ref_total)
	combined_line_dictionary["MQ(CommaDot)"] = str(MQref)
	return

def update_FrNn_for_combined_line_dictionary(combined_line_dictionary, base, field):
	""" Store in combined_line_dictionary[field] the fraction
	(count[base] + count[base.lower()]) / coverage, as a string.

	'base' must be uppercase A, C, T or G; otherwise IOError is raised.
	"""
	if base not in ('A', 'C', 'T', 'G'):
		raise IOError('Third character in field \'%s\' is not uppercase A, C, T, or G' % field)

	reads_with_base = int(combined_line_dictionary[base]) + int(combined_line_dictionary[base.lower()])
	total_reads = int(combined_line_dictionary["coverage"])
	combined_line_dictionary[field] = str(float(reads_with_base) / float(total_reads))

def update_Skew_for_combined_line_dictionary(combined_line_dictionary, base, field):
	""" Store in combined_line_dictionary[field] the skew of 'base', as a string:

		((N + 1) * (dot + 1)) / ((n + 1) * (comma + 1))

	where N and n are the counts under the uppercase and lowercase base and
	comma/dot are the counts under "comma" and "dot".  The +1 terms keep the
	ratio defined when a count is zero.

	'base' must be uppercase A, C, T or G; otherwise IOError is raised.
	"""
	if base not in ('A', 'C', 'T', 'G'):
		raise IOError('Fifth character in field \'%s\' is not uppercase A, C, T, or G' % field)

	uppercase_count = int(combined_line_dictionary[base])
	lowercase_count = int(combined_line_dictionary[base.lower()])
	comma_count = int(combined_line_dictionary["comma"])
	dot_count = int(combined_line_dictionary["dot"])

	numerator = (uppercase_count + 1) * (dot_count + 1)
	denominator = (lowercase_count + 1) * (comma_count + 1)
	combined_line_dictionary[field] = str(float(numerator) / float(denominator))

def update_FrHQ_for_combined_line_dictionary(combined_line_dictionary, base, field):
	""" Store in combined_line_dictionary[field] the fraction
	(count[base] + count[base.lower()]) / count["<Base><base>_HQ_and_LQ"],
	as a string, or 'NA' when the "_HQ_and_LQ" count is zero.

	'base' must be uppercase A, C, T or G; otherwise IOError is raised.

	Fix: the zero test used to compare the stored value directly with the
	integer 0.  The counts are stored as strings, so the test never fired and
	a zero count ended in a ZeroDivisionError instead of 'NA'.
	"""
	if not (base in ['A', 'C', 'T', 'G']):
		raise IOError('Tenth character in field \'%s\' is not uppercase A, C, T, or G' % field)
	base_lc = base.lower()

	# convert before testing for zero: the dictionary holds '0', not 0
	total_reads_for_base = int(combined_line_dictionary[base + base_lc + "_HQ_and_LQ"])
	if total_reads_for_base == 0:
		combined_line_dictionary[field] = 'NA'
	else:
		HQ_reads_for_base = int(combined_line_dictionary[base]) + int(combined_line_dictionary[base_lc])
		frequency_of_high_quality_reads = float(HQ_reads_for_base) / float(total_reads_for_base)
		combined_line_dictionary[field] = str(frequency_of_high_quality_reads)
	return

def update_FrHQ_for_reference_in_combined_line_dictionary(combined_line_dictionary, field):
	""" Store in combined_line_dictionary[field] the fraction
	(comma + dot) / ref_HQ_and_LQ, as a string, or 'NA' when the
	"ref_HQ_and_LQ" count is zero.

	Fix: the zero test used to compare the stored value directly with the
	integer 0.  The counts are stored as strings, so the test never fired and
	a zero count ended in a ZeroDivisionError instead of 'NA'.
	"""
	# convert before testing for zero: the dictionary holds '0', not 0
	total_reference_reads = int(combined_line_dictionary["ref_HQ_and_LQ"])
	if total_reference_reads == 0:
		combined_line_dictionary[field] = 'NA'
	else:
		HQ_reference_reads = int(combined_line_dictionary["comma"]) + int(combined_line_dictionary["dot"])
		frequency_of_high_quality_reads = float(HQ_reference_reads) / float(total_reference_reads)
		combined_line_dictionary[field] = str(frequency_of_high_quality_reads)
	return

def update_FrHQ_for_base_in_combined_line_dictionary(combined_line_dictionary, base, field):
	""" Store in combined_line_dictionary[field] the difference
	MQ(<Base><base>) - MQ(CommaDot), as a string.

	Despite the function name, this is the value written to the 'deltaQ-N'
	fields.  'base' must be uppercase A, C, T or G; otherwise IOError is raised.
	"""
	if base not in ('A', 'C', 'T', 'G'):
		raise IOError('Base in \'deltaQ-N\' field[%s] must be uppercase A, C, T, or G' % field)

	quality_for_base = float(combined_line_dictionary["MQ(" + base + base.lower() + ")"])
	quality_for_reference = float(combined_line_dictionary["MQ(CommaDot)"])
	combined_line_dictionary[field] = str(quality_for_base - quality_for_reference)

def evaluate_position_in_gene(line_dictionary_list_for_current_position, position,
				library_to_fraction_of_base_reads_that_come_from_global_gene_dict,
				candidate_list_for_gene,
				number_of_candidate_mutations_per_position_to_frequency_dict, estimated_prob_refbase_read_as_not_refbase_dict,
				prob_of_each_base_change_on_refbase_looks_reasonable_dict,
				output_option):
	""" Evaluate one position with the method named by global_method_to_use
	and report whether a mutation is predicted there.

	  * 'poisson2' : a single call with the placeholder mutation base 'N'
	  * 'bayesian' : a single call that takes no mutation base
	  * 'z_scores', 'alternate', 'outlier', 'poisson' : one call for each of
	    the mutation bases A, C, G and T (all four are always evaluated);
	    the result is True if any of them predicts a mutation

	All remaining arguments are handed through unchanged to the selected
	evaluation function.

	Raises IOError if 3-D pooling is requested for a method other than
	'bayesian', or if global_method_to_use is not one of the methods above.
	"""

	##### 3-D pooling not yet implemented for methods besides bayesian method: #########
	if global_use_3D_pooling and (global_method_to_use != 'bayesian'):
		raise IOError('3-D pooling is not yet implemented for methods besides bayesian method')

	# arguments that every evaluation function receives after (list, position[, mutation_base])
	shared_trailing_arguments = (library_to_fraction_of_base_reads_that_come_from_global_gene_dict,
					candidate_list_for_gene,
					number_of_candidate_mutations_per_position_to_frequency_dict,
					estimated_prob_refbase_read_as_not_refbase_dict,
					prob_of_each_base_change_on_refbase_looks_reasonable_dict,
					output_option)

	if global_method_to_use == 'poisson2':
		return evaluate_position_in_gene_for_mutation_to_specific_base__poisson_method_2(
				line_dictionary_list_for_current_position, position, 'N', *shared_trailing_arguments)

	if global_method_to_use == 'bayesian':
		return evaluate_position_in_gene_for_mutation__bayesian_method(
				line_dictionary_list_for_current_position, position, *shared_trailing_arguments)

	# the remaining methods are evaluated separately for every possible mutation base
	if global_method_to_use == 'z_scores':
		evaluate_for_mutation_base = evaluate_position_in_gene_for_mutation_to_specific_base__z_scores_method
	elif global_method_to_use == 'alternate':
		evaluate_for_mutation_base = evaluate_position_in_gene_for_mutation_to_specific_base__alternate_method
	elif global_method_to_use == 'outlier':
		evaluate_for_mutation_base = evaluate_position_in_gene_for_mutation_to_specific_base__outlier_method
	elif global_method_to_use == 'poisson':
		evaluate_for_mutation_base = evaluate_position_in_gene_for_mutation_to_specific_base__poisson_method
	else:
		raise IOError('Internal error, unexpected method: \'%s\'' % global_method_to_use)

	predict_mutation_at_position = False
	for mutation_base in ['A', 'C', 'G', 'T']:
		# no short-circuit: every base must be evaluated because the call records candidates
		if evaluate_for_mutation_base(line_dictionary_list_for_current_position, position, mutation_base, *shared_trailing_arguments):
			predict_mutation_at_position = True
	return predict_mutation_at_position



def evaluate_position_in_gene_for_mutation_to_specific_base__simple_validation_method(line_dictionary_list, position, mutation_base, 
								library_to_fraction_of_base_reads_that_come_from_global_gene_dict,
								candidate_list_for_gene, number_of_candidate_mutations_per_position_to_frequency_dict, 
								estimated_prob_refbase_read_as_not_refbase_dict, 
								prob_of_each_base_change_on_refbase_looks_reasonable_dict,
								output_option):
	""" Simple validation method: predict a mutation to 'mutation_base' at
	'position' when exactly one row library and exactly one column library
	pass the threshold test of
	get_candidate_library_list__simple_validation_method() (a "well"
	candidate), or -- only when global_well_only is False -- when exactly one
	row library or one column library passes (an "orphan" candidate).

	Side effects:
	  * number_of_candidate_mutations_per_position_to_frequency_dict is
	    incremented at key (#candidate rows * #candidate columns), even when
	    no mutation is predicted;
	  * a candidate dictionary is appended to candidate_list_for_gene when a
	    mutation is predicted.

	Returns True if a mutation is predicted, otherwise False.
	Raises IOError for output_option "standard_output" combined with
	global_verbose (raised after the candidate has been appended).
	"""

	preprocessing_result = perform_checks_and_preprocessing_for_evaluation_of_mutation_to_specific_base_at_position(line_dictionary_list, position, mutation_base)
	(bool_evaluate_position_for_mutation_to_specific_base, reference_base, mutation_base_lc,
		line_dictionary_list, line_dictionary_list_row_libraries, line_dictionary_list_column_libraries,
		line_dictionary_list_d_libraries,
		overall_SkewN) = preprocessing_result

	if not bool_evaluate_position_for_mutation_to_specific_base:
		return False

	## Camelina, wheat, and arabidopsis should only have G->A and C->T mutations
	organism_restricted_to_standard_changes = global_organism in ("Camelina_sativa", "Triticum_aestivum", "Triticum_durum", "arabidopsis_tetraploid")
	if organism_restricted_to_standard_changes:
		if (reference_base, mutation_base) not in (('G', 'A'), ('C', 'T')):
			return False

	candidate_row_list = get_candidate_library_list__simple_validation_method(line_dictionary_list_row_libraries, mutation_base, mutation_base_lc)
	candidate_column_list = get_candidate_library_list__simple_validation_method(line_dictionary_list_column_libraries, mutation_base, mutation_base_lc)
	number_of_candidate_rows = len(candidate_row_list)
	number_of_candidate_columns = len(candidate_column_list)

	## Record statistics on number of candidate wells:
	number_of_candidate_wells = number_of_candidate_rows * number_of_candidate_columns
	if number_of_candidate_wells in number_of_candidate_mutations_per_position_to_frequency_dict:
		number_of_candidate_mutations_per_position_to_frequency_dict[number_of_candidate_wells] += 1
	else:
		number_of_candidate_mutations_per_position_to_frequency_dict[number_of_candidate_wells] = 1

	## Handle all cases for number of good-looking rows and columns
	if (number_of_candidate_rows > 1) or (number_of_candidate_columns > 1):
		## found more than one good row or more than one good column
		return False
	if (number_of_candidate_rows == 0) and (number_of_candidate_columns == 0):
		## no good row or column
		return False

	is_well_candidate = (number_of_candidate_rows == 1) and (number_of_candidate_columns == 1)
	if not is_well_candidate:
		if not ((number_of_candidate_rows == 1) or (number_of_candidate_columns == 1)):
			raise IOError('unhandled case for number of good-looking rows and columns for simple validation method')
		## "orphan" mutation candidate
		if global_well_only:
			return False

	## predict a mutation
	candidate_dict = get_candidate_dict__simple_validation_method(candidate_row_list, candidate_column_list, position,
								      reference_base, mutation_base, mutation_base_lc, line_dictionary_list,
								      library_to_fraction_of_base_reads_that_come_from_global_gene_dict)
	candidate_list_for_gene.append(candidate_dict)

	if (output_option == "standard_output") and global_verbose:
		raise IOError('Error: global_verbose=True and output_option=standard_output is not handled by simple_validation_method')

	return True


def get_candidate_library_list__simple_validation_method(line_dictionary_list, mutation_base, mutation_base_lc):
	""" Return a list of (library_name, FrNn, coverage) tuples, in input
	order, for every line dictionary whose value from
	get_Laplace_corrected_FrNn_for_library() is strictly greater than
	global_simple_validation_method_threshold.  'coverage' is the integer
	value of the dictionary's "coverage" field.
	"""
	passing_libraries = []
	for line_dictionary in line_dictionary_list:
		library_name = line_dictionary["library"]
		corrected_frequency = get_Laplace_corrected_FrNn_for_library(line_dictionary, mutation_base, mutation_base_lc)
		if not (corrected_frequency > global_simple_validation_method_threshold):
			continue
		passing_libraries.append((library_name, corrected_frequency, int(line_dictionary["coverage"])))
	return passing_libraries

def get_candidate_dict__simple_validation_method(candidate_row_list, candidate_column_list, position,
						reference_base, mutation_base, mutation_base_lc, line_dictionary_list,
						library_to_fraction_of_base_reads_that_come_from_global_gene_dict):
	"""Build the output record for a candidate found by the simple validation method.

	candidate_row_list / candidate_column_list hold (library name, FrNn, coverage)
	tuples.  A side (row or column) is described only when its list holds exactly
	one tuple; otherwise every field of that side is reported as 'NA'.  The pool ID
	is looked up only when both a row and a column library are present.

	Returns a dict with the single key "string", whose value is one tab-separated
	line of 21 fields describing the candidate.
	"""

	def describe_side(candidate_tuples):
		## Returns (library name, library number, FrNn, coverage, FrHQ, fraction of base reads)
		## as strings, or six 'NA' strings unless exactly one candidate library was supplied.
		if len(candidate_tuples) != 1:
			return ('NA', 'NA', 'NA', 'NA', 'NA', 'NA')

		(library_name, FrNn_for_library, coverage_for_library) = candidate_tuples[0]
		number_string = str(get_number_from_library_name(library_name))
		FrNn_string = '%f' % FrNn_for_library
		coverage_string = '%d' % coverage_for_library

		library_record = get_single_line_dictionary_with_value_for_field(line_dictionary_list, 'library', library_name)
		FrHQ_string = library_record['FrHQ_for_' + mutation_base + mutation_base_lc]
		fraction_string = '%.3f' % float(library_to_fraction_of_base_reads_that_come_from_global_gene_dict[library_name])

		return (library_name, number_string, FrNn_string, coverage_string, FrHQ_string, fraction_string)

	## row side first, then column side
	(row_library_name, row_number, row_FrNn, row_coverage, row_FrHQ, row_fraction) = describe_side(candidate_row_list)
	(column_library_name, column_number, column_FrNn, column_coverage, column_FrHQ, column_fraction) = describe_side(candidate_column_list)

	## information shared by both sides
	organism_abbreviation = global_organism_name_to_abbreviation_dict[global_organism]
	gene_name = line_dictionary_list[0]["refseq"]

	pool_ID_string = 'NA'
	if (row_library_name != 'NA') and (column_library_name != 'NA'):
		pool_ID_string = str(get_2D_pool_ID(row_library_name, column_library_name))

	## the literal 'NA' entries are columns this method does not fill in
	output_fields = [
		organism_abbreviation, gene_name, 'NA',
		reference_base + str(position) + mutation_base, 'NA',
		row_number, column_number,
		str(pool_ID_string), 'NA', 'NA',
		'NA', 'NA', 'NA',
		row_FrHQ, column_FrHQ,
		row_coverage, column_coverage,
		row_fraction, column_fraction,
		row_FrNn, column_FrNn,
	]

	return {"string": '\t'.join(output_fields)}




## Takes into account estimates, specific to the current position, of the error rate of calling the specified mutation base,
## although these estimates are not weighted by the coverage levels of individual libraries

##  Uses the same preprocessing as the bayesian method

def evaluate_position_in_gene_for_mutation_to_specific_base__z_scores_method(
								line_dictionary_list, position, mutation_base, library_to_fraction_of_base_reads_that_come_from_global_gene_dict,
								candidate_list_for_gene, number_of_candidate_mutations_per_position_to_frequency_dict,
								estimated_prob_refbase_read_as_not_refbase_dict,
								prob_of_each_base_change_on_refbase_looks_reasonable_dict,
								output_option):
	"""Look for a mutation to mutation_base at one position using z-scores of FrNn values.

	Well candidates: for every (row library, column library) pair, the smaller of the
	two libraries' FrNn values is converted to a z-score against the mean and standard
	deviation (plus a constant .0001) of the FrNn values of all remaining libraries.
	A pair is a candidate when the SkewN and FrHQ checks pass and, if
	global_z_score_threshold is numeric, the z-score exceeds it.

	  * global_z_score_threshold == 'NA': every pair passing SkewN/FrHQ is ranked by
	    z-score; the best one (or all, if global_print_all_feasible_candidates) is
	    appended to candidate_list_for_gene with two extra columns (next-best z-score,
	    own z-score).  A mutation is predicted if at least one pair qualifies.
	  * otherwise: a mutation is predicted only if exactly one pair qualifies; that
	    pair (or all pairs, if global_print_all_feasible_candidates) is appended.

	Orphan (single-library) candidates are evaluated the same way, one library at a
	time, only when global_well_only is False and no well candidate was found.  They
	are appended to candidate_list_for_gene but never change the return value.

	Parameters number_of_candidate_mutations_per_position_to_frequency_dict,
	estimated_prob_refbase_read_as_not_refbase_dict and
	prob_of_each_base_change_on_refbase_looks_reasonable_dict are not used here.

	Returns True if a well mutation is predicted at this position, else False.
	Raises IOError when output_option == "standard_output" and global_verbose is set.
	"""

	bool_evaluate_position_for_mutation_to_specific_base, reference_base, mutation_base_lc, \
		line_dictionary_list, line_dictionary_list_row_libraries, line_dictionary_list_column_libraries, \
		line_dictionary_list_d_libraries, \
		overall_SkewN \
		= perform_checks_and_preprocessing_for_evaluation_of_mutation_to_specific_base_at_position(line_dictionary_list, position, mutation_base)

	if not bool_evaluate_position_for_mutation_to_specific_base:
		return False

	## Camelina, wheat, and arabidopsis should only have G->A and C->T mutations
	if ((global_organism == "Camelina_sativa") or (global_organism == "Triticum_aestivum") or (global_organism == "Triticum_durum") or \
	    (global_organism == "arabidopsis_tetraploid")) and \
	   (not (((reference_base == 'G') and (mutation_base == 'A')) or ((reference_base == 'C') and (mutation_base == 'T')))):
		return False

	## key of the FrNn value for this mutation base in each library's line dictionary
	FrNn_field = 'Fr' + mutation_base + mutation_base_lc
	## FrNn strings that mark a library without a usable FrNn value
	unusable_FrNn_strings = ['NaN', 'NA']
	## with no numeric threshold, candidates are ranked by z-score instead of filtered
	rank_by_z_score = (global_z_score_threshold == 'NA')


	## Check for well candidates

	found_candidate_well = False
	predict_mutation_at_position = False

	candidate_list = []

	for row_line_dictionary in line_dictionary_list_row_libraries:
		row_library_name = row_line_dictionary["library"]

		for column_line_dictionary in line_dictionary_list_column_libraries:
			column_library_name = column_line_dictionary["library"]

			## Check for a good-looking mutation candidate in the current pool (row, column combination)

			line_dictionary_list_other_libraries = exclude_from_line_dictionary_list(line_dictionary_list, "library", [row_library_name, column_library_name])

			## background distribution: FrNn of every other library, discarding NaN's and NA's
			FrNn_list = []
			for other_line_dictionary in line_dictionary_list_other_libraries:
				FrNn_string = other_line_dictionary[FrNn_field]
				if FrNn_string not in unusable_FrNn_strings:
					FrNn_list.append(float(FrNn_string))
			FrNn_list.sort()

			FrNn_string_row_library = row_line_dictionary[FrNn_field]
			FrNn_string_column_library = column_line_dictionary[FrNn_field]

			## The following checks will cause the mutation candidate to not be detected in any (row, column) library
			## combination, if there are fewer than 4 libraries for which FrNn != 'NaN' and FrNn != 'NA'
			if len(FrNn_list) <= 1:
				# cannot compute the standard deviation of the FrNn of the remaining viable libraries, so skip this pair
				continue
			if (FrNn_string_row_library in unusable_FrNn_strings) or (FrNn_string_column_library in unusable_FrNn_strings):
				# cannot determine the FrNn for both the row and the column, so skip this pair
				continue

			min_FrNn_of_row_or_column = min(float(FrNn_string_row_library),
							float(FrNn_string_column_library))

			mean_FrNn = compute_mean_of_list_of_numbers(FrNn_list)

			## Add extra constant (keeps the denominator non-zero) - should revise this
			stdev_FrNn = compute_stdev_of_list_of_numbers(FrNn_list)  + .0001

			z_score = float(min_FrNn_of_row_or_column - mean_FrNn) / float(stdev_FrNn)


			## Test SkewN

			skewN_looks_good, pval_of_SkewN_for_row_library, pval_of_SkewN_for_column_library, \
				found_accurate_pval_of_skewN_for_row_and_column_libraries, \
				refbase_orientation_bias_for_row, refbase_orientation_bias_for_column, \
				= skewN_looks_good_for_row_and_column_library(row_line_dictionary, column_line_dictionary, reference_base, mutation_base, mutation_base_lc)


			## Check percentage of base reads that are high quality

			FrHQ_looks_good, FrHQ_for_mutation_base_in_row_library, FrHQ_for_mutation_base_in_column_library = \
				percentage_of_base_change_reads_that_are_high_quality_looks_good_for_row_and_column_library(row_line_dictionary, column_line_dictionary,
															reference_base, mutation_base, mutation_base_lc)


			## predict a well mutation if the FrNn for both the selected row and column have a high enough z-score
			## with respect to the distribution of FrNn values for the other libraries

			if rank_by_z_score:
				if skewN_looks_good and FrHQ_looks_good:
					candidate_dict = get_well_candidate_dict__alternate_method_or_z_scores_method(
										row_line_dictionary, column_line_dictionary,
										position,
										mutation_base, mutation_base_lc,
										library_to_fraction_of_base_reads_that_come_from_global_gene_dict)
					## the z-score is the smallest threshold at which this well would no longer be called
					candidate_list.append((z_score, candidate_dict))
					found_candidate_well = True
			else:
				if (z_score > global_z_score_threshold) and skewN_looks_good and FrHQ_looks_good:
					## good-looking well mutation
					candidate_dict = get_well_candidate_dict__alternate_method_or_z_scores_method(
										row_line_dictionary, column_line_dictionary,
										position,
										mutation_base, mutation_base_lc,
										library_to_fraction_of_base_reads_that_come_from_global_gene_dict)
					candidate_list.append(candidate_dict)
					found_candidate_well = True


	if rank_by_z_score:
		if len(candidate_list) >= 1:
			predict_mutation_at_position = True
			## record best-looking well candidate or all feasible well candidates
			_record_ranked_candidates__z_scores_method(candidate_list, candidate_list_for_gene)
	else:
		if len(candidate_list) == 1:
			## found a single good-looking well mutation
			predict_mutation_at_position = True

		if global_print_all_feasible_candidates:
			candidate_list_for_gene.extend(candidate_list)
		elif len(candidate_list) == 1:
			candidate_list_for_gene.append(candidate_list[0])


	if (not global_well_only) and (not found_candidate_well):

		## Check for orphan candidates

		orphan_candidate_list = []

		for line_dictionary in line_dictionary_list:
			library_name = line_dictionary["library"]

			## Check for a strong signal in the current library

			line_dictionary_list_other_libraries = exclude_from_line_dictionary_list(line_dictionary_list, "library", [library_name])

			## The inner loop variable must NOT be called line_dictionary: reusing that name would
			## replace the library under test with the last of the other libraries for the rest
			## of this iteration.
			## NaN's and NA's are discarded exactly as in the well search above.
			FrNn_list = []
			for other_line_dictionary in line_dictionary_list_other_libraries:
				FrNn_string = other_line_dictionary[FrNn_field]
				if FrNn_string not in unusable_FrNn_strings:
					FrNn_list.append(float(FrNn_string))
			FrNn_list.sort()

			FrNn_string_of_current_library = line_dictionary[FrNn_field]

			if len(FrNn_list) <= 1:
				# cannot compute the standard deviation of the FrNn of the remaining viable libraries, so skip this library
				continue
			if FrNn_string_of_current_library in unusable_FrNn_strings:
				# cannot determine the FrNn of the current library, so skip this library
				continue

			FrNn_of_current_library = float(FrNn_string_of_current_library)

			mean_FrNn = compute_mean_of_list_of_numbers(FrNn_list)

			## Add extra constant (keeps the denominator non-zero) - SHOULD REVISE THIS
			stdev_FrNn = compute_stdev_of_list_of_numbers(FrNn_list)  + .0001

			z_score = float(FrNn_of_current_library - mean_FrNn) / float(stdev_FrNn)


			## Test SkewN

			skewN_looks_good, pval_of_SkewN_for_library, \
				found_accurate_pval_of_skewN_for_library, \
				refbase_orientation_bias_for_library, \
				deltaQ_N_for_library \
				= skewN_looks_good_for_library(line_dictionary, reference_base, mutation_base, mutation_base_lc)


			## Check percentage of base reads that are high quality

			FrHQ_looks_good, FrHQ_for_mutation_base_in_library = \
				percentage_of_base_change_reads_that_are_high_quality_looks_good_for_library(line_dictionary, reference_base, mutation_base, mutation_base_lc)


			## test if the FrNn for the current library has a high enough z-score
			## with respect to the distribution of FrNn values for the other libraries

			if rank_by_z_score:
				if skewN_looks_good and FrHQ_looks_good:
					candidate_dict = get_single_library_candidate_dict__alternate_method_or_z_scores_method(
										line_dictionary,
										position,
										mutation_base, mutation_base_lc,
										library_to_fraction_of_base_reads_that_come_from_global_gene_dict)
					orphan_candidate_list.append((z_score, candidate_dict))
			else:
				if (z_score > global_z_score_threshold) and skewN_looks_good and FrHQ_looks_good:
					## good-looking signal in library
					candidate_dict = get_single_library_candidate_dict__alternate_method_or_z_scores_method(
										line_dictionary,
										position,
										mutation_base, mutation_base_lc,
										library_to_fraction_of_base_reads_that_come_from_global_gene_dict)
					orphan_candidate_list.append(candidate_dict)


		if rank_by_z_score:
			if len(orphan_candidate_list) >= 1:
				## record best-looking individual library or all feasible individual libraries
				_record_ranked_candidates__z_scores_method(orphan_candidate_list, candidate_list_for_gene)
		else:
			if global_print_all_feasible_candidates:
				candidate_list_for_gene.extend(orphan_candidate_list)
			elif len(orphan_candidate_list) == 1:
				## found a single good-looking orphan
				candidate_list_for_gene.append(orphan_candidate_list[0])

	if (output_option == "standard_output") and global_verbose:
		raise IOError('Error: global_verbose=True and output_option=standard_output is not handled by simple_position_specific_method')


	return predict_mutation_at_position


def _record_ranked_candidates__z_scores_method(threshold_candidate_tuple_list, candidate_list_for_gene):
	"""Rank (z-score, candidate dict) tuples best-first and record the best one, or all of them.

	The list is sorted in place into descending order.  Only the top entry is
	recorded unless global_print_all_feasible_candidates is set, in which case
	every entry is.  Each recorded candidate's 'string' gains two tab-separated
	columns: the z-score of the next entry in the ranking ('NA' for the last
	entry) and the candidate's own z-score.  Recorded candidates are appended to
	candidate_list_for_gene.  An empty list is a no-op.
	"""
	if len(threshold_candidate_tuple_list) == 0:
		return

	threshold_candidate_tuple_list.sort()
	threshold_candidate_tuple_list.reverse()

	if global_print_all_feasible_candidates:
		range_end = len(threshold_candidate_tuple_list)
	else:
		range_end = 1

	for i in range(0, range_end):
		(min_thresh_to_not_call, candidate_dict) = threshold_candidate_tuple_list[i]

		## the two added columns bound the threshold range in which this candidate,
		## but not the next one in the ranking, is called
		if len(threshold_candidate_tuple_list) == (i + 1):
			candidate_dict['string'] += '\t%s\t%f' % ('NA', min_thresh_to_not_call)
		else:
			min_thresh_to_not_call_next_best = threshold_candidate_tuple_list[i + 1][0]
			candidate_dict['string'] += '\t%f\t%f' % (min_thresh_to_not_call_next_best, min_thresh_to_not_call)

		candidate_list_for_gene.append(candidate_dict)


## Takes into account estimates, specific to the current position, of the error rate of calling the specified mutation base
## Uses the same preprocessing as the bayesian method
def evaluate_position_in_gene_for_mutation_to_specific_base__alternate_method(
								line_dictionary_list, position, mutation_base, library_to_fraction_of_base_reads_that_come_from_global_gene_dict,
								candidate_list_for_gene, number_of_candidate_mutations_per_position_to_frequency_dict,
								estimated_prob_refbase_read_as_not_refbase_dict,
								prob_of_each_base_change_on_refbase_looks_reasonable_dict,
								output_option):
	"""Look for a well mutation to mutation_base by comparing FrNn with an estimated error rate.

	For every (row library, column library) pair, the error rate is estimated by
	get_base_change_rate() from all libraries except that pair; pairs without a
	valid estimate are skipped.  The Laplace-corrected FrNn of the row and of the
	column are then compared with that estimate:

	  * global_FrNn_threshold == 'NA': every pair passing the SkewN and FrHQ checks
	    is kept together with min(FrNn - error rate) over its two libraries.  The
	    pair with the largest margin is appended to candidate_list_for_gene, with
	    two extra columns (second-best margin or 'NA', best margin), and a mutation
	    is predicted.
	  * otherwise: a pair is kept when both FrNn values exceed
	    (error rate + global_FrNn_threshold) and the SkewN and FrHQ checks pass.
	    A mutation is predicted, and the pair appended, only if exactly one pair
	    is kept.

	Returns True if a mutation is predicted at this position, else False.
	Raises IOError when output_option == "standard_output" and global_verbose is set.
	"""

	(should_evaluate, reference_base, mutation_base_lc,
		line_dictionary_list, row_records, column_records,
		_d_library_records, _overall_SkewN) = \
		perform_checks_and_preprocessing_for_evaluation_of_mutation_to_specific_base_at_position(line_dictionary_list, position, mutation_base)

	if not should_evaluate:
		return False

	## Camelina, wheat, and arabidopsis should only have G->A and C->T mutations
	canonical_only_organisms = ("Camelina_sativa", "Triticum_aestivum", "Triticum_durum", "arabidopsis_tetraploid")
	if global_organism in canonical_only_organisms:
		is_G_to_A = (reference_base == 'G') and (mutation_base == 'A')
		is_C_to_T = (reference_base == 'C') and (mutation_base == 'T')
		if not (is_G_to_A or is_C_to_T):
			return False

	## without a numeric threshold, the wells are ranked instead of filtered
	rank_wells = (global_FrNn_threshold == 'NA')

	kept_wells = []

	for row_record in row_records:
		row_name = row_record["library"]

		for column_record in column_records:
			column_name = column_record["library"]

			## estimate the error rate from every library outside the current pool
			background_records = exclude_from_line_dictionary_list(line_dictionary_list, "library", [row_name, column_name])
			estimated_error_rate, valid_result = get_base_change_rate(background_records, reference_base, mutation_base, mutation_base_lc)
			if not valid_result:
				continue

			## SkewN test (only the overall verdict is used here)
			(skewN_ok, _row_skewN_pval, _column_skewN_pval,
				_accurate_skewN_pvals,
				_row_orientation_bias, _column_orientation_bias) = \
				skewN_looks_good_for_row_and_column_library(row_record, column_record, reference_base, mutation_base, mutation_base_lc)

			## high-quality read fraction test (only the overall verdict is used here)
			(FrHQ_ok, _row_FrHQ, _column_FrHQ) = \
				percentage_of_base_change_reads_that_are_high_quality_looks_good_for_row_and_column_library(row_record, column_record,
															reference_base, mutation_base, mutation_base_lc)

			row_FrNn = get_Laplace_corrected_FrNn_for_library(row_record, mutation_base, mutation_base_lc)
			column_FrNn = get_Laplace_corrected_FrNn_for_library(column_record, mutation_base, mutation_base_lc)

			if rank_wells:
				if skewN_ok and FrHQ_ok:
					## smallest margin of the two libraries above the estimated error rate
					margin = min((row_FrNn - estimated_error_rate),
						     (column_FrNn - estimated_error_rate))
					well = get_well_candidate_dict__alternate_method_or_z_scores_method(
								row_record, column_record,
								position,
								mutation_base, mutation_base_lc,
								library_to_fraction_of_base_reads_that_come_from_global_gene_dict)
					kept_wells.append((margin, well))
			else:
				adjusted_FrNn_threshold = estimated_error_rate + global_FrNn_threshold
				if (row_FrNn > adjusted_FrNn_threshold) and (column_FrNn > adjusted_FrNn_threshold) and skewN_ok and FrHQ_ok:
					## good-looking well mutation
					well = get_well_candidate_dict__alternate_method_or_z_scores_method(
								row_record, column_record,
								position,
								mutation_base, mutation_base_lc,
								library_to_fraction_of_base_reads_that_come_from_global_gene_dict)
					kept_wells.append(well)

	predict_mutation_at_position = False

	if rank_wells:
		if len(kept_wells) >= 1:
			## record best-looking well mutation
			predict_mutation_at_position = True

			kept_wells.sort()
			kept_wells.reverse()
			(best_margin, best_well) = kept_wells[0]

			## the two added columns give the FrNn threshold "sweet spot"
			## where only the best-looking well mutation is called
			if len(kept_wells) == 1:
				best_well['string'] += '\t%s\t%f' % ('NA', best_margin)
			else:
				second_best_margin = kept_wells[1][0]
				best_well['string'] += '\t%f\t%f' % (second_best_margin, best_margin)

			candidate_list_for_gene.append(best_well)
	elif len(kept_wells) == 1:
		## found a single good-looking well mutation
		predict_mutation_at_position = True
		candidate_list_for_gene.append(kept_wells[0])

	if (output_option == "standard_output") and global_verbose:
		raise IOError('Error: global_verbose=True and output_option=standard_output is not handled by simple_position_specific_method')

	return predict_mutation_at_position


####################################################################
####### Modified Poisson Method to take into account	      ######
####### all good-looking wells for a given mutation candidate ######
####################################################################

## Takes into account estimates, specific to the current position, of the error rate of calling the specified mutation base
## Uses the same preprocessing as the bayesian method
def evaluate_position_in_gene_for_mutation_to_specific_base__poisson_method(
								line_dictionary_list, position, mutation_base, library_to_fraction_of_base_reads_that_come_from_global_gene_dict,
								candidate_list_for_gene, number_of_candidate_mutations_per_position_to_frequency_dict,
								estimated_prob_refbase_read_as_not_refbase_dict,
								prob_of_each_base_change_on_refbase_looks_reasonable_dict,
								output_option):
	"""Look for well mutations to mutation_base using per-library Poisson p-values.

	The error rate is estimated by get_base_change_rate() from ALL libraries.  For
	every (row library, column library) pair, get_poisson_pvalue_for_library() is
	evaluated for the row and for the column against that error rate:

	  * global_poisson_pvalue_threshold == 'NA': every pair passing the SkewN and
	    FrHQ checks is kept together with the larger of its two p-values.  All kept
	    pairs are appended to candidate_list_for_gene in ascending p-value order,
	    each with two extra columns ('NA' and the p-value).
	  * otherwise: a pair is kept when both p-values are below the threshold and
	    the SkewN and FrHQ checks pass; all kept pairs are appended.

	Unlike the z-scores and alternate methods, ALL good-looking wells are recorded.

	Parameters number_of_candidate_mutations_per_position_to_frequency_dict,
	estimated_prob_refbase_read_as_not_refbase_dict and
	prob_of_each_base_change_on_refbase_looks_reasonable_dict are not used here.

	Returns True if at least one well is recorded, else False.
	Raises IOError when output_option == "standard_output" and global_verbose is set.
	"""

	bool_evaluate_position_for_mutation_to_specific_base, reference_base, mutation_base_lc, \
		line_dictionary_list, line_dictionary_list_row_libraries, line_dictionary_list_column_libraries, \
		line_dictionary_list_d_libraries, \
		overall_SkewN \
		= perform_checks_and_preprocessing_for_evaluation_of_mutation_to_specific_base_at_position(line_dictionary_list, position, mutation_base)

	if not bool_evaluate_position_for_mutation_to_specific_base:
		return False

	## Camelina, wheat, and arabidopsis should only have G->A and C->T mutations
	if ((global_organism == "Camelina_sativa") or (global_organism == "Triticum_aestivum") or (global_organism == "Triticum_durum") or \
	    (global_organism == "arabidopsis_tetraploid")) and \
	   (not (((reference_base == 'G') and (mutation_base == 'A')) or ((reference_base == 'C') and (mutation_base == 'T')))):
		return False


	predict_mutation_at_position = False

	## with no numeric threshold, every well passing SkewN/FrHQ is reported with its deciding p-value
	report_all_with_pvalue = (global_poisson_pvalue_threshold == 'NA')

	candidate_list = []

	## USE ALL LIBRARIES TO ESTIMATE ERROR RATE, AS IN 454 TILLING PAPER
	## The estimate therefore does not depend on the (row, column) pair being examined, so it is
	## computed once, on first use, instead of once per pair.
	## NOTE(review): this assumes get_base_change_rate() returns the same result for the same arguments.
	error_rate_estimate = None

	for row_line_dictionary in line_dictionary_list_row_libraries:

		for column_line_dictionary in line_dictionary_list_column_libraries:

			## Check for a good-looking mutation candidate in the current pool (row, column combination)

			if error_rate_estimate is None:
				error_rate_estimate = get_base_change_rate(line_dictionary_list, reference_base, mutation_base, mutation_base_lc)
			estimated_error_rate, valid_result = error_rate_estimate
			if not(valid_result):
				continue


			## Test SkewN

			skewN_looks_good, pval_of_SkewN_for_row_library, pval_of_SkewN_for_column_library, \
				found_accurate_pval_of_skewN_for_row_and_column_libraries, \
				refbase_orientation_bias_for_row, refbase_orientation_bias_for_column, \
				= skewN_looks_good_for_row_and_column_library(row_line_dictionary, column_line_dictionary, reference_base, mutation_base, mutation_base_lc)


			## Check percentage of base reads that are high quality

			FrHQ_looks_good, FrHQ_for_mutation_base_in_row_library, FrHQ_for_mutation_base_in_column_library = \
				percentage_of_base_change_reads_that_are_high_quality_looks_good_for_row_and_column_library(row_line_dictionary, column_line_dictionary,
															reference_base, mutation_base, mutation_base_lc)


			## predict a well mutation if the base change to coverage ratio for both the selected row and column is high enough above the estimated error rate,
			## using the Poisson distribution

			pvalue_for_row_library = get_poisson_pvalue_for_library(estimated_error_rate, row_line_dictionary, reference_base, mutation_base, mutation_base_lc)
			pvalue_for_column_library = get_poisson_pvalue_for_library(estimated_error_rate, column_line_dictionary, reference_base, mutation_base, mutation_base_lc)

			if report_all_with_pvalue:
				if skewN_looks_good and FrHQ_looks_good:
					## the larger p-value decides whether the well would be called at a given threshold
					max_thresh_to_not_call_well = max(pvalue_for_row_library, pvalue_for_column_library)

					candidate_dict = get_well_candidate_dict__alternate_method_or_z_scores_method(
										row_line_dictionary, column_line_dictionary,
										position,
										mutation_base, mutation_base_lc,
										library_to_fraction_of_base_reads_that_come_from_global_gene_dict)
					candidate_list.append((max_thresh_to_not_call_well, candidate_dict))
			else:
				if (pvalue_for_row_library < global_poisson_pvalue_threshold) and (pvalue_for_column_library < global_poisson_pvalue_threshold) and skewN_looks_good and FrHQ_looks_good:
					## good-looking well mutation
					candidate_dict = get_well_candidate_dict__alternate_method_or_z_scores_method(
										row_line_dictionary, column_line_dictionary,
										position,
										mutation_base, mutation_base_lc,
										library_to_fraction_of_base_reads_that_come_from_global_gene_dict)
					candidate_list.append(candidate_dict)

	#### Call ALL good-looking well occurrences ####
	if report_all_with_pvalue:
		if len(candidate_list) >= 1:
			predict_mutation_at_position = True

			## most significant (smallest p-value) wells first
			candidate_list.sort()
			for (max_thresh_to_not_call_well, well_candidate_dict) in candidate_list:
				## modify the mutation candidates file to output the deciding threshold of the mutation candidate at the current well
				well_candidate_dict['string'] += '\tNA\t%.20e' % max_thresh_to_not_call_well

				candidate_list_for_gene.append(well_candidate_dict)
	else:
		if len(candidate_list) >= 1:
			## found at least one well that looks good for the candidate mutation
			predict_mutation_at_position = True
			candidate_list_for_gene.extend(candidate_list)

	if (output_option == "standard_output") and global_verbose:
		raise IOError('Error: global_verbose=True and output_option=standard_output is not handled by \'poisson\' mutation detection method')

	return predict_mutation_at_position

####################################################################
####### Modified Poisson Method to take into account	      ######
####### all good-looking wells for a given mutation candidate ######
####################################################################
#######               					      ######
####### NOTE: This is the version that should be used to detect ####
#######     natural variation.  We also use this version to    #####
#######     detect mutations in organisms where both canonical #####
#######     and non-canonical mutations are induced by EMS     #####
#######     treatment.					      ######
#######               					      ######
####### IMPORTANT: This method should only be used with       ######
#######            parameter mutation_base == 'n'.            ######
#######               					      ######
####################################################################

##################################################################################################
###### VERY IMPORTANT: For future use, note warning comments written in the function below:
##################################################################################################

## Takes into account estimates, specific to the current position, of the error rate of calling the specified mutation base
## Uses the same preprocessing as the bayesian method
def evaluate_position_in_gene_for_mutation_to_specific_base__poisson_method_2(
								line_dictionary_list, position, mutation_base, library_to_fraction_of_base_reads_that_come_from_global_gene_dict,
								candidate_list_for_gene, number_of_candidate_mutations_per_position_to_frequency_dict, 
								estimated_prob_refbase_read_as_not_refbase_dict, 
								prob_of_each_base_change_on_refbase_looks_reasonable_dict,
								output_option):
	"""Poisson method (version 2): record EVERY good-looking well for a base change.

	Each (row library, column library) pair is tested for the change
	reference_base -> mutation_base at `position`.  The error rate is estimated
	from ALL libraries, and both libraries of the pair must pass the SkewN and
	FrHQ checks.

	* If global_poisson_pvalue_threshold == 'NA', every pair passing the
	  SkewN/FrHQ checks is recorded; the records are sorted by the larger of
	  the two library p-values and that value is appended to each record
	  string as '\\tNA\\t<value>'.
	* Otherwise a pair is recorded only when both library p-values are below
	  the threshold as well.

	Records are appended to `candidate_list_for_gene` (modified in place).

	The parameters number_of_candidate_mutations_per_position_to_frequency_dict,
	estimated_prob_refbase_read_as_not_refbase_dict and
	prob_of_each_base_change_on_refbase_looks_reasonable_dict are not used here.

	Returns True if at least one well was recorded, False otherwise.
	Raises IOError when output_option == "standard_output" while global_verbose is set.
	"""

	(should_evaluate_position, reference_base, mutation_base_lc,
	 line_dictionary_list, row_line_dictionaries, column_line_dictionaries,
	 d_line_dictionaries,
	 overall_SkewN) = \
		perform_checks_and_preprocessing_for_evaluation_of_mutation_to_specific_base_at_position(line_dictionary_list, position, mutation_base)

	if not should_evaluate_position:
		return False

	## Camelina, wheat, and arabidopsis should only have G->A and C->T mutations
	organisms_with_canonical_EMS_changes_only = ("Camelina_sativa", "Triticum_aestivum", "Triticum_durum", "arabidopsis_tetraploid")
	is_canonical_EMS_change = ((reference_base == 'G') and (mutation_base == 'A')) or \
				  ((reference_base == 'C') and (mutation_base == 'T'))
	if (global_organism in organisms_with_canonical_EMS_changes_only) and (not is_canonical_EMS_change):
		return False

	candidate_list = []

	for row_line_dictionary in row_line_dictionaries:
		row_library_name = row_line_dictionary["library"]

		for column_line_dictionary in column_line_dictionaries:
			column_library_name = column_line_dictionary["library"]

			## Check for a good-looking mutation candidate in the current pool (row, column combination)

			## USE ALL LIBRARIES TO ESTIMATE ERROR RATE, AS IN 454 TILLING PAPER
			## (the row/column library names above would only be needed to exclude the current pool)
			estimated_error_rate, valid_result = get_base_change_rate(line_dictionary_list, reference_base, mutation_base, mutation_base_lc)
			if not valid_result:
				continue

			## Test SkewN
			(skewN_looks_good, pval_of_SkewN_for_row_library, pval_of_SkewN_for_column_library,
			 found_accurate_pval_of_skewN_for_row_and_column_libraries,
			 refbase_orientation_bias_for_row, refbase_orientation_bias_for_column) = \
				skewN_looks_good_for_row_and_column_library(row_line_dictionary, column_line_dictionary, reference_base, mutation_base, mutation_base_lc)

			## Check percentage of base reads that are high quality
			######## NOTE: this is not set up correctly for Poisson method version 2 (natural variation), but we are not considering FrHQ, so it does not matter for our tests
			(FrHQ_looks_good, FrHQ_for_mutation_base_in_row_library, FrHQ_for_mutation_base_in_column_library) = \
				percentage_of_base_change_reads_that_are_high_quality_looks_good_for_row_and_column_library(row_line_dictionary, column_line_dictionary,
															reference_base, mutation_base, mutation_base_lc)

			## Poisson p-values of the observed base-change counts, given the estimated error rate
			row_pvalue = get_poisson_pvalue_for_library(estimated_error_rate, row_line_dictionary, reference_base, mutation_base, mutation_base_lc)
			column_pvalue = get_poisson_pvalue_for_library(estimated_error_rate, column_line_dictionary, reference_base, mutation_base, mutation_base_lc)

			record_threshold_with_candidate = (global_poisson_pvalue_threshold == 'NA')
			if record_threshold_with_candidate:
				## no threshold: every well that passes the quality checks is kept
				well_looks_good = skewN_looks_good and FrHQ_looks_good
			else:
				well_looks_good = (row_pvalue < global_poisson_pvalue_threshold) and \
						  (column_pvalue < global_poisson_pvalue_threshold) and \
						  skewN_looks_good and FrHQ_looks_good
			if not well_looks_good:
				continue

			if record_threshold_with_candidate:
				max_thresh_to_not_call_well = max(row_pvalue, column_pvalue)

			candidate_dict = get_well_candidate_dict__alternate_method_or_z_scores_method(
													    row_line_dictionary, column_line_dictionary,
													    position,
													    mutation_base, mutation_base_lc,
													    library_to_fraction_of_base_reads_that_come_from_global_gene_dict)

			if record_threshold_with_candidate:
				candidate_list.append((max_thresh_to_not_call_well, candidate_dict))
			else:
				candidate_list.append(candidate_dict)

	#### Call ALL good-looking well occurrences ####
	predict_mutation_at_position = (len(candidate_list) >= 1)

	if global_poisson_pvalue_threshold == 'NA':
		if predict_mutation_at_position:
			candidate_list.sort()
			for (max_thresh_to_not_call_well, well_candidate_dict) in candidate_list:
				## modify the mutation candidates file to output the deciding threshold of the mutation candidate at the current well
				well_candidate_dict['string'] += '\tNA\t%.20e' % max_thresh_to_not_call_well
				candidate_list_for_gene.append(well_candidate_dict)
	else:
		for candidate_dict in candidate_list:
			candidate_list_for_gene.append(candidate_dict)

	if (output_option == "standard_output") and global_verbose:
		raise IOError('Error: global_verbose=True and output_option=standard_output is not handled by \'poisson\' mutation detection method')

	return predict_mutation_at_position

def get_poisson_pvalue_for_library(estimated_error_rate, line_dictionary, reference_base, mutation_base, mutation_base_lc):
	"""Return the upper-tail Poisson p-value of the base-change count in one library.

	With x = number of reads showing the new base and
	lambda = estimated_error_rate * coverage, the result is
	P(X >= x) = 1 - sum_{i=0}^{x-1} exp(-lambda) * lambda^i / i!
	for X ~ Poisson(lambda).  A library with no reads of the new base therefore
	gets a p-value of 1.0.

	Parameters:
	  estimated_error_rate - expected per-read rate of the base change
	  line_dictionary      - per-library counts; reads the keys 'coverage' and the
	                         upper/lower-case base keys, all converted with int()
	  reference_base       - reference base (only used when mutation_base == 'N')
	  mutation_base        - new base, or 'N' for "any base other than the reference"
	  mutation_base_lc     - lower-case key of the new base (unused when mutation_base == 'N')

	Raises ValueError if mutation_base == 'N' and reference_base is not one of A/C/G/T.
	"""
	if mutation_base == 'N':
		## count reads of every base except the reference base
		num_base_reads_for_newbase = 0
		base_list = ['A', 'C', 'G', 'T']
		base_list.remove(reference_base)
		for base in base_list:
			base_lc = base.lower()
			num_base_reads_for_newbase += int(line_dictionary[base]) + int(line_dictionary[base_lc])
	else:
		num_base_reads_for_newbase = int(line_dictionary[mutation_base]) + int(line_dictionary[mutation_base_lc])
	total_num_base_reads = int(line_dictionary['coverage'])

	x = num_base_reads_for_newbase

	p = estimated_error_rate
	n = total_num_base_reads
	lambda_var = p * n

	## subtract P(X = 0), ..., P(X = x - 1) from 1; each term is obtained from the
	## previous one through P(X = i) = P(X = i - 1) * lambda / i
	pvalue = 1.0
	current_prob = math.pow(math.e, -lambda_var)
	for i in range(0, x):
		if i != 0:
			current_prob *= lambda_var
			current_prob /= float(i)
		pvalue -= current_prob

	## return the tail probability (not the last individual term); floating-point
	## cancellation can leave a tiny negative remainder, so clamp at zero
	return max(pvalue, 0.0)

## Takes into account estimates, specific to the current position, of the error rate of calling the specified mutation base
## Uses the same preprocessing as the bayesian method
def evaluate_position_in_gene_for_mutation_to_specific_base__outlier_method(
								line_dictionary_list, position, mutation_base, library_to_fraction_of_base_reads_that_come_from_global_gene_dict,
								candidate_list_for_gene, number_of_candidate_mutations_per_position_to_frequency_dict, 
								estimated_prob_refbase_read_as_not_refbase_dict, 
								prob_of_each_base_change_on_refbase_looks_reasonable_dict,
								output_option):
	"""Outlier method: look for a well (and optionally orphan libraries) carrying a base change.

	Well candidates: for every (row library, column library) pair the error rate
	is estimated from all OTHER libraries, and a p-value is computed for the row
	library and for the column library; the p-value of the well is the larger of
	the two.  Both libraries must also pass the SkewN and FrHQ checks.

	* If global_outlier_pvalue_threshold == 'NA', only the well with the smallest
	  p-value is recorded, with the second-best and best p-values appended to
	  its record string.
	* Otherwise a well is recorded only if it is the single well whose p-value
	  is below the threshold.

	Orphan candidates (only when global_well_only is false): each library is
	tested on its own against an error rate estimated from all other libraries.
	Orphans are appended to `candidate_list_for_gene` but do not influence the
	return value.

	`candidate_list_for_gene` is modified in place.  The parameters
	number_of_candidate_mutations_per_position_to_frequency_dict,
	estimated_prob_refbase_read_as_not_refbase_dict and
	prob_of_each_base_change_on_refbase_looks_reasonable_dict are not used here.

	Returns True if a well candidate was recorded, False otherwise.
	Raises IOError when output_option == "standard_output" while global_verbose is set.
	"""

	bool_evaluate_position_for_mutation_to_specific_base, reference_base, mutation_base_lc, \
		line_dictionary_list, line_dictionary_list_row_libraries, line_dictionary_list_column_libraries, \
		line_dictionary_list_d_libraries, \
		overall_SkewN \
		= perform_checks_and_preprocessing_for_evaluation_of_mutation_to_specific_base_at_position(line_dictionary_list, position, mutation_base)

	if not bool_evaluate_position_for_mutation_to_specific_base:
		return False

	## Camelina, wheat, and arabidopsis should only have G->A and C->T mutations
	if ((global_organism == "Camelina_sativa") or (global_organism == "Triticum_aestivum") or (global_organism == "Triticum_durum") or \
	    (global_organism == "arabidopsis_tetraploid")) and \
	   (not (((reference_base == 'G') and (mutation_base == 'A')) or ((reference_base == 'C') and (mutation_base == 'T')))):
		return False


	found_candidate_well = False
	predict_mutation_at_position = False

	candidate_list = []

	for row_line_dictionary in line_dictionary_list_row_libraries:
		row_library_name = row_line_dictionary["library"]

		for column_line_dictionary in line_dictionary_list_column_libraries:
			column_library_name = column_line_dictionary["library"]


			## Check for a good-looking mutation candidate in the current pool (row, column combination)

			## the error rate is estimated without the two libraries of the pool being tested
			line_dictionary_list_other_libraries = exclude_from_line_dictionary_list(line_dictionary_list, "library", [row_library_name, column_library_name])
			estimated_error_rate, valid_result = get_base_change_rate(line_dictionary_list_other_libraries, reference_base, mutation_base, mutation_base_lc)
			if not(valid_result):
				continue


			## Test SkewN

			skewN_looks_good, pval_of_SkewN_for_row_library, pval_of_SkewN_for_column_library, \
				found_accurate_pval_of_skewN_for_row_and_column_libraries, \
				refbase_orientation_bias_for_row, refbase_orientation_bias_for_column, \
				= skewN_looks_good_for_row_and_column_library(row_line_dictionary, column_line_dictionary, reference_base, mutation_base, mutation_base_lc)


			## Check percentage of base reads that are high quality

			FrHQ_looks_good, FrHQ_for_mutation_base_in_row_library, FrHQ_for_mutation_base_in_column_library = \
				percentage_of_base_change_reads_that_are_high_quality_looks_good_for_row_and_column_library(row_line_dictionary, column_line_dictionary,
															reference_base, mutation_base, mutation_base_lc)




			## predict a well mutation if both the row and column are outliers with a high enough pvalue, using a binomial distribution

			pvalue_of_row_library = get_pval_of_base_change_to_coverage_ratio_given_expected_base_change_rate(row_line_dictionary, estimated_error_rate, mutation_base, mutation_base.lower())
			## the column p-value must come from the COLUMN library's own counts
			pvalue_of_column_library = get_pval_of_base_change_to_coverage_ratio_given_expected_base_change_rate(column_line_dictionary, estimated_error_rate, mutation_base, mutation_base.lower())

			## a well is only as convincing as the weaker of its two libraries
			pvalue_of_well = max(pvalue_of_row_library, pvalue_of_column_library)

			#estimated_error_rate
			if global_outlier_pvalue_threshold == 'NA':
				if skewN_looks_good and FrHQ_looks_good:
					candidate_dict = get_well_candidate_dict__alternate_method_or_z_scores_method(
															    row_line_dictionary, column_line_dictionary,
															    position, 
															    mutation_base, mutation_base_lc,
															    library_to_fraction_of_base_reads_that_come_from_global_gene_dict)
					threshold_candidate_tuple = (pvalue_of_well, candidate_dict)
					candidate_list.append(threshold_candidate_tuple)
					found_candidate_well = True
			else:
				if (pvalue_of_well < global_outlier_pvalue_threshold) and skewN_looks_good and FrHQ_looks_good:
					## good-looking well mutation
					candidate_dict = get_well_candidate_dict__alternate_method_or_z_scores_method(
															    row_line_dictionary, column_line_dictionary,
															    position, 
															    mutation_base, mutation_base_lc,
															    library_to_fraction_of_base_reads_that_come_from_global_gene_dict)
					candidate_list.append(candidate_dict)
					found_candidate_well = True

	if global_outlier_pvalue_threshold == 'NA':
		if len(candidate_list) >= 1:
			## record best-looking well mutation
			predict_mutation_at_position = True

			## tuples sort by p-value first, so index 0 is the best well
			candidate_list.sort()
			(pvalue_of_best_well, best_well_candidate_dict) = candidate_list[0]
			if len(candidate_list) == 1:
				pvalue_of_second_best_well = 'NA'
			else:
				pvalue_of_second_best_well = candidate_list[1][0]

			## modify the mutation candidates file to output the FrNn threshold "sweet spot" 
			## where the best-looking well mutation is called
			if pvalue_of_second_best_well == 'NA':
				best_well_candidate_dict['string'] += '\t%s\t%f' % (pvalue_of_second_best_well, pvalue_of_best_well)
			else:
				best_well_candidate_dict['string'] += '\t%f\t%f' % (pvalue_of_second_best_well, pvalue_of_best_well)

			candidate_list_for_gene.append(best_well_candidate_dict)

	else:
		if len(candidate_list) == 1:
			## found a single good-looking well mutation
			predict_mutation_at_position = True

			candidate_dict = candidate_list[0]
			candidate_list_for_gene.append(candidate_dict)

	#if (not global_well_only) and (not found_candidate_well):
	if (not global_well_only):

		## Check for orphan candidates

		orphan_candidate_list = []

		for line_dictionary in line_dictionary_list:
			library_name = line_dictionary["library"]

			## Check for a strong signal in the current library

			line_dictionary_list_other_libraries = exclude_from_line_dictionary_list(line_dictionary_list, "library", [library_name])

			estimated_error_rate, valid_result = get_base_change_rate(line_dictionary_list_other_libraries, reference_base, mutation_base, mutation_base_lc)
			if not(valid_result):
				continue

			pvalue_of_library = get_pval_of_base_change_to_coverage_ratio_given_expected_base_change_rate(line_dictionary, estimated_error_rate, mutation_base, mutation_base.lower())

			## Test SkewN

			skewN_looks_good, pval_of_SkewN_for_library, \
				found_accurate_pval_of_skewN_for_library, \
				refbase_orientation_bias_for_library, \
				deltaQ_N_for_library \
				= skewN_looks_good_for_library(line_dictionary, reference_base, mutation_base, mutation_base_lc)


			## Check percentage of base reads that are high quality

			FrHQ_looks_good, FrHQ_for_mutation_base_in_library = \
				percentage_of_base_change_reads_that_are_high_quality_looks_good_for_library(line_dictionary, reference_base, mutation_base, mutation_base_lc)


			## test if the current library has a low enough pvalue

			if global_outlier_pvalue_threshold == 'NA':
				if skewN_looks_good and FrHQ_looks_good:
					candidate_dict = get_single_library_candidate_dict__alternate_method_or_z_scores_method(
															    line_dictionary,
															    position, 
															    mutation_base, mutation_base_lc,
															    library_to_fraction_of_base_reads_that_come_from_global_gene_dict)
					threshold_candidate_tuple = (pvalue_of_library, candidate_dict)
					orphan_candidate_list.append(threshold_candidate_tuple)
			else:
				if (pvalue_of_library <  global_outlier_pvalue_threshold) and skewN_looks_good and FrHQ_looks_good:
					## good-looking signal in library
					candidate_dict = get_single_library_candidate_dict__alternate_method_or_z_scores_method(
															    line_dictionary,
															    position, 
															    mutation_base, mutation_base_lc,
															    library_to_fraction_of_base_reads_that_come_from_global_gene_dict)
					orphan_candidate_list.append(candidate_dict)


		if global_outlier_pvalue_threshold == 'NA':
			if len(orphan_candidate_list) >= 1:
				orphan_candidate_list.sort()

				## record best-looking individual library
				## or all feasible individual libraries
				if global_print_all_feasible_candidates:
					range_end=len(orphan_candidate_list)
				else:
					range_end=1

				for i in range(0, range_end):
					(max_thresh_to_not_call_orphan, orphan_candidate_dict) = orphan_candidate_list[i]
					if len(orphan_candidate_list) == (i + 1):
						max_thresh_to_not_call_next_best_orphan = 'NA'
					else:
						max_thresh_to_not_call_next_best_orphan = orphan_candidate_list[i + 1][0]

					## modify the mutation candidates file to output the threshold range
					## in which only the best-looking orphan is called
					if max_thresh_to_not_call_next_best_orphan == 'NA':
						orphan_candidate_dict['string'] += '\t%s\t%f' % (max_thresh_to_not_call_next_best_orphan, max_thresh_to_not_call_orphan)
					else:
						orphan_candidate_dict['string'] += '\t%f\t%f' % (max_thresh_to_not_call_next_best_orphan, max_thresh_to_not_call_orphan)

					candidate_list_for_gene.append(orphan_candidate_dict)

		else:
			if global_print_all_feasible_candidates:
				for i in range(0, len(orphan_candidate_list)):
					candidate_dict = orphan_candidate_list[i]
					candidate_list_for_gene.append(candidate_dict)
			else:
				if len(orphan_candidate_list) == 1:
					## found a single good-looking orphan
					candidate_dict = orphan_candidate_list[0]
					candidate_list_for_gene.append(candidate_dict)



	if (output_option == "standard_output") and global_verbose:
		raise IOError('Error: global_verbose=True and output_option=standard_output is not handled by simple_position_specific_method')

	return predict_mutation_at_position





def get_well_candidate_dict__alternate_method_or_z_scores_method(row_line_dictionary, column_line_dictionary, position, 
						mutation_base, mutation_base_lc, library_to_fraction_of_base_reads_that_come_from_global_gene_dict):
	"""Build the tab-separated output record for a well (row + column) mutation candidate.

	Returns a dict with the single key "string".  Columns that this method does
	not compute are filled with 'NA'.  When mutation_base == 'N' the FrHQ and
	FrNn columns are filled with '--'.

	Raises IOError if either line dictionary is the placeholder 'NA'.
	"""

	if (row_line_dictionary == 'NA') or (column_line_dictionary == 'NA'):
		raise IOError('function \'get_well_candidate_dict__alternate_method_or_z_scores_method\' should only be called for well mutation candidates')

	## gene-level information is taken from the row library's line
	organism_abbreviation = global_organism_name_to_abbreviation_dict[global_organism]
	gene_name = row_line_dictionary["refseq"]
	reference_base = row_line_dictionary["refbase"]

	row_library_name = row_line_dictionary["library"]
	column_library_name = column_line_dictionary["library"]

	pool_ID_string = str(get_2D_pool_ID(row_library_name, column_library_name))

	row_library_number_string = str(get_number_from_library_name(row_library_name))
	column_library_number_string = str(get_number_from_library_name(column_library_name))

	mutation_is_any_base = (mutation_base == 'N')

	## fraction of high-quality reads among the reads showing the new base
	if mutation_is_any_base:
		FrHQ_string_for_row = '--'
		FrHQ_string_for_column = '--'
	else:
		FrHQ_key = 'FrHQ_for_' + mutation_base + mutation_base_lc
		FrHQ_string_for_row = row_line_dictionary[FrHQ_key]
		FrHQ_string_for_column = column_line_dictionary[FrHQ_key]

	coverage_string_for_row = row_line_dictionary["coverage"]
	coverage_string_for_column = column_line_dictionary["coverage"]

	gene_fraction_string_for_row = '%.3f' % float(library_to_fraction_of_base_reads_that_come_from_global_gene_dict[row_library_name])
	gene_fraction_string_for_column = '%.3f' % float(library_to_fraction_of_base_reads_that_come_from_global_gene_dict[column_library_name])

	if mutation_is_any_base:
		# NOTE: We can compute this, but I have not yet added the code in:
		FrNn_string_for_row = '--'
		FrNn_string_for_column = '--'
	else:
		FrNn_key = "Fr" + mutation_base + mutation_base_lc
		FrNn_string_for_row = row_line_dictionary[FrNn_key]
		FrNn_string_for_column = column_line_dictionary[FrNn_key]

	## one entry per output column, in output order
	output_fields = [
		organism_abbreviation,
		gene_name,
		'NA',
		reference_base + str(position) + mutation_base,
		'NA',
		row_library_number_string,
		column_library_number_string,
		str(pool_ID_string),
		'NA',
		'NA',
		'NA',
		'NA',
		'NA',
		FrHQ_string_for_row,
		FrHQ_string_for_column,
		coverage_string_for_row,
		coverage_string_for_column,
		gene_fraction_string_for_row,
		gene_fraction_string_for_column,
		FrNn_string_for_row,
		FrNn_string_for_column,
	]

	return {"string": '\t'.join(output_fields)}

def get_single_library_candidate_dict__alternate_method_or_z_scores_method(line_dictionary, position,
						mutation_base, mutation_base_lc, library_to_fraction_of_base_reads_that_come_from_global_gene_dict):
	"""Build the tab-separated output record for a single-library (orphan) mutation candidate.

	The record has the same columns as a well candidate.  Every row/column pair
	of columns holds the library's value on the side the library belongs to
	(row or column) and 'NA' on the other side; the pool ID column is 'NA'.

	Returns a dict with the single key "string".
	Raises IOError if the library is in neither global_row_library_list nor
	global_column_library_list.
	"""

	## get information for library
	organism_abbreviation = global_organism_name_to_abbreviation_dict[global_organism]
	gene_name = line_dictionary["refseq"]
	reference_base = line_dictionary["refbase"]
	library_name = line_dictionary["library"]

	pool_ID_string = 'NA'

	library_number_string = str(get_number_from_library_name(library_name))
	FrHQ_string = line_dictionary['FrHQ_for_' + mutation_base + mutation_base_lc]
	coverage_string = line_dictionary["coverage"]
	gene_fraction_string = '%.3f' % float(library_to_fraction_of_base_reads_that_come_from_global_gene_dict[library_name])
	FrNn_string = line_dictionary["Fr" + mutation_base + mutation_base_lc]

	## template that places a value in the row slot or the column slot of a column pair
	if library_name in global_row_library_list:
		## row library
		row_column_pair_template = '%s\tNA'
	elif library_name in global_column_library_list:
		## column library
		row_column_pair_template = 'NA\t%s'
	else:
		raise IOError('Did not recognize library \'%s\' as a row or a column library' % library_name)

	## one entry per output column (or row/column pair of columns), in output order
	output_fields = [
		organism_abbreviation,
		gene_name,
		'NA',
		reference_base + str(position) + mutation_base,
		'NA',
		row_column_pair_template % library_number_string,
		str(pool_ID_string),
		'NA',
		'NA',
		'NA',
		'NA',
		'NA',
		row_column_pair_template % FrHQ_string,
		row_column_pair_template % coverage_string,
		row_column_pair_template % gene_fraction_string,
		row_column_pair_template % FrNn_string,
	]

	return {"string": '\t'.join(output_fields)}



def get_Laplace_corrected_FrNn_for_library(line_dictionary, mutation_base, mutation_base_lc):
	"""Return (reads of the mutation base + 1) / (coverage + 2) for one library.

	The read count is the sum of the entries stored under `mutation_base` and
	`mutation_base_lc`; the coverage is the entry stored under 'coverage'.
	All three entries are converted with int().
	"""
	mutation_read_count = sum(int(line_dictionary[key]) for key in (mutation_base, mutation_base_lc))
	coverage = int(line_dictionary['coverage'])

	## add-one / add-two smoothing keeps the estimate strictly between 0 and 1
	return float(mutation_read_count + 1) / float(coverage + 2)


def evaluate_position_in_gene_for_mutation__bayesian_method(line_dictionary_list, position, library_to_fraction_of_base_reads_that_come_from_global_gene_dict,
								candidate_list_for_gene, number_of_candidate_mutations_per_position_to_frequency_dict, 
								estimated_prob_refbase_read_as_not_refbase_dict, 
								prob_of_each_base_change_on_refbase_looks_reasonable_dict,
								output_option):
	"""Bayesian method: record the best mutant genotype found at a position.

	The position is pre-processed once per base (A, C, G, T); bases rejected by
	the pre-processing (which includes the reference base itself) are dropped.
	get_best_mutant_genotype() then selects the best candidate among the
	remaining bases.  Its record, extended with the "one minus probability"
	values of the next-best and the best candidate, is appended to
	`candidate_list_for_gene` (modified in place).

	Returns False when no base can be evaluated or no candidate is found,
	True otherwise.
	Raises IOError if global_potential_mutation_pvalue_threshold is not 'NA'.
	"""

	## perform pre-processing to determine possible new bases
	reference_base = 'NA'
	possible_new_base_list = []
	for new_base in ['A', 'C', 'G', 'T']:
		(new_base_can_be_evaluated, reference_base, new_base_lc,
		 preprocessed_line_dictionary_list, preprocessed_row_libraries, preprocessed_column_libraries,
		 preprocessed_d_libraries,
		 overall_SkewN_for_new_base) = \
			perform_checks_and_preprocessing_for_evaluation_of_mutation_to_specific_base_at_position(line_dictionary_list, position, new_base)
		## the pre-processing rejects, among others, the reference base itself
		if new_base_can_be_evaluated:
			possible_new_base_list.append(new_base)

	if len(possible_new_base_list) == 0:
		return False

	## continue with the pre-processed lines returned by the last pre-processing call
	line_dictionary_list = preprocessed_line_dictionary_list

	candidate_list = []

	## Check for a pool with a statistically-significant chance of containing a mutant individual
	(one_minus_probability_of_best_candidate_mutation, chosen_new_base, chosen_row, chosen_column, chosen_d_library,
	 one_minus_probability_of_next_best_candidate_mutation,
	 probability_no_mutant_in_any_well, bool_found_at_least_one_candidate) = \
		get_best_mutant_genotype(reference_base, possible_new_base_list, line_dictionary_list, position, number_of_candidate_mutations_per_position_to_frequency_dict)

	if global_potential_mutation_pvalue_threshold != 'NA':
		raise IOError('Expected global_potential_mutation_pvalue_threshold == \'NA\' for BA-THRESHER')

	## Check if we found any mutation candidates (at any probability)
	if not bool_found_at_least_one_candidate:
		return False

	mutation_factor = one_minus_probability_of_best_candidate_mutation
	row_line_dictionary = get_line_dictionary_for_library(line_dictionary_list, chosen_row)
	column_line_dictionary = get_line_dictionary_for_library(line_dictionary_list, chosen_column)
	d_library_line_dictionary = 'NA'
	if global_use_3D_pooling:
		d_library_line_dictionary = get_line_dictionary_for_library(line_dictionary_list, chosen_d_library)

	## Record the selected mutation candidate
	well_candidate_dict = get_well_candidate_dict__bayesian_method(candidate_list, mutation_factor, position, reference_base, chosen_new_base,
						 row_line_dictionary, column_line_dictionary, d_library_line_dictionary,
						 chosen_row, chosen_column, chosen_d_library,
						 library_to_fraction_of_base_reads_that_come_from_global_gene_dict)

	## append the next-best and the best "one minus probability" values
	if one_minus_probability_of_next_best_candidate_mutation == 'NA':
		threshold_columns = '\tNA\t%.20e' % mutation_factor
	else:
		threshold_columns = '\t%.20e\t%.20e' % (one_minus_probability_of_next_best_candidate_mutation, mutation_factor)
	well_candidate_dict['string'] += threshold_columns

	candidate_list_for_gene.append(well_candidate_dict)

	return True


## Evaluate mutation sub-functions:

def perform_checks_and_preprocessing_for_evaluation_of_mutation_to_specific_base_at_position(line_dictionary_list, position, mutation_base):
	"""Decide whether a position can be evaluated for a change to `mutation_base`, and pre-process its lines.

	Returns the 8-tuple
	  (evaluate_flag, reference_base, mutation_base_lc,
	   line_dictionary_list, row_lines, column_lines, d_lines, overall_SkewN)

	evaluate_flag is False when the reference base is 'N' or equals
	`mutation_base` (mutation_base_lc and overall_SkewN are then the string
	'not_computed'), or when the coverage left after excluding one well's
	libraries falls below 10000.

	Raises IOError if `line_dictionary_list` is empty or if the configured
	number of libraries is too small to estimate an error rate.
	"""

	## Check for simple errors
	if len(line_dictionary_list) < 1:
		raise IOError('Error: Did not find any lines for current position')

	reference_base = line_dictionary_list[0]["refbase"]
	position_can_be_evaluated = (reference_base != 'N') and (reference_base != mutation_base)

	(line_dictionary_list, row_line_dictionaries, column_line_dictionaries, d_line_dictionaries) = \
		get_and_check_line_dictionary_lists_for_row_column_and_d_libraries(line_dictionary_list, position)

	## Break out of the preprocessing if we have already seen an error
	if not position_can_be_evaluated:
		return False, reference_base, 'not_computed', \
			line_dictionary_list, row_line_dictionaries, column_line_dictionaries, \
			d_line_dictionaries, \
			'not_computed'

	# NOTE:
	# 	This is an ad-hoc method to check that the computed error rates will be reasonably accurate
	total_coverage = get_sum_of_coverage(line_dictionary_list)

	## num_to_exclude: number of libraries to exclude when computing the error rate,
	## given a condition with a mutant in a single well
	if global_use_3D_pooling:
		total_num_libraries = global_num_pools_per_row_library + global_num_pools_per_column_library + global_num_pools_per_d_library
		num_to_exclude = 3
	else:
		total_num_libraries = global_num_pools_per_row_library + global_num_pools_per_column_library
		num_to_exclude = 2

	## at least one library must remain after the exclusion
	if total_num_libraries < (num_to_exclude + 1):
		raise IOError('Error: the total number of libraries is too small, so it is impossible to compute the error rate for a given base change at a given position\n' + \
				'given a mutant condition')

	coverage_left_after_exclusion = total_coverage * (total_num_libraries - num_to_exclude) / float(total_num_libraries)
	if coverage_left_after_exclusion < 10000:
		position_can_be_evaluated = False

	## get overall skewN (computed even when the coverage check above failed)
	mutation_base_lc = mutation_base.lower()
	overall_SkewN = get_overall_SkewN(line_dictionary_list, reference_base, mutation_base, mutation_base_lc)

	return position_can_be_evaluated, reference_base, mutation_base_lc, \
		line_dictionary_list, row_line_dictionaries, column_line_dictionaries, \
		d_line_dictionaries, \
		overall_SkewN


def get_and_check_line_dictionary_lists_for_row_column_and_d_libraries(line_dictionary_list, position):
	"""Restrict the lines of a position to the configured libraries and split them by library type.

	Returns (all_lines, row_lines, column_lines, d_lines); d_lines is the string
	'NA' unless global_use_3D_pooling is set.

	Raises IOError when global_number_of_libraries disagrees with the configured
	library lists, or when the number of lines found for the position (overall,
	or per library type) differs from the number of configured libraries.
	"""
	# remove any extra libraries
	list_of_all_libraries = global_row_library_list + global_column_library_list
	if global_use_3D_pooling:
		list_of_all_libraries = list_of_all_libraries + global_d_library_list
	line_dictionary_list = subset_line_dictionary_list(line_dictionary_list, "library", list_of_all_libraries)

	# sanity-check the configuration, then the lines found at this position
	num_listed_libraries = len(list_of_all_libraries)
	if global_number_of_libraries != num_listed_libraries:
		#print "DEBUG: list of all libraries: %s" % ",".join(list_of_all_libraries)
		raise IOError("Value of \"global_number_of_libraries\" (%d) is not equal to the total of all listed libraries (%d)" % (global_number_of_libraries, num_listed_libraries))

	num_lines_found = len(line_dictionary_list)
	if num_lines_found != global_number_of_libraries:
		## list the libraries that were found before failing
		for line_dict in line_dictionary_list:
			print('lib=%s' % line_dict['library'])
		print('Warning: Found %d libraries at position %d' % (num_lines_found, position))
		raise IOError("Did not find exactly one line for each library at this position: %d" % position)

	# isolate row, column, and d libraries
	row_line_dictionaries = subset_line_dictionary_list(line_dictionary_list, "library", global_row_library_list)
	column_line_dictionaries = subset_line_dictionary_list(line_dictionary_list, "library", global_column_library_list)
	if len(row_line_dictionaries) != len(global_row_library_list):
		raise IOError('Did not find exactly one line for each row library')
	if len(column_line_dictionaries) != len(global_column_library_list):
		raise IOError('Did not find exactly one line for each column library')

	d_line_dictionaries = 'NA'
	if global_use_3D_pooling:
		d_line_dictionaries = subset_line_dictionary_list(line_dictionary_list, "library", global_d_library_list)
		if len(d_line_dictionaries) != len(global_d_library_list):
			raise IOError('Did not find exactly one line for each \'d\' library')

	return line_dictionary_list, row_line_dictionaries, column_line_dictionaries, d_line_dictionaries


def get_expected_base_change_rates_given_mutation_and_no_mutation(estimated_error_rate):
	"""Tabulate the expected base-change rate of a library that holds a single mutant.

	For each zygosity ('het', 'hom') and each library type ('row', 'column', and
	'd_library' when global_use_3D_pooling is set) the matching global mutation
	fraction is combined with estimated_error_rate by
	get_expected_base_change_rate_given_mutation().

	Despite the function name, only "given mutation" rates are produced here.

	Returns a nested dictionary indexed as result[zygosity][library_type].
	"""
	# NOTE: modified to NOT work with the old homolog-processing code.  The mutation
	# fractions used to be scaled, per library, by the fraction of base reads that
	# come from the gene; that per-library adjustment is no longer applied.
	fraction_for_zygosity_and_library_type = [
		('het', 'row', global_heterozygous_row_mutation_fraction),
		('hom', 'row', global_homozygous_row_mutation_fraction),
		('het', 'column', global_heterozygous_column_mutation_fraction),
		('hom', 'column', global_homozygous_column_mutation_fraction),
	]
	if global_use_3D_pooling:
		# the 'd' library fractions are only read when 3D pooling is in use
		fraction_for_zygosity_and_library_type.append(('het', 'd_library', global_heterozygous_d_library_mutation_fraction))
		fraction_for_zygosity_and_library_type.append(('hom', 'd_library', global_homozygous_d_library_mutation_fraction))

	rate_table = {'het': {}, 'hom': {}}
	for zygosity, library_type, mutation_fraction in fraction_for_zygosity_and_library_type:
		rate_table[zygosity][library_type] = \
			get_expected_base_change_rate_given_mutation(mutation_fraction, estimated_error_rate)
	return rate_table


def get_best_mutant_genotype(reference_base, possible_new_base_list, line_dictionary_list, position, number_of_candidate_mutations_per_position_to_frequency_dict):
	"""Pick the most probable (new base, well) mutation candidate at one position.

	A genotype is either 'no_mut_in_any_well' or a combination of a zygosity
	('het' / 'hom'), a new base and a well (row library, column library and, with
	3D pooling, a 'd' library).  For every genotype the joint probability
	    prior(genotype) * P(observed results | genotype)
	is computed.  Dividing a joint probability by the sum of all joint probabilities
	gives the probability of that genotype; the 'het' and 'hom' genotypes of one
	(base, well) combination are added together.

	Arguments:
	  reference_base -- reference base at this position (forwarded to the helpers)
	  possible_new_base_list -- the new bases that are considered as base changes
	  line_dictionary_list -- line dictionaries for this position; each one must have
	        a 'library' key (presumably one entry per library -- confirm against caller)
	  position -- only used in the error and warning messages
	  number_of_candidate_mutations_per_position_to_frequency_dict -- histogram that is
	        UPDATED IN PLACE: the entry for the number of candidates counted at this
	        position is incremented by one

	Returns the 8-tuple
	  (one_minus_probability_of_best_candidate_mutation, chosen_new_base, chosen_row,
	   chosen_column, chosen_d_library, one_minus_probability_of_next_best_candidate_mutation,
	   probability_no_mutant_in_any_well, bool_found_at_least_one_candidate)
	The string 'NA' replaces
	  - the first seven values when the sum of the joint probabilities is 0,
	  - every value except probability_no_mutant_in_any_well when no (base, well)
	    combination passed the pre-processing tests,
	  - one_minus_probability_of_next_best_candidate_mutation when there is only one candidate,
	  - chosen_d_library when 3D pooling is not used.

	Prints a warning when the sum of the joint probabilities is 0.
	Raises IOError when an internal consistency check fails.
	"""
	genotype_to_joint_probability_of_genotype_and_observed_results_dict = {}

	## estimate background rate of each base change from libraries that have no mutation according to the current genotype
	## Key layout of the three lookup tables filled below:
	##   error_rate_for_new_base_and_well_dict[new_base]['none' | well]
	##   expected_rate_of_new_base_given_mutation_for_well_zygosity_and_library_type_dict[new_base][well][zygosity][library_type]
	##   probability_of_base_change_to_coverage_ratio_for_library_dict[new_base]['none']['NA'][library]
	##   probability_of_base_change_to_coverage_ratio_for_library_dict[new_base][well][zygosity][library]
	error_rate_for_new_base_and_well_dict = {}
	expected_rate_of_new_base_given_mutation_for_well_zygosity_and_library_type_dict = {}
	probability_of_base_change_to_coverage_ratio_for_library_dict = {}

	row_line_dictionary_list = subset_line_dictionary_list(line_dictionary_list, "library", global_row_library_list)
	column_line_dictionary_list = subset_line_dictionary_list(line_dictionary_list, "library", global_column_library_list)
	if global_use_3D_pooling:
		d_library_line_dictionary_list = subset_line_dictionary_list(line_dictionary_list, "library", global_d_library_list)
	else:
		# single placeholder, so that the triple loop below runs once per (row, column) pair
		d_library_line_dictionary_list = ['NA']

	for new_base in possible_new_base_list:
		error_rate_for_new_base_and_well_dict[new_base] = {}
		expected_rate_of_new_base_given_mutation_for_well_zygosity_and_library_type_dict[new_base] = {}
		probability_of_base_change_to_coverage_ratio_for_library_dict[new_base] = {}
		probability_of_base_change_to_coverage_ratio_for_library_dict[new_base]['none'] = {}
		probability_of_base_change_to_coverage_ratio_for_library_dict[new_base]['none']['NA'] = {}

		## case of no mutation in any well
		## (the background rate is estimated from all libraries)
		error_rate_for_new_base_and_well_dict[new_base]['none'], valid_result = get_base_change_rate(line_dictionary_list, reference_base, new_base, new_base.lower())
		if not(valid_result):
			raise IOError, 'Internal error: previous coverage checks should ensure that this error does not occur - position %d' % position
		for line_dictionary in line_dictionary_list:
			library_name = line_dictionary['library']
			expected_base_change_rate = error_rate_for_new_base_and_well_dict[new_base]['none']
			probability_of_base_change_to_coverage_ratio_for_library_dict[new_base]['none']['NA'][library_name] = \
				get_probability_of_base_change_to_coverage_ratio_given_expected_base_change_rate(line_dictionary, 
														 expected_base_change_rate,
														 new_base,
														 new_base.lower())

		## case of mutation in single well
		for row_line_dictionary in row_line_dictionary_list:
			for column_line_dictionary in column_line_dictionary_list:
				for d_library_line_dictionary in d_library_line_dictionary_list:
					row_library = row_line_dictionary['library']
					column_library = column_line_dictionary['library']
					if global_use_3D_pooling:
						d_library = d_library_line_dictionary['library']
						excluded_library_list = [row_library, column_library, d_library]
					else:
						d_library = 'NA'
						excluded_library_list = [row_library, column_library]
					# with T7_tomato_and_rice set, wells that are not in the global well dictionary are not considered
					if T7_tomato_and_rice and (not well_is_in_global_well_dict(row_library, column_library, d_library)):
						continue
					# a well is identified by the tab-joined names of its libraries ('NA' as 'd' library without 3D pooling)
					well = row_library + '\t' + column_library + '\t' + d_library
					# the background rate for this case is estimated without the libraries of the candidate well
					line_dictionary_list_remaining_libraries = exclude_from_line_dictionary_list(line_dictionary_list, 'library', excluded_library_list)
					error_rate_for_new_base_and_well_dict[new_base][well], valid_result = get_base_change_rate(line_dictionary_list_remaining_libraries, reference_base, new_base, new_base.lower())
					if not(valid_result):
						raise IOError, 'Internal error: previous coverage checks should ensure that this error does not occur (2).'
                                        
					## estimate rate of each base change in a library with a homozygous or heterozygous mutation
					expected_rate_of_new_base_given_mutation_for_well_zygosity_and_library_type_dict[new_base][well] \
							= get_expected_base_change_rates_given_mutation_and_no_mutation(error_rate_for_new_base_and_well_dict[new_base][well])
                                        
					probability_of_base_change_to_coverage_ratio_for_library_dict[new_base][well] = {}
					for zygosity in ['het', 'hom']:
						probability_of_base_change_to_coverage_ratio_for_library_dict[new_base][well][zygosity] = {}
						for line_dictionary in line_dictionary_list:
							current_library = line_dictionary['library']
							if current_library in global_row_library_list:
								library_type = 'row'
							elif current_library in global_column_library_list:
								library_type = 'column'
							elif global_use_3D_pooling and (current_library in global_d_library_list):
								library_type = 'd_library'
							else:
								raise IOError, 'cannot determine if library \'%s\' is a row, column, or \'d\' library' % current_library
                                        
							if (current_library in [row_library, column_library]) or (global_use_3D_pooling and (current_library == d_library)):
								## if library is predicted to contain a mutation in the given case
								expected_base_change_rate = \
									expected_rate_of_new_base_given_mutation_for_well_zygosity_and_library_type_dict[new_base][well][zygosity][library_type]
							else:
								## if library is predicted to contain an error in the given case
								expected_base_change_rate = error_rate_for_new_base_and_well_dict[new_base][well]
                                        
							probability_of_base_change_to_coverage_ratio_for_library_dict[new_base][well][zygosity][current_library] = \
								get_probability_of_base_change_to_coverage_ratio_given_expected_base_change_rate(line_dictionary, 
																		 expected_base_change_rate,
																		 new_base,
																		 new_base.lower())

	## Joint probabilities of the observed base change ratios and
	## a candidate mutation for a base change not in "possible_new_base_list"
	## are implicitly set to 0
	if global_use_3D_pooling:
		list_of_d_libraries = global_d_library_list
	else:
		list_of_d_libraries = ['NA']
	for mutant_row_library in global_row_library_list:
		for mutant_column_library in global_column_library_list:
			for mutant_d_library in list_of_d_libraries:
				if T7_tomato_and_rice and (not well_is_in_global_well_dict(mutant_row_library, mutant_column_library, mutant_d_library)):
					continue
				for mut_base in possible_new_base_list:

					## Test whether we should consider the given base change in the specified well
					row_line_dictionary = get_line_dictionary_for_library(line_dictionary_list, mutant_row_library)
					column_line_dictionary = get_line_dictionary_for_library(line_dictionary_list, mutant_column_library)
					if global_use_3D_pooling:
						d_library_line_dictionary = get_line_dictionary_for_library(line_dictionary_list, mutant_d_library)
					else:
						d_library_line_dictionary = 'NA'
                                

					# Test SkewN
					skewN_looks_good = skewN_looks_good_for_row_column_and_d_library(row_line_dictionary, column_line_dictionary, 
													 d_library_line_dictionary, reference_base, mut_base, mut_base.lower())
                                

					# Check percentage of base reads that are high quality
					# (the two FrHQ values returned along with the flag are not used in this function)
					FrHQ_looks_good, FrHQ_for_mutation_base_in_row_library, FrHQ_for_mutation_base_in_column_library = \
						percentage_of_base_change_reads_that_are_high_quality_looks_good_for_row_and_column_library(row_line_dictionary, column_line_dictionary,
																	reference_base, mut_base, mut_base.lower())
					if global_use_3D_pooling:
						FrHQ_looks_good_for_d_library, FrHQ_for_mutation_base_in_d_library = \
								percentage_of_base_change_reads_that_are_high_quality_looks_good_for_library(d_library_line_dictionary, reference_base, \
																	     mut_base, mut_base.lower())
						if not FrHQ_looks_good_for_d_library:
							FrHQ_looks_good = False


					# Skip this (base, well) combination if it does not pass the pre-processing tests;
					# it then gets no entry in the joint probability dictionary
					if not (skewN_looks_good and FrHQ_looks_good):
						continue
                                
					for zygosity in ['het', 'hom']:
						## compute probability of the observed ratios of candidate base change calls to coverage across all libraries, given the current genotype
						probability_of_observed_results_given_genotype = \
								compute_probability_of_observed_results_given_genotype(mutant_row_library, mutant_column_library, mutant_d_library,
															mut_base, zygosity, line_dictionary_list,
															possible_new_base_list,
															probability_of_base_change_to_coverage_ratio_for_library_dict)
						prior_of_genotype = compute_prior_of_well_mutation(reference_base, mut_base, zygosity)
						#if zygosity == 'het':
						#	prior_of_genotype = p_het_in_well
						#else:
						#	prior_of_genotype = p_hom_in_well
                                                
						# genotype key: zygosity, new base and the libraries of the well, joined by tabs
						genotype = zygosity + '\t' + mut_base + '\t' + mutant_row_library + '\t' + mutant_column_library + '\t' + mutant_d_library
						genotype_to_joint_probability_of_genotype_and_observed_results_dict[genotype] = \
								prior_of_genotype * probability_of_observed_results_given_genotype
                                        

	## handle condition where there is no mutant individual, for any base change
	prior_of_genotype = compute_prior_of_no_mut_in_any_well(reference_base)

	# compute probability of the observed ratios of candidate base change calls to coverage across all libraries, given no mutation in any well, for any base change
	probability_of_observed_results_given_genotype = 1.0
	for line_dictionary in line_dictionary_list:
		library_name = line_dictionary['library']
		for base in possible_new_base_list:
			probability_of_observed_results_given_genotype *= \
				probability_of_base_change_to_coverage_ratio_for_library_dict[base]['none']['NA'][library_name]

	genotype = 'no_mut_in_any_well'
	genotype_to_joint_probability_of_genotype_and_observed_results_dict[genotype] = \
			prior_of_genotype * probability_of_observed_results_given_genotype


	#print 'VDEBUG:'
	#print genotype_to_joint_probability_of_genotype_and_observed_results_dict

	## compute denominator
	## (sum of the joint probabilities of all genotypes, including 'no_mut_in_any_well')
	joint_probability_sum = 0.0
	for genotype in genotype_to_joint_probability_of_genotype_and_observed_results_dict:
		joint_probability_sum += genotype_to_joint_probability_of_genotype_and_observed_results_dict[genotype]

	if joint_probability_sum == 0.0:
		# nothing can be normalized: report 'NA' for every value and count zero candidates
		print 'WARNING: we have a sum of joint probabilities of 0 at position %d' % position
		probability_no_mutant_in_any_well, one_minus_probability_of_best_candidate_mutation, chosen_new_base, chosen_row, chosen_column, chosen_d_library, \
			one_minus_probability_of_next_best_candidate_mutation = \
				'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA'
		bool_found_at_least_one_candidate = False
		number_of_candidate_mutations_per_position = 0
	else:

		## compute probability that no well contains a mutant individual
		probability_no_mutant_in_any_well = (float(genotype_to_joint_probability_of_genotype_and_observed_results_dict['no_mut_in_any_well']) / \
						     float(joint_probability_sum))
        
		## compute one minus the probability of a mutant individual in each well, for each possible base change

		# initialize and add joint probability of no mutation in any well
		# (only (base, well) combinations that passed the pre-processing tests get an entry)
		joint_probaiblity_sum_minus_joint_probability_of_well_base_combination_dict = {}
		if global_use_3D_pooling:
			list_of_d_libraries = global_d_library_list
		else:
			list_of_d_libraries = ['NA']
		for mutant_row_library in global_row_library_list:
			for mutant_column_library in global_column_library_list:
				for mutant_d_library in list_of_d_libraries:
					if T7_tomato_and_rice and (not well_is_in_global_well_dict(mutant_row_library, mutant_column_library, mutant_d_library)):
						continue
					for base in possible_new_base_list:
						well_base_combination = base + '\t' + mutant_row_library + '\t' + mutant_column_library + '\t' + mutant_d_library
						het_genotype = 'het' + '\t' + well_base_combination
						hom_genotype = 'hom' + '\t' + well_base_combination
						if ((het_genotype in genotype_to_joint_probability_of_genotype_and_observed_results_dict) and 
						    (hom_genotype in genotype_to_joint_probability_of_genotype_and_observed_results_dict)):
							joint_probaiblity_sum_minus_joint_probability_of_well_base_combination_dict[well_base_combination] = 0
							joint_probaiblity_sum_minus_joint_probability_of_well_base_combination_dict[well_base_combination] += \
									genotype_to_joint_probability_of_genotype_and_observed_results_dict['no_mut_in_any_well']
						elif ((het_genotype in genotype_to_joint_probability_of_genotype_and_observed_results_dict) or
						      (hom_genotype in genotype_to_joint_probability_of_genotype_and_observed_results_dict)):
							raise IOError, 'Internal error: it should be the case that the het and hom genotypes both have a joint probability, ' + \
									'or neither of them has a joint probability.'

		# add joint probability of different mutation conditions
		# For every combination, the joint probabilities of all OTHER combinations are added up
		# (quadratic in the number of combinations), instead of subtracting from joint_probability_sum.
		# NOTE(review): presumably this is done to avoid cancellation when the probability
		#               of the excluded combination is close to 1 -- confirm before simplifying.
		candidate_mutation_and_one_minus_probability_tuple_list = []
		for excluded_well_base_combination in joint_probaiblity_sum_minus_joint_probability_of_well_base_combination_dict:
			for well_base_combination in joint_probaiblity_sum_minus_joint_probability_of_well_base_combination_dict:
				het_genotype = 'het' + '\t' + well_base_combination
				hom_genotype = 'hom' + '\t' + well_base_combination
				if well_base_combination != excluded_well_base_combination:
					joint_probaiblity_sum_minus_joint_probability_of_well_base_combination_dict[excluded_well_base_combination] += \
							(genotype_to_joint_probability_of_genotype_and_observed_results_dict[het_genotype] + \
							 genotype_to_joint_probability_of_genotype_and_observed_results_dict[hom_genotype])
			
			one_minus_probability_of_mutant_with_given_base_change_in_well = \
					float(joint_probaiblity_sum_minus_joint_probability_of_well_base_combination_dict[excluded_well_base_combination]) / float(joint_probability_sum)
			(base, mutant_row_library, mutant_column_library, mutant_d_library) = excluded_well_base_combination.split('\t')
			candidate_mutation_and_one_minus_probability_tuple_list.append((one_minus_probability_of_mutant_with_given_base_change_in_well,
									      base, mutant_row_library, mutant_column_library, mutant_d_library))

		## compute probability of a mutant individual in each well, for each possible base change
		candidate_mutation_and_probability_tuple_list = []
		for well_base_combination in joint_probaiblity_sum_minus_joint_probability_of_well_base_combination_dict:
			het_genotype = 'het' + '\t' + well_base_combination
			hom_genotype = 'hom' + '\t' + well_base_combination
			(base, mutant_row_library, mutant_column_library, mutant_d_library) = well_base_combination.split('\t')
			probability_of_mutant_with_given_base_change_in_well = float(genotype_to_joint_probability_of_genotype_and_observed_results_dict[het_genotype] + \
											genotype_to_joint_probability_of_genotype_and_observed_results_dict[hom_genotype]) /\
										float(joint_probability_sum)
			candidate_mutation_and_probability_tuple_list.append((probability_of_mutant_with_given_base_change_in_well, base, mutant_row_library, \
									      mutant_column_library, mutant_d_library))
        
		if len(candidate_mutation_and_one_minus_probability_tuple_list) == 0:
			# no (base, well) combination passed the pre-processing tests
			one_minus_probability_of_best_candidate_mutation, chosen_new_base, chosen_row, chosen_column, chosen_d_library, one_minus_probability_of_next_best_candidate_mutation \
					= 'NA', 'NA', 'NA', 'NA', 'NA', 'NA'
			bool_found_at_least_one_candidate = False
			number_of_candidate_mutations_per_position = 0
		else:
			## select well with the highest probability (the lowest value for [1 - probability])
			## (the tuples sort on [1 - probability] first; ties are ordered by base and library names)
			candidate_mutation_and_one_minus_probability_tuple_list.sort()
			(one_minus_probability_of_best_candidate_mutation, chosen_new_base, chosen_row, chosen_column, chosen_d_library) = \
					candidate_mutation_and_one_minus_probability_tuple_list[0]
			if len(candidate_mutation_and_one_minus_probability_tuple_list) == 1:
				one_minus_probability_of_next_best_candidate_mutation = 'NA'
			else:
				one_minus_probability_of_next_best_candidate_mutation = candidate_mutation_and_one_minus_probability_tuple_list[1][0]
			bool_found_at_least_one_candidate = True

			## Record statistics on number of candidate wells:
			## look for strong well signals (prob >= prob of no mutation) that do not appear to be errors (were not discarded by the preprocessing)
			broke_early = False
			# sort by decreasing probability, so that the index of the first candidate below the
			# 'no mutation' probability equals the number of candidates at or above it
			candidate_mutation_and_probability_tuple_list.sort()
			candidate_mutation_and_probability_tuple_list.reverse()
			for i in range(0, len(candidate_mutation_and_probability_tuple_list)):
				probability_of_current_candidate_mutation = candidate_mutation_and_probability_tuple_list[i][0]
				if probability_of_current_candidate_mutation < probability_no_mutant_in_any_well:
					num_candidate_mutations_with_at_least_the_probability_of_no_mut_in_any_well = i
					broke_early = True
					break
			if not broke_early:
				num_candidate_mutations_with_at_least_the_probability_of_no_mut_in_any_well = len(candidate_mutation_and_probability_tuple_list)
			
			number_of_candidate_mutations_per_position = num_candidate_mutations_with_at_least_the_probability_of_no_mut_in_any_well

	# update the caller's histogram of the number of candidate mutations per position (in place)
	if not (number_of_candidate_mutations_per_position in number_of_candidate_mutations_per_position_to_frequency_dict):
		number_of_candidate_mutations_per_position_to_frequency_dict[number_of_candidate_mutations_per_position] = 0
	number_of_candidate_mutations_per_position_to_frequency_dict[number_of_candidate_mutations_per_position] += 1	

	return one_minus_probability_of_best_candidate_mutation, chosen_new_base, chosen_row, chosen_column, chosen_d_library, one_minus_probability_of_next_best_candidate_mutation, \
		probability_no_mutant_in_any_well, bool_found_at_least_one_candidate

def compute_prior_of_no_mut_in_any_well(reference_base):
	"""Prior probability that no well holds a mutant, for any base change, at one position.

	A 'G' or 'C' reference base has one canonical and two non-canonical base changes;
	any other reference base has three non-canonical base changes.  The probability
	that a specific base change is absent from a well (binomial pmf at 0 over the
	individuals of the well) is raised to the number of wells for each base change.

	Raises IOError when global_is_contamination_test is set (not supported here).
	"""
	if global_is_contamination_test:
		raise IOError('Must revise conditions for contamination test')

	# We assume mutagenesis by EMS
	canonical_rate, non_canonical_rate = get_probability_of_canonical_and_non_canonical_mutations()

	individuals_per_well = global_num_individuals_per_pool
	well_count = len(global_row_library_list) * len(global_column_library_list)
	if global_use_3D_pooling:
		well_count = well_count * len(global_d_library_list)

	## probability that a specific [non]canonical base change does not occur in a well
	prob_canonical_absent_from_well = pmf_of_binomial_dist(0, individuals_per_well, canonical_rate)
	prob_non_canonical_absent_from_well = pmf_of_binomial_dist(0, individuals_per_well, non_canonical_rate)

	if reference_base not in ['G', 'C']:
		# three non-canonical base changes per well
		return math.pow(prob_non_canonical_absent_from_well, 3*well_count)

	# one canonical and two non-canonical base changes per well
	return math.pow(prob_canonical_absent_from_well, well_count) * math.pow(prob_non_canonical_absent_from_well, 2*well_count)

def compute_prior_of_well_mutation(reference_base, mutation_base, zygosity):
	"""Prior probability of one mutant (given base change and zygosity) in one specific well.

	The prior is the product of
	  - the share of the zygosity (het : hom is assumed to be 2 : 1 for M2 plants),
	  - the binomial probability that exactly one individual of the well carries the
	    base change, and
	  - the probability that every other (base change, well) combination has no mutant.

	Raises IOError for a zygosity other than 'het' / 'hom', and when
	global_is_contamination_test is set (not supported here).
	"""
	if global_is_contamination_test:
		raise IOError('Must revise conditions for contamination test (2)')

	#### Assume that Heterozygous / Homozygous mutation ratio for M2 plants is always 2:1
	hom_share = 1.0 / 3.0
	if zygosity == 'hom':
		zygosity_share = hom_share
	elif zygosity == 'het':
		zygosity_share = 1.0 - hom_share
	else:
		raise IOError('unexpected zygosity: %s' % zygosity)

	# We assume mutagenesis by EMS
	canonical_rate, non_canonical_rate = get_probability_of_canonical_and_non_canonical_mutations()

	individuals_per_well = global_num_individuals_per_pool
	well_count = len(global_row_library_list) * len(global_column_library_list)
	if global_use_3D_pooling:
		well_count = well_count * len(global_d_library_list)

	## probability of a specific [non]canonical base change in exactly 1 individual of a well
	prob_canonical_once_in_well = pmf_of_binomial_dist(1, individuals_per_well, canonical_rate)
	prob_non_canonical_once_in_well = pmf_of_binomial_dist(1, individuals_per_well, non_canonical_rate)
	## probability that a specific [non]canonical base change does not occur in a well
	prob_canonical_absent_from_well = pmf_of_binomial_dist(0, individuals_per_well, canonical_rate)
	prob_non_canonical_absent_from_well = pmf_of_binomial_dist(0, individuals_per_well, non_canonical_rate)

	if is_canonical_EMS_mutation(reference_base, mutation_base):
		## the canonical base change occurs in this well; the two non-canonical ones occur nowhere
		return zygosity_share * prob_canonical_once_in_well * math.pow(prob_canonical_absent_from_well, well_count - 1) * \
			math.pow(prob_non_canonical_absent_from_well, 2*well_count)

	if reference_base in ['G', 'C']:
		## one of the two non-canonical base changes occurs in this well; the canonical one occurs nowhere
		return zygosity_share * prob_non_canonical_once_in_well * math.pow(prob_non_canonical_absent_from_well, 2*well_count - 1) * \
			math.pow(prob_canonical_absent_from_well, well_count)

	## one of the three non-canonical base changes occurs in this well
	return zygosity_share * prob_non_canonical_once_in_well * math.pow(prob_non_canonical_absent_from_well, 3*well_count - 1)

def get_probability_of_any_base_change(reference_base):
	"""Return the probability of any base change at a position with the given reference base.

	A 'G' or 'C' reference base has one canonical and two non-canonical base changes;
	any other reference base has three non-canonical base changes.

	Raises IOError when global_is_contamination_test is set: the contamination test
	(which would treat every base change as equally likely) has not been revised
	since the wells are tested together.
	"""
	# We assume mutagenesis by EMS
	p_specific_canonical_EMS_mutation_at_position, p_specific_non_canonical_EMS_mutation_at_position = get_probability_of_canonical_and_non_canonical_mutations()

	if global_is_contamination_test:
		# (the unreachable assignment that used to follow this raise has been removed)
		raise IOError('Must revise conditions for contamination test, since I have just updated the code to test each well together:')

	if reference_base in ['G', 'C']:
		return p_specific_canonical_EMS_mutation_at_position + 2 * p_specific_non_canonical_EMS_mutation_at_position
	return 3 * p_specific_non_canonical_EMS_mutation_at_position

def is_canonical_EMS_mutation(reference_base, mutation_base):
	"""Return True only for the base changes G -> A and C -> T (upper case), else False."""
	return (reference_base, mutation_base) in (('G', 'A'), ('C', 'T'))

def get_probability_of_given_base_change(reference_base, mutation_base):
	"""Return the probability of the specific base change reference_base -> mutation_base.

	The canonical rate is returned for a canonical EMS base change (as decided by
	is_canonical_EMS_mutation), the non-canonical rate for every other base change.

	Raises IOError when global_is_contamination_test is set: the contamination test
	(which would treat every base change as equally likely) has not been revised
	since the wells are tested together.
	"""
	# We assume mutagenesis by EMS
	p_specific_canonical_EMS_mutation_at_position, p_specific_non_canonical_EMS_mutation_at_position = get_probability_of_canonical_and_non_canonical_mutations()

	if global_is_contamination_test:
		# (the unreachable assignment that used to follow this raise has been removed)
		raise IOError('Must revise conditions for contamination test, since I have just updated the code to test each well together:')

	# use the shared helper instead of repeating the G -> A / C -> T test here
	if is_canonical_EMS_mutation(reference_base, mutation_base):
		return p_specific_canonical_EMS_mutation_at_position
	return p_specific_non_canonical_EMS_mutation_at_position


def get_probability_of_canonical_and_non_canonical_mutations():
	"""Return (canonical rate, non-canonical rate) per position for global_organism.

	The first value is the probability of a specific canonical mutation at a position,
	the second the probability of a specific non-canonical mutation at a position.

	Raises IOError for an organism that is not handled.
	"""
	################################################################################################################################
	# NOTE: Prof Comai told me that all Tilling2 organisms (except Oryza_sativa) will have essentially only G->A and C->T (EMS-type) mutations,
	#       so I should add those organisms to the condition for Camelina:
	################################################################################################################################

	# Fixed rates taken from previous experiments:
	#   Oryza_sativa         - MNU mutagenesis in Till et al. 07
	#   Triticum_durum       - previous experiment (CVIM predictions, with an estimated 10% False Positive rate,
	#                          would give 7.228697e-05 for the canonical rate instead)
	#   Solanum_lycopersicum - Minoa et al. 2010
	rates_from_previous_experiments = {
		"Oryza_sativa": (5.596671e-06, 4.931025e-07),
		"Triticum_durum": (3.876694e-05, 0.000000e+00),
		'Solanum_lycopersicum': (2.376508e-06, 8.443156e-07),
	}
	if global_organism in rates_from_previous_experiments:
		return rates_from_previous_experiments[global_organism]

	if global_organism in ("Camelina_sativa", "Triticum_aestivum", "arabidopsis_tetraploid"):
		# only canonical mutations: the overall mutation rate is divided by the GC fraction
		canonical_rate = float(global_organism_mutation_rate) / float(global_organism_gc_fraction)
		return canonical_rate, 0.0

	raise IOError("Script cannot handle organism: " + global_organism)


def compute_probability_of_observed_results_given_genotype(mutant_row_library, mutant_column_library, mutant_d_library,
							mut_base, zygosity, line_dictionary_list,
							possible_new_base_list,
							probability_of_base_change_to_coverage_ratio_for_library_dict):
	"""Multiply the per-library probabilities of the observed results for one genotype.

	The genotype is a mutation to 'mut_base' of the given zygosity in the well formed by
	the given libraries.  For every library in line_dictionary_list and every base in
	possible_new_base_list one factor is looked up in
	probability_of_base_change_to_coverage_ratio_for_library_dict:
	  [base][well][zygosity][library]  when base is the mutated base,
	  [base]['none']['NA'][library]    otherwise (no well mutation for that base change).

	Without 3D pooling mutant_d_library is ignored and 'NA' is used in the well key.
	Returns the product of all factors (1.0 when there is nothing to multiply).
	"""
	# the well key is the tab-joined library names
	d_library_part = mutant_d_library if global_use_3D_pooling else 'NA'
	mut_well = '\t'.join((mutant_row_library, mutant_column_library, d_library_part))

	likelihood = 1.0
	for line_dictionary in line_dictionary_list:
		library_name = line_dictionary['library']
		for base in possible_new_base_list:
			table_for_base = probability_of_base_change_to_coverage_ratio_for_library_dict[base]
			if base != mut_base:
				## the proposed well mutation is for a different base change,
				## so no well mutation should exist for this base change
				factor = table_for_base['none']['NA'][library_name]
			else:
				## probability of the observed ratio of 'base' to coverage in this library,
				## given the mutation of the given zygosity in 'mut_well'
				factor = table_for_base[mut_well][zygosity][library_name]
			likelihood *= factor
	return likelihood


def get_probability_of_no_mutation(reference_base, mutation_base,
					prob_row_library_given_no_mutation,
					prob_column_library_given_no_mutation,
					prob_row_library_given_heterozygous_mutation,
					prob_column_library_given_heterozygous_mutation,
					prob_row_library_given_homozygous_mutation,
					prob_column_library_given_homozygous_mutation,
					prob_row_and_column_library_given_no_mutation,
					prob_row_and_column_library_given_no_mutation_for_row_and_heterozygous_mutation_for_column,
					prob_row_and_column_library_given_no_mutation_for_row_and_homozygous_mutation_for_column,
					prob_row_and_column_library_given_heterozygous_mutation_for_row_and_no_mutation_for_column,
					prob_row_and_column_library_given_homozygous_mutation_for_row_and_no_mutation_for_column,
					prob_row_and_column_library_given_heterozygous_mutation,
					prob_row_and_column_library_given_homozygous_mutation,
					position, row_library_name, column_library_name):
	"""Probability of no mutation in the pool, given the row and column library results.

	Bayes' Theorem over seven cases, each weighted by a module-level prior p_C_*:
	  p_C_4      no mutation for the row and for the column
	  p_C_3_het  no mutation for the row, heterozygous mutation for the column
	  p_C_3_hom  no mutation for the row, homozygous mutation for the column
	  p_C_2_het  heterozygous mutation for the row, no mutation for the column
	  p_C_2_hom  homozygous mutation for the row, no mutation for the column
	  p_C_1_het  heterozygous mutation (in the pool)
	  p_C_1_hom  homozygous mutation (in the pool)
	The first five cases form the numerator, all seven the denominator.

	Only the seven prob_row_and_column_library_* arguments are used; the remaining
	arguments are accepted but ignored.

	Returns a 3-tuple: the probability described above, followed by 'NA' twice (the
	separate row-only and column-only probabilities are no longer computed).
	"""
	# weighted probability of the results for the cases without a mutation in the pool
	weighted_no_mutation_in_pool = prob_row_and_column_library_given_no_mutation * p_C_4
	weighted_no_mutation_in_pool += \
		prob_row_and_column_library_given_no_mutation_for_row_and_heterozygous_mutation_for_column * p_C_3_het
	weighted_no_mutation_in_pool += \
		prob_row_and_column_library_given_no_mutation_for_row_and_homozygous_mutation_for_column * p_C_3_hom
	weighted_no_mutation_in_pool += \
		prob_row_and_column_library_given_heterozygous_mutation_for_row_and_no_mutation_for_column * p_C_2_het
	weighted_no_mutation_in_pool += \
		prob_row_and_column_library_given_homozygous_mutation_for_row_and_no_mutation_for_column * p_C_2_hom

	# the denominator additionally holds the two cases with a mutation in the pool
	weighted_all_cases = weighted_no_mutation_in_pool + (prob_row_and_column_library_given_heterozygous_mutation * p_C_1_het)
	weighted_all_cases += prob_row_and_column_library_given_homozygous_mutation * p_C_1_hom

	prob_no_mutation_in_pool_given_row_and_column_library = weighted_no_mutation_in_pool / weighted_all_cases

	# the per-library probabilities are not computed any more
	return prob_no_mutation_in_pool_given_row_and_column_library, 'NA', 'NA'


def skewN_looks_good_for_row_and_column_library(row_line_dictionary, column_line_dictionary, reference_base, mutation_base, mutation_base_lc):
	"""Check the strand skew of the mutation base in a (row, column) library pair.

	Returns a 6-tuple:
	    (skewN_looks_good,
	     pval_of_SkewN_for_row_library,
	     pval_of_SkewN_for_column_library,
	     found_accurate_pval_of_skewN_for_row_and_column_libraries,
	     refbase_orientation_bias_for_row,
	     refbase_orientation_bias_for_column)

	skewN_looks_good is True only when an accurate skew p-value was found for
	both libraries, both p-values are >= 1e-2, and the reference-base
	orientation bias of each library lies inside [0.1, 10].
	"""
	row_pval, row_pval_is_accurate = get_pval_of_forward_to_reverse_read_ratio_for_new_base(
			row_line_dictionary, reference_base, mutation_base, mutation_base_lc)
	column_pval, column_pval_is_accurate = get_pval_of_forward_to_reverse_read_ratio_for_new_base(
			column_line_dictionary, reference_base, mutation_base, mutation_base_lc)

	both_pvals_are_accurate = bool(row_pval_is_accurate and column_pval_is_accurate)

	# I should revise this - as some mutations with bad skew (but high orientation bias) will still look good:
	skewN_looks_good = bool(both_pvals_are_accurate and (row_pval >= 1e-2) and (column_pval >= 1e-2))

	# When the orientation bias of the reference base is extreme we cannot
	# easily tell whether the skew is significant, so the (row, column)
	# combination is rejected.  (A deltaQ-based condition used to be part of
	# this filter; it is currently disabled.)
	row_bias = get_refbase_orientation_bias(row_line_dictionary)
	column_bias = get_refbase_orientation_bias(column_line_dictionary)
	for bias in (row_bias, column_bias):
		if (bias < 0.1) or (bias > 10):
			skewN_looks_good = False

	return (skewN_looks_good, row_pval, column_pval,
		both_pvals_are_accurate,
		row_bias, column_bias)

def skewN_looks_good_for_row_column_and_d_library(row_line_dictionary, column_line_dictionary, d_library_line_dictionary, reference_base, mutation_base, mutation_base_lc):
	"""Return True if the strand skew of the mutation base looks acceptable in every relevant library.

	The relevant libraries are the row and column library, plus the d library
	when global_use_3D_pooling is set (d_library_line_dictionary is ignored
	otherwise).  The result is True only when, for each relevant library,
	an accurate skew p-value was found, that p-value is >= 1e-2, and the
	reference-base orientation bias lies inside [0.1, 10].
	"""
	relevant_line_dictionaries = [row_line_dictionary, column_line_dictionary]
	if global_use_3D_pooling:
		relevant_line_dictionaries.append(d_library_line_dictionary)

	# one (p-value, p-value-is-accurate) pair per relevant library
	skew_results = [get_pval_of_forward_to_reverse_read_ratio_for_new_base(line_dictionary, reference_base, mutation_base, mutation_base_lc)
			for line_dictionary in relevant_line_dictionaries]

	every_pval_is_accurate = all(is_accurate for (pval, is_accurate) in skew_results)
	every_pval_is_high = all((pval >= 1e-2) for (pval, is_accurate) in skew_results)

	# I should revise this - as some mutations with bad skew (but high orientation bias) will still look good:
	skewN_looks_good = every_pval_is_accurate and every_pval_is_high

	# If orientation bias is very high (so we cannot easily determine if there is significant skew)
	# then do not record this combination of libraries as a candidate mutation.
	for line_dictionary in relevant_line_dictionaries:
		orientation_bias = get_refbase_orientation_bias(line_dictionary)
		if (orientation_bias < 0.1) or (orientation_bias > 10):
			skewN_looks_good = False

	return skewN_looks_good

def skewN_looks_good_for_library(line_dictionary, reference_base, mutation_base, mutation_base_lc):
	"""Check the strand skew of the mutation base in a single library.

	Returns a 5-tuple:
	    (skewN_looks_good,
	     pval_of_SkewN_for_library,
	     found_accurate_pval_of_skewN_for_library,
	     refbase_orientation_bias,
	     deltaQ_N_for_library)

	skewN_looks_good is True only when an accurate skew p-value was found,
	it is >= 1e-2, and the reference-base orientation bias lies inside
	[0.1, 10].  deltaQ_N_for_library is read from the "deltaQ-<base>" field
	and returned, but does not take part in the decision.
	"""
	pval, pval_is_accurate = get_pval_of_forward_to_reverse_read_ratio_for_new_base(
			line_dictionary, reference_base, mutation_base, mutation_base_lc)

	refbase_orientation_bias = get_refbase_orientation_bias(line_dictionary)
	deltaQ_N_for_library = float(line_dictionary["deltaQ-" + mutation_base])

	# With a very high orientation bias we cannot easily determine whether
	# the skew is significant, so the library is rejected in that case.
	bias_is_extreme = (refbase_orientation_bias < 0.1) or (refbase_orientation_bias > 10)

	# I should revise this - as some mutations with bad skew (but high orientation bias) will still look good:
	skewN_looks_good = bool(pval_is_accurate and (pval >= 1e-2)) and not bias_is_extreme

	return (skewN_looks_good, pval,
		pval_is_accurate,
		refbase_orientation_bias,
		deltaQ_N_for_library)


def percentage_of_base_change_reads_that_are_high_quality_looks_good_for_row_and_column_library(row_line_dictionary, column_line_dictionary,
												reference_base, mutation_base, mutation_base_lc):
	"""Check the high-quality fraction (FrHQ) of the base-change reads in the row and column library.

	Returns (FrHQ_looks_good, FrHQ_for_row_library, FrHQ_for_column_library).

	If either library reports 'NA' for the FrHQ field, the check passes and
	both FrHQ values are returned as 'NA'.  Otherwise both values are
	converted to float and each must reach the threshold that applies to the
	base change (standard EMS candidate vs. any other change).
	"""
	FrHQ_key = 'FrHQ_for_' + mutation_base + mutation_base_lc
	row_field = row_line_dictionary[FrHQ_key]
	column_field = column_line_dictionary[FrHQ_key]

	if 'NA' in (row_field, column_field):
		# this case should only occur when combining the library data from gene homologs
		# MODIFIED HERE, FOR A RECENT EXPERIMENT
		return True, 'NA', 'NA'

	row_FrHQ = float(row_field)
	column_FrHQ = float(column_field)

	if is_standard_EMS_mutation_candidate(reference_base, mutation_base):
		threshold = global_FrHQ_threshold_standard_EMS_mutation_candidate
	else:
		threshold = global_FrHQ_threshold_not_standard_EMS_mutation_candidate

	FrHQ_looks_good = (row_FrHQ >= threshold) and (column_FrHQ >= threshold)
	return FrHQ_looks_good, row_FrHQ, column_FrHQ

def percentage_of_base_change_reads_that_are_high_quality_looks_good_for_library(line_dictionary, reference_base, mutation_base, mutation_base_lc):
	"""Check the high-quality fraction (FrHQ) of the base-change reads in one library.

	Returns (FrHQ_looks_good, FrHQ_for_mutation_base_in_library).

	If the FrHQ field is 'NA' the check fails and the second element is the
	string 'NA' (the same placeholder the row/column variant of this check
	returns).  Otherwise the field is converted to float and compared with
	the threshold that applies to the base change (standard EMS candidate
	vs. any other change).
	"""
	FrHQ_string = line_dictionary['FrHQ_for_' + mutation_base + mutation_base_lc]
	if FrHQ_string == 'NA':
		# this case should only occur when combining the library data from gene homologs
		# The value must still be bound here: previously it was left
		# unassigned, so the return statement raised UnboundLocalError.
		return False, 'NA'

	FrHQ_for_mutation_base_in_library = float(FrHQ_string)
	if is_standard_EMS_mutation_candidate(reference_base, mutation_base):
		threshold = global_FrHQ_threshold_standard_EMS_mutation_candidate
	else:
		threshold = global_FrHQ_threshold_not_standard_EMS_mutation_candidate
	FrHQ_looks_good = (FrHQ_for_mutation_base_in_library >= threshold)
	return FrHQ_looks_good, FrHQ_for_mutation_base_in_library


def get_well_candidate_dict__bayesian_method(candidate_list, mutation_factor, position, reference_base, mutation_base,
						 row_line_dictionary, column_line_dictionary, d_library_line_dictionary,
						 row_library_name, column_library_name, d_library_name,
						 library_to_fraction_of_base_reads_that_come_from_global_gene_dict):
	"""Build the candidate dictionary for one well (pool) and one base change.

	The well is identified by its row and column library, plus the d library
	when global_use_3D_pooling is set (the d_library_* arguments are not read
	otherwise).  candidate_list is not used; it is kept in the signature for
	existing callers.

	Returns a dict with the keys:
	    "string"          - tab-separated output row for the candidate
	    "pool_ID"         - 2D or 3D pool ID of the well
	    "mutation_type"   - always 'NA' here
	    "mutation_factor" - the mutation_factor argument, unchanged
	"""
	# Libraries that make up the well, in output column order.
	library_names = [row_library_name, column_library_name]
	line_dictionaries = [row_line_dictionary, column_line_dictionary]
	if global_use_3D_pooling:
		pool_ID = get_3D_pool_ID(row_library_name, column_library_name, d_library_name)
		library_names.append(d_library_name)
		line_dictionaries.append(d_library_line_dictionary)
	else:
		pool_ID = get_2D_pool_ID(row_library_name, column_library_name)

	# These annotations are not computed by this function.
	effect_of_base_change, BLOSUM62_substitution_score, mutation_type = 'NA', 'NA', 'NA'

	# Two different placeholders are written depending on which side of the
	# threshold the mutation factor falls.
	if mutation_factor < global_mutation_factor_threshold_for_computing_restricion_enzymes_gained_and_lost:
		restriction_enzyme_gained_string, restriction_enzyme_lost_string = 'NA', 'NA'
	else:
		restriction_enzyme_gained_string, restriction_enzyme_lost_string = "-NA-", '-NA-'

	organism_abbreviation = global_organism_name_to_abbreviation_dict[global_organism]
	gene_name = row_line_dictionary["refseq"]

	# Per-library columns: one tab-separated value per library of the well.
	base_pair_suffix = mutation_base + mutation_base.lower()
	FrNn_string = '\t'.join([line_dictionary["Fr" + base_pair_suffix] for line_dictionary in line_dictionaries])
	FrHQ_for_mutation_base_string = '\t'.join([line_dictionary["FrHQ_for_" + base_pair_suffix] for line_dictionary in line_dictionaries])
	library_number_string = '\t'.join(['%d' % get_number_from_library_name(library_name) for library_name in library_names])
	library_coverage_string = '\t'.join([line_dictionary["coverage"] for line_dictionary in line_dictionaries])
	fraction_of_base_reads_from_library_that_come_from_global_gene_string = '\t'.join(
			['%.3f' % float(library_to_fraction_of_base_reads_that_come_from_global_gene_dict[library_name])
			 for library_name in library_names])

	# The mutation factor is formatted on its own.  Applying '%' to the whole
	# concatenated row (as was done before) misbehaves as soon as any data
	# field, e.g. the gene name, contains a literal '%'.
	candidate_fields = [
		organism_abbreviation,
		gene_name,
		'NA',
		reference_base + str(position) + mutation_base,
		'%.20e' % mutation_factor,
		library_number_string,
		str(pool_ID),
		effect_of_base_change,
		str(BLOSUM62_substitution_score),
		mutation_type,
		restriction_enzyme_gained_string,
		restriction_enzyme_lost_string,
		FrHQ_for_mutation_base_string,
		library_coverage_string,
		fraction_of_base_reads_from_library_that_come_from_global_gene_string,
		FrNn_string,
	]

	candidate_dict = {}
	candidate_dict["string"] = '\t'.join(candidate_fields)
	candidate_dict["pool_ID"] = pool_ID
	candidate_dict["mutation_type"] = mutation_type
	candidate_dict["mutation_factor"] = mutation_factor

	return candidate_dict

def print_verbose_mutation_candidate_output__bayesian_method(reference_base, position, mutation_base, row_library_name, column_library_name, mutation_factor,
								prob_row_library_given_no_mutation, prob_row_library_given_heterozygous_mutation,
								prob_row_library_given_homozygous_mutation, prob_no_mutation_given_row_library,
								prob_column_library_given_no_mutation, prob_column_library_given_heterozygous_mutation,
								prob_column_library_given_homozygous_mutation, prob_no_mutation_given_column_library,
								pval_of_SkewN_for_row_library, pval_of_SkewN_for_column_library, overall_SkewN,
								refbase_orientation_bias_for_row, refbase_orientation_bias_for_column,
								deltaQ_N_for_row, deltaQ_N_for_column,
								found_accurate_pval_of_skewN_for_row_and_column_libraries):
	"""Print a verbose, human-readable summary of one (row, column) candidate to stdout.

	The output consists of a title line (base change, library pair, mutation
	factor), a header line, one 'Row' and one 'Col' line with the
	probabilities and skew p-values, the orientation bias / deltaQ-N of both
	libraries, an optional warning when the skew p-values may be inaccurate,
	and a closing blank line.  Returns None.

	Values that cannot be formatted as numbers (such as the placeholder
	string 'NA') are printed as text padded to the same column width.
	"""
	def format_cell(numeric_format, value):
		# Other code in this file sets the per-library posteriors to the
		# string 'NA'; '%e' / '%f' raise TypeError for such values, so fall
		# back to a left-aligned 16 character text cell.
		try:
			return numeric_format % value
		except TypeError:
			return '%-16s' % value

	# Each print below receives exactly one expression, so the call form
	# prints the same text as the Python 2 print statement.
	# The mutation factor is formatted separately so that a '%' in a library
	# name cannot be mistaken for a format directive.
	print(reference_base + str(position) + mutation_base + '\t(' + row_library_name + ', ' + column_library_name + ')\t'
			+ ('%.3e' % mutation_factor))
	print('\t' + 'Error prob' + '\t\t' + 'heterozygous prob' + '\t' + 'homozygous prob' + '\t' + 'P(no_mutation | library)'
			+ '\t' + 'pvalue of SkewN for lib' + '\t' + 'Overall SkewN (over all rows and columns)')

	row_cells = [format_cell('%-16.3e', value) for value in (
			prob_row_library_given_no_mutation,
			prob_row_library_given_heterozygous_mutation,
			prob_row_library_given_homozygous_mutation,
			prob_no_mutation_given_row_library,
			pval_of_SkewN_for_row_library)]
	row_cells.append(format_cell('%-16.3f', overall_SkewN))
	print('Row\t' + '\t'.join(row_cells))

	column_cells = [format_cell('%-16.3e', value) for value in (
			prob_column_library_given_no_mutation,
			prob_column_library_given_heterozygous_mutation,
			prob_column_library_given_homozygous_mutation,
			prob_no_mutation_given_column_library,
			pval_of_SkewN_for_column_library)]
	print('Col\t' + '\t'.join(column_cells))

	print('Orientation bias for row library: ' + str(refbase_orientation_bias_for_row) + '\tdeltaQ-N for row: ' + str(deltaQ_N_for_row))
	print('Orientation bias for col library: ' + str(refbase_orientation_bias_for_column) + '\tdeltaQ-N for column: ' + str(deltaQ_N_for_column))
	if not found_accurate_pval_of_skewN_for_row_and_column_libraries:
		print('------ pval of skewN may be incorrect for either row or column library --------')
	print("")
	return


########## Extra Helper Functions:

def is_standard_EMS_mutation_candidate(reference_base, mutation_base):
	"""Return True if the base change is G->A or C->T, and False for any other change."""
	standard_EMS_base_changes = (('G', 'A'), ('C', 'T'))
	return (reference_base, mutation_base) in standard_EMS_base_changes

def get_2D_pool_ID(row_library_name, column_library_name):
	"""Return the pool ID of the well addressed by a row and a column library (2D pooling).

	With r and c the numbers parsed from the two library names, the ID is
	(c - 1) * global_num_pools_per_column_library + r.
	"""
	row_number = get_number_from_library_name(row_library_name)
	column_number = get_number_from_library_name(column_library_name)
	pools_before_this_column = (column_number - 1) * global_num_pools_per_column_library
	return pools_before_this_column + row_number

## IMPORTANT NOTE:
## This function contains fixed values set specifically for the current
## 3D pooling scheme
def get_3D_pool_ID(row_library_name, column_library_name, d_library_name):
	"""Return the pool ID of the well addressed by a row, column and d library (3D pooling).

	Row and column numbers are first wrapped into the range 1..8, because in
	the current 3D pooling scheme some (row, column, d) combinations do not
	correspond to a real pool.  The ID is then
	(d - 1) * 64 + (row - 1) * 8 + column.
	"""
	row_number = get_number_from_library_name(row_library_name)
	column_number = get_number_from_library_name(column_library_name)
	d_number = get_number_from_library_name(d_library_name)

	# zero-based positions inside the fixed 8 x 8 layout
	row_index = (row_number - 1) % 8
	column_index = (column_number - 1) % 8

	return ((d_number - 1) * 64) + (row_index * 8) + (column_index + 1)



def get_number_from_library_name(original_library_name):
	"""Return the integer that follows the 'R', 'C' or 'D' in a library name.

	Every occurrence of global_tilling_run_prefix and of each entry of
	global_extra_library_name_modifiers_list is removed from the name first.
	What remains must be one of the letters 'R', 'C' or 'D' followed only by
	digits.

	Raises IOError if the stripped name does not have that form.
	"""
	# strip library name down to the form 'R<number>', 'C<number>' or 'D<number>'
	library_name = original_library_name.replace(global_tilling_run_prefix, '')
	for modifier in global_extra_library_name_modifiers_list:
		library_name = library_name.replace(modifier, '')

	# strip off 'R', 'C', or 'D'
	# (the call form of raise is used because the 'raise E, msg' statement
	# form no longer exists in Python 3; both behave the same on Python 2)
	if (library_name == "") or (library_name[0] not in ('R', 'C', 'D')):
		raise IOError('In get_number_from_library_name(): bad library name format: \'' + original_library_name + '\' ==> \'' + library_name + '\'')
	library_name = library_name[1:]

	if not library_name.isdigit():
		raise IOError('In get_number_from_library_name(): bad library name format: \'' + original_library_name + '\' ==> \'' + library_name + '\'')

	return int(library_name)

#def get_expected_base_change_rate_given_mutation(expected_fraction_of_base_reads_for_position_coming_from_mutant_alleles, estimated_prob_refbase_read_as_mutbase,
#						 estimated_prob_mutbase_read_as_not_mutbase):
#	expected_base_change_rate = (((1.0 - expected_fraction_of_base_reads_for_position_coming_from_mutant_alleles) * estimated_prob_refbase_read_as_mutbase) + \
#				     (expected_fraction_of_base_reads_for_position_coming_from_mutant_alleles * (1.0 - estimated_prob_mutbase_read_as_not_mutbase)))
#	return expected_base_change_rate

def get_expected_base_change_rate_given_mutation(expected_fraction_of_base_reads_for_position_coming_from_mutant_alleles, estimated_prob_refbase_read_as_mutbase):
	"""Return the expected fraction of reads showing the mutation base when a mutation is present.

	Every read that comes from a mutant allele is counted as a base change;
	reads from the remaining alleles show the mutation base with probability
	estimated_prob_refbase_read_as_mutbase.
	"""
	mutant_fraction = expected_fraction_of_base_reads_for_position_coming_from_mutant_alleles
	non_mutant_fraction = 1.0 - mutant_fraction
	rate_from_misread_reference_bases = non_mutant_fraction * estimated_prob_refbase_read_as_mutbase
	return rate_from_misread_reference_bases + mutant_fraction

def get_probability_of_base_change_to_coverage_ratio_given_expected_base_change_rate(line_dictionary, expected_base_change_rate, new_base, new_base_lc):
	"""Return the binomial probability of the observed number of base changes, floored at 1e-11.

	The number of base changes is the sum of the line_dictionary counts for
	new_base and new_base_lc, the number of trials is the "coverage" field,
	and expected_base_change_rate is the per-read probability.
	"""
	uppercase_count = int(line_dictionary[new_base])
	lowercase_count = int(line_dictionary[new_base_lc])
	number_of_reads = int(line_dictionary["coverage"])

	pmf_value = pmf_of_binomial_dist(uppercase_count + lowercase_count, number_of_reads, expected_base_change_rate)

	# never report a probability below this floor
	minimum_probability = 1e-11
	if pmf_value < minimum_probability:
		return minimum_probability
	return pmf_value

def get_pval_of_base_change_to_coverage_ratio_given_expected_base_change_rate(line_dictionary, expected_base_change_rate, new_base, new_base_lc):
	"""Return a two-sided binomial p-value for the observed base-change count.

	The count is the sum of the line_dictionary entries for new_base and
	new_base_lc; the number of trials is the "coverage" field.  The
	one-sided tail probability (towards the side on which the observation
	lies) is capped at 0.5, which guards against the situation where the
	frequency of base changes is exactly what was expected, then doubled,
	and finally floored at 1e-11.  With zero coverage the result is 1.0.
	"""
	num_base_changes = int(line_dictionary[new_base]) + int(line_dictionary[new_base_lc])
	coverage = int(line_dictionary["coverage"])

	if coverage == 0:
		one_sided_pval = 0.5
	else:
		observed_rate = float(num_base_changes) / float(coverage)
		if observed_rate >= expected_base_change_rate:
			# upper tail: probability of the observed number of base changes or more
			if num_base_changes == 0:
				prob_of_fewer_base_changes = 0
			else:
				prob_of_fewer_base_changes = summation_of_pmf_of_binomial_dist(0, (num_base_changes - 1), coverage, expected_base_change_rate)
			one_sided_pval = 1 - prob_of_fewer_base_changes
		else:
			# lower tail: probability of the observed number of base changes or fewer
			one_sided_pval = summation_of_pmf_of_binomial_dist(0, num_base_changes, coverage, expected_base_change_rate)

		# Cap the one-sided value at 0.5
		if one_sided_pval > 0.5:
			one_sided_pval = 0.5

	# Double the pvalue to consider both sides of the binomial distribution
	two_sided_pval = one_sided_pval * 2.0

	# The result is never smaller than 1e-11
	if two_sided_pval < (1e-11):
		two_sided_pval = 1e-11
	return two_sided_pval

# Returns two values:
# 1. p-value
# 2. Whether an accurate p-value was computed.
def get_pval_of_forward_to_reverse_read_ratio_for_new_base(line_dictionary, reference_base, new_base, new_base_lc):
	"""Test whether the strand ratio of the new base matches that of the reference base.

	The probability of a forward read is estimated from the reference reads
	as (forward + 1) / (total + 2), with the "comma" field taken as the
	forward and the "dot" field as the reverse reference reads.  The
	forward/reverse counts of the new base are the line_dictionary entries
	for new_base / new_base_lc; when new_base is 'N' the counts of the three
	bases other than reference_base are summed instead.
	NOTE(review): in raw samtools pileup output '.' marks forward-strand and
	',' reverse-strand matches - confirm that the parser which writes the
	"comma"/"dot" columns agrees with the orientation assumed here.

	Returns (pval, found_accurate_pval):
	    (1.0, True)   if there are no reads of the new base
	    (1.0, False)  if the new base has more reads than the reference base
	    (pval, True)  otherwise, where pval is the one-sided binomial tail
	                  probability capped at 0.5, doubled, and floored at 1e-11

	Raises ValueError (from list.remove) if new_base is 'N' and
	reference_base is not one of 'A', 'C', 'G', 'T'.
	"""
	forward_reference_reads = int(line_dictionary["comma"])
	reverse_reference_reads = int(line_dictionary["dot"])
	total_reference_reads = forward_reference_reads + reverse_reference_reads

	estimated_probability_of_a_forward_read = (float(forward_reference_reads) + 1.0) / (float(total_reference_reads) + 2.0)

	if new_base == 'N':
		base_list = ['A', 'C', 'G', 'T']
		base_list.remove(reference_base)

		num_forward_reads_for_new_base = 0
		num_reverse_reads_for_new_base = 0
		for base in base_list:
			num_forward_reads_for_new_base += int(line_dictionary[base])
			num_reverse_reads_for_new_base += int(line_dictionary[base.lower()])
	else:
		num_forward_reads_for_new_base = int(line_dictionary[new_base])
		num_reverse_reads_for_new_base = int(line_dictionary[new_base_lc])

	total_reads_for_new_base = num_forward_reads_for_new_base + num_reverse_reads_for_new_base

	# Cases in which no binomial test is performed
	if total_reads_for_new_base == 0:
		return 1.0, True
	if total_reads_for_new_base > total_reference_reads:
		return 1.0, False

	fraction_of_forward_reads_for_new_base = float(num_forward_reads_for_new_base) / float(total_reads_for_new_base)

	if fraction_of_forward_reads_for_new_base <= estimated_probability_of_a_forward_read:
		# compute probability of getting the current number of forward reads or less for the new base
		pval = summation_of_pmf_of_binomial_dist(0, num_forward_reads_for_new_base, total_reads_for_new_base, estimated_probability_of_a_forward_read)
	else:
		# compute probability of getting the current number of forward reads or greater for the new base
		pval = summation_of_pmf_of_binomial_dist(num_forward_reads_for_new_base, total_reads_for_new_base, total_reads_for_new_base, estimated_probability_of_a_forward_read)

	# Cap pval at 0.5
	if pval > 0.5:
		pval = 0.5
	# Double the pvalue to consider both sides of the binomial distribution
	pval = pval * 2.0
	# Set pval to be at least 1e-11
	if pval < (1e-11):
		pval = 1e-11
	return pval, True

def summation_of_pmf_of_binomial_dist(start_k, end_k, n, p):
	"""Return P(start_k <= X <= end_k) for X ~ Binomial(n, p).

	Computed from the scipy binomial CDF as cdf(end_k) - cdf(start_k - 1);
	for start_k == 0 this is just cdf(end_k).

	Raises IOError if start_k is negative.
	"""
	binomial_cdf = scipy.stats.distributions.binom.cdf
	if start_k == 0:
		return binomial_cdf(end_k, n, p)
	elif start_k > 0:
		return binomial_cdf(end_k, n, p) - binomial_cdf(start_k - 1, n, p)
	else:
		# call form of raise: the 'raise E, msg' statement form no longer
		# exists in Python 3; both behave the same on Python 2
		raise IOError('Error: bad starting value of k: %d' % start_k)

## version before I used scipy
def summation_of_pmf_of_binomial_dist__a_long_way(start_k, end_k, n, p):
	"""Return the sum of pmf_of_binomial_dist(k, n, p) for k = start_k .. end_k (inclusive).

	Pure-Python predecessor of the scipy based summation; an empty range
	gives 0.0.
	"""
	pmf_values = (pmf_of_binomial_dist(k, n, p) for k in range(start_k, (end_k + 1)))
	return sum(pmf_values, 0.0)

def pmf_of_binomial_dist(k, n, p):
	"""Return the binomial probability mass P(X = k) for X ~ Binomial(n, p).

	The value is first computed directly: via choose() when k is 0 or n,
	otherwise by multiplying the k factors ((n - k + i) / i) * p one at a
	time and then applying (1 - p) ** (n - k).  For large arguments that
	running product can overflow to inf (and inf * 0.0 gives NaN); in either
	case the value is recomputed with scipy's binomial pmf.
	"""
	if (k == 0) or (k == n):
		result = choose(n, k) * pow(p, k) * pow((1 - p), (n - k))
	else:
		product = 1.0
		for i in range(1, (k + 1)):
			product = product * (float(n - k + i) / float(i)) * p

		result = product * pow((1 - p), (n - k))

	# An infinite "probability" is just as wrong as NaN: it appears when the
	# product overflows while the (1 - p) power is still non-zero
	# (e.g. k=1050, n=2100, p=0.5), so both are treated as failures.
	if math.isnan(result) or math.isinf(result):
		result = scipy.stats.distributions.binom.pmf(k, n, p)
	return result

def pmf_of_binomial_dist_avoid_NaNs__works_but_is_not_as_fast_as_using_function_from_scipy_module(k, n, p):
	"""Return the binomial probability mass P(X = k) for X ~ Binomial(n, p) without using scipy.

	For k other than 0 and n, the factors of the result are collected in
	two lists (numbers to multiply by and numbers to divide by) and applied
	in an interleaved order: while the running value is above 1.0 a division
	is preferred, otherwise a multiplication.  This keeps the intermediate
	value close to 1.0.

	Raises IOError if the result is NaN.
	"""
	if (k == 0) or (k == n):
		result = choose(n, k) * pow(p, k) * pow((1 - p), (n - k))
	else:
		to_multiply = []
		to_divide = []

		# C(n, k) * p**k : k numerator factors, and per factor one division
		# by 1/p and one by i
		for i in range(1, (k + 1)):
			to_multiply.append(float(n - k + i))
			to_divide.append(float(1.0) / float(p))
			to_divide.append(float(i))

		# (1 - p)**(n - k), expressed as divisions by 1/(1 - p)
		for i in range(0, (n - k)):
			to_divide.append(float(1.0) / float(1 - p))

		result = 1.0
		num_to_multiply = len(to_multiply)
		num_to_divide = len(to_divide)
		index_in_mult_list = 0
		index_in_div_list = 0
		## while there is still something to multiply or divide the current result by:
		while (index_in_mult_list < num_to_multiply) or (index_in_div_list < num_to_divide):
			can_multiply = index_in_mult_list < num_to_multiply
			can_divide = index_in_div_list < num_to_divide
			if result > 1.0:
				## prefer dividing; multiply only when nothing is left to divide by
				divide_now = can_divide
			else:
				## prefer multiplying; divide only when nothing is left to multiply by
				divide_now = not can_multiply

			if divide_now:
				result = float(result) / float(to_divide[index_in_div_list])
				index_in_div_list += 1
			else:
				result = float(result) * float(to_multiply[index_in_mult_list])
				index_in_mult_list += 1

	# NaN never compares equal to anything (itself included), so the former
	# test 'result == float("nan")' could not fire; math.isnan() does.
	if math.isnan(result):
		raise IOError('Error: did not expect to get NaN in function \'pmf_of_binomial_dist_avoid_NaNs\' with parameters k=%d, n=%d, p=%e' % (k, n, p))

	return result

def choose(n, k):
	"""Return the binomial coefficient "n choose k".

	The result is the integer 1 when k is 0 or n; otherwise it is the float
	(n - k + 1) * ... * n divided by factorial(k).
	"""
	if (k == 0) or (k == n):
		return 1

	# product of the k largest factors of n!
	falling_product = 1
	for factor in range((n - k + 1), (n + 1)):
		falling_product *= factor
	k_factorial = factorial(k)
	return float(falling_product) / float(k_factorial)

def factorial(n):
	"""Return n! for a non-negative integer n.

	Raises IOError for any n that is not 0, not 1 and not greater than 1
	(for example a negative number).
	"""
	if (n == 0) or (n == 1):
		return 1
	if n > 1:
		result = 1
		for i in range(2, (n + 1)):
			result = result * i
		return result
	# call form of raise: the 'raise E, msg' statement form no longer exists
	# in Python 3; both behave the same on Python 2
	raise IOError('Error: factorial expects a non-negative integer')

def compute_stdev_of_list_of_numbers(x_list):
	"""Return the sample standard deviation (denominator len - 1) of x_list.

	Raises IOError when the list is of length 1 or less.  (An earlier
	comment on this function said it returns 'NA' in that case; it does
	not - it has to be called with at least two numbers.)
	"""
	if len(x_list) <= 1:
		# call form of raise: the 'raise E, msg' statement form no longer
		# exists in Python 3; both behave the same on Python 2
		raise IOError('compute_stdev_of_list_of_numbers: expected a list of numbers of length > 1')
	mean = compute_mean_of_list_of_numbers(x_list)
	sum_of_squared_deviations = 0
	for x in x_list:
		sum_of_squared_deviations += math.pow((x - mean), 2.0)
	degrees_of_freedom = len(x_list) - 1
	return math.pow((float(sum_of_squared_deviations) / float(degrees_of_freedom)), 0.5)

def compute_mean_of_list_of_numbers(x_list):
	"""Return the arithmetic mean of x_list as a float (an empty list raises ZeroDivisionError)."""
	total = sum(x_list)
	count = len(x_list)
	return float(total) / float(count)

def get_mutation_score(line_dictionary, mutation_base, mutation_base_lc, library_type):
	"""Return a score of 2, 1 or 0 for how much a library line looks like a real mutation.

	The score is based on the "coverage", "Fr<Nn>", "Skew<N>" and
	"deltaQ-<N>" fields of line_dictionary.  The base-change frequency is
	compared with an expected frequency of 1/192 for library_type "row" and
	1/128 for library_type "col".  Score 2 uses the stricter limits, score 1
	the looser ones, and 0 is returned when neither set of limits is met.

	Raises IOError if library_type is neither "row" nor "col".
	"""
	coverage = int(line_dictionary["coverage"])
	FrNn = float(line_dictionary["Fr" + mutation_base + mutation_base_lc])
	SkewN = float(line_dictionary["Skew" + mutation_base])
	deltaQ_N = float(line_dictionary["deltaQ-" + mutation_base])

	if library_type == "row":
		FrNn_expected = 1.0 / 192.0
	elif library_type == "col":
		FrNn_expected = 1.0 / 128.0
	else:
		# call form of raise: the 'raise E, msg' statement form no longer
		# exists in Python 3; both behave the same on Python 2
		raise IOError("Invalid library type")

	if (coverage >= 4000) and (FrNn >= (0.8 * FrNn_expected)) and (FrNn <= ((8.0 / 3.0) * FrNn_expected)) and (SkewN >= 0.2) and (SkewN <= 5) and (deltaQ_N >= -2.5):
		return 2
	if (coverage >= 2000) and (FrNn >= (0.8 * FrNn_expected)) and (FrNn <= (3.0 * FrNn_expected)) and (SkewN >= 0.125) and (SkewN <= 8) and (deltaQ_N >= -5):
		return 1
	return 0


def divide_line_dictionary_list_by_position(line_dictionary_list):
	"""Group line dictionaries by their "position" field.

	Returns a dict that maps each position (converted to int) to the list
	of line dictionaries with that position, in their original order.
	"""
	grouped_by_position = {}
	for line_dictionary in line_dictionary_list:
		position_key = int(line_dictionary["position"])
		grouped_by_position.setdefault(position_key, []).append(line_dictionary)
	return grouped_by_position


def select_values_from_dictionary_list(dictionary_list, key):
	"""Return the list of dictionary[key] for every dictionary in dictionary_list, in order."""
	return [dictionary[key] for dictionary in dictionary_list]


def find_highest_position_number(line_dictionary_list):
	"""Return the largest "position" value (as int) in the list, or 0 if none is greater than 0."""
	position_numbers = [int(line_dictionary["position"]) for line_dictionary in line_dictionary_list]
	# the leading 0 is the starting value of the search
	return max([0] + position_numbers)


def read_parsed_pileup_file(parsed_pileup_filename, lines_to_skip, gene_name):
	"""Read the lines of a parsed pileup file that belong to one gene.

	The first int(lines_to_skip) lines are skipped.  Every remaining line is
	converted with get_dictionary_for_parsed_pileup_line() and kept if its
	"refseq" field equals gene_name.

	Returns a dict mapping each position (int) to the list of line
	dictionaries for that position, as built by
	divide_line_dictionary_list_by_position().
	"""
	line_dictionary_list = []
	# 'with' guarantees that the file is closed again, also when a line
	# cannot be parsed; the handle used to be left open.
	with open(parsed_pileup_filename, 'r') as parsed_pileup_file:
		for i in range(0, int(lines_to_skip)):
			parsed_pileup_file.readline()
		for line in parsed_pileup_file:
			line_dictionary = get_dictionary_for_parsed_pileup_line(line)
			if line_dictionary["refseq"] == gene_name:
				line_dictionary_list.append(line_dictionary)
	return divide_line_dictionary_list_by_position(line_dictionary_list)

def get_dictionary_for_parsed_pileup_line(line):
	"""Convert one tab-separated line of a parsed pileup file into a dictionary.

	Trailing newline / carriage-return characters are removed, the line is
	split on tabs, and the i-th field is stored under the i-th name of
	global_header_field_list.  All values stay strings; trailing spaces are
	removed from the "library" value.

	Raises IOError if the number of fields differs from the number of
	header fields.
	"""
	line_field_list = line.rstrip('\n\r').split('\t')
	if len(line_field_list) != len(global_header_field_list):
		# call form of raise: the 'raise E, msg' statement form no longer
		# exists in Python 3; both behave the same on Python 2
		raise IOError('Each line must have one field for each field in the header')

	line_dictionary = dict(zip(global_header_field_list, line_field_list))
	line_dictionary["library"] = line_dictionary["library"].rstrip(" ")

	# Derived fields (the <Nn>_HQ_and_LQ / ref_HQ_and_LQ totals, FrAa, FrTt,
	# skewA, skewT) used to be added here; that code is disabled, so only
	# the fields present in the file are returned.
	return line_dictionary

# Note:
#	There may be some approximation error in determining the total number of
#	(high quality and low quality) reads of each type
def add_Nn_HQ_and_LQ_to_line_dictionary(line_dictionary, base):
	"""Store an estimate of the total (high + low quality) read count for one base.

	The high-quality count is the sum of the forward (uppercase key) and
	reverse (lowercase key) counts for 'base'. Dividing it by the fraction
	stored under "FrHQ_for_<Base><base>" and rounding gives the estimate,
	which is written back into line_dictionary as a string under
	"<Base><base>_HQ_and_LQ" (e.g. "Aa_HQ_and_LQ"). When the fraction is
	zero the value 'NA' is stored instead.

	Parameters:
		line_dictionary -- dictionary for one parsed pileup line; modified in place
		base -- uppercase 'A', 'C', 'T' or 'G'

	Raises:
		IOError if base is not one of the four uppercase bases
	"""
	if base not in ('A', 'C', 'T', 'G'):
		raise IOError('In add_Nn_HQ_and_LQ_to_line_dictionary(): Expected base to be uppercase A, C, T, or G')
	base_lc = base.lower()
	output_key = base + base_lc + "_HQ_and_LQ"
	Nn_HQ = int(line_dictionary[base]) + int(line_dictionary[base_lc])
	FrHQ_for_Nn = float(line_dictionary["FrHQ_for_" + base + base_lc])
	if FrHQ_for_Nn != 0.0:
		# FrHQ_for_Nn is already a float, so this is true division
		line_dictionary[output_key] = str(int(round(Nn_HQ / FrHQ_for_Nn)))
	else:
		# a zero fraction gives nothing to scale by
		line_dictionary[output_key] = 'NA'
	return

# Note:
#	There may be some approximation error in determining the total number of
#	(high quality and low quality) reads of each type
def add_reference_HQ_and_LQ_to_line_dictionary(line_dictionary):
	"""Store an estimate of the total (high + low quality) reference read count.

	The high-quality reference count is the sum of the "comma" and "dot"
	fields. It is divided by the "FrHQ_for_ref" fraction and rounded; the
	result is written into line_dictionary["ref_HQ_and_LQ"] as a string.
	When the fraction is zero, 'NA' is stored instead.

	The dictionary is modified in place; nothing is returned.
	"""
	hq_reference_reads = int(line_dictionary["comma"]) + int(line_dictionary["dot"])
	hq_fraction = float(line_dictionary["FrHQ_for_ref"])
	if hq_fraction == 0.0:
		estimate = 'NA'
	else:
		estimate = str(int(round(float(hq_reference_reads) / hq_fraction)))
	line_dictionary["ref_HQ_and_LQ"] = estimate

# This is not necessarily the version of skewN that will be read in from the parsed pileup file:
#def get_skew_N(line_dictionary, base_forward, base_reverse):
#	N = float(line_dictionary[base_forward])
#	n = float(line_dictionary[base_reverse])
#	comma = float(line_dictionary["comma"])
#	dot = float(line_dictionary["dot"])
#
#	skew_N = (float(N) / float(n)) * (float(dot) / float(comma))
#	return skew_N

def get_line_dictionary_for_library(line_dictionary_list, library_name):
	"""Return the one line dictionary whose 'library' field equals library_name.

	Delegates to get_single_line_dictionary_with_value_for_field(), so an
	IOError is raised unless exactly one dictionary matches.
	"""
	matching_line_dictionary = get_single_line_dictionary_with_value_for_field(
		line_dictionary_list, 'library', library_name)
	return matching_line_dictionary

# Note:
# If field == position, value should be an integer, not a string
def get_single_line_dictionary_with_value_for_field(line_dictionary_list, field, value):
	"""Return the unique line dictionary whose 'field' entry matches 'value'.

	Parameters:
		line_dictionary_list -- list of line dictionaries to search
		field -- dictionary key to compare
		value -- value to look for; when field is "position" this must be an
			integer, because subset_line_dictionary_list() converts the stored
			position to int before comparing

	Raises:
		IOError unless exactly one dictionary matches
	"""
	line_dictionary_subset_list = subset_line_dictionary_list(line_dictionary_list, field, [value])
	if len(line_dictionary_subset_list) != 1:
		raise IOError('Error in get_single_line_dictionary_with_value_for_field: expected to find exactly one line dictionary with specified value for specified field')
	return line_dictionary_subset_list[0]

# Note:
# If field == position, value_list should be a list of integers, not strings
def subset_line_dictionary_list(line_dictionary_list, field, value_list):
	"""Return the line dictionaries whose 'field' entry appears in value_list.

	Input order is preserved. When field is "position" the stored value is
	converted to int before the membership test, so value_list must then
	hold integers rather than strings.
	"""
	if field == "position":
		return [entry for entry in line_dictionary_list if int(entry[field]) in value_list]
	return [entry for entry in line_dictionary_list if entry[field] in value_list]

# Note:
# If field == position, value_list should be a list of integers, not strings
def exclude_from_line_dictionary_list(line_dictionary_list, field, value_list):
	"""Return the line dictionaries whose 'field' entry does NOT appear in value_list.

	Input order is preserved. When field is "position" the stored value is
	converted to int before the membership test, so value_list must then
	hold integers rather than strings.
	"""
	if field == "position":
		return [entry for entry in line_dictionary_list if int(entry[field]) not in value_list]
	return [entry for entry in line_dictionary_list if entry[field] not in value_list]

def get_base_change_rate(line_dictionary_list, reference_base, new_base, new_base_lc):
	"""Compute the fraction of all reads that show a given base change.

	Parameters:
		line_dictionary_list -- non-empty list of line dictionaries
		reference_base -- reference base; only used when new_base is 'N'
		new_base -- key of the forward-read count column, or 'N' to pool the
			counts of every non-reference base
		new_base_lc -- key of the reverse-read count column (ignored when
			new_base is 'N')

	Returns:
		(base_change_rate, valid_result): the rate is the summed forward +
		reverse counts divided by the summed "coverage". If the total coverage
		is zero the rate is 0.0 and valid_result is False.

	Raises:
		IOError if line_dictionary_list is empty
	"""
	if len(line_dictionary_list) == 0:
		raise IOError('Error: Expected at least one line dictionary in list')

	if new_base == 'N':
		N_list, n_list = select_base_change_counts_from_dictionary_list(line_dictionary_list, reference_base)
	else:
		N_list = select_values_from_dictionary_list(line_dictionary_list, new_base)
		n_list = select_values_from_dictionary_list(line_dictionary_list, new_base_lc)
	coverage_list = select_values_from_dictionary_list(line_dictionary_list, "coverage")

	# convert while summing, so the lists handed back by the helpers are not modified
	total_new_base_reads = sum(int(count) for count in N_list) + sum(int(count) for count in n_list)
	total_reads = sum(int(count) for count in coverage_list)

	if total_reads == 0:
		# no coverage at all: report 0.0 and flag the result as not valid
		return 0.0, False

	# don't use the rule of succession:
	return float(total_new_base_reads) / float(total_reads), True

def get_overall_SkewN(line_dictionary_list, reference_base, new_base, new_base_lc):
	"""Compute the strand skew of a base change relative to the reference reads.

	The result is
		((forward new-base reads + 1) / (reverse new-base reads + 1))
		* ((reverse reference reads + 1) / (forward reference reads + 1))
	with every count summed over line_dictionary_list. As in the rest of this
	function, the "comma" column is summed as forward reference reads and the
	"dot" column as reverse reference reads. Adding 1 to each total avoids
	division by zero.

	Parameters:
		line_dictionary_list -- non-empty list of line dictionaries
		reference_base -- reference base; only used when new_base is 'N'
		new_base -- key of the forward-read count column, or 'N' to compute
			the skew across all types of base change
		new_base_lc -- key of the reverse-read count column (ignored when
			new_base is 'N')

	Raises:
		IOError if line_dictionary_list is empty
	"""
	if len(line_dictionary_list) == 0:
		raise IOError('Error: Expected at least one line dictionary in list')

	if new_base == 'N':
		## If new_base == 'N', compute the skew across all types of base change
		N_list, n_list = select_base_change_counts_from_dictionary_list(line_dictionary_list, reference_base)
	else:
		N_list = select_values_from_dictionary_list(line_dictionary_list, new_base)
		n_list = select_values_from_dictionary_list(line_dictionary_list, new_base_lc)
	comma_list = select_values_from_dictionary_list(line_dictionary_list, "comma")
	dot_list = select_values_from_dictionary_list(line_dictionary_list, "dot")

	# convert while summing, so the lists handed back by the helpers are not modified
	total_new_base_forward_reads = sum(int(count) for count in N_list)
	total_new_base_reverse_reads = sum(int(count) for count in n_list)
	total_reference_forward_reads = sum(int(count) for count in comma_list)
	total_reference_reverse_reads = sum(int(count) for count in dot_list)

	new_base_ratio = float(total_new_base_forward_reads + 1) / float(total_new_base_reverse_reads + 1)
	reference_ratio = float(total_reference_reverse_reads + 1) / float(total_reference_forward_reads + 1)
	return new_base_ratio * reference_ratio

def select_base_change_counts_from_dictionary_list(line_dictionary_list, reference_base):
	"""Sum the read counts of the three non-reference bases for each line dictionary.

	Parameters:
		line_dictionary_list -- list of line dictionaries
		reference_base -- 'A', 'C', 'G' or 'T'; this base is left out of the sums

	Returns:
		(forward_counts, reverse_counts): two lists of integers, one entry per
		value returned by select_values_from_dictionary_list(). Forward counts
		come from the uppercase base columns, reverse counts from the lowercase
		ones.

	Raises:
		ValueError if reference_base is not one of 'A', 'C', 'G', 'T'

	Counts are converted with int() before being added.
	NOTE(review): select_values_from_dictionary_list() is defined elsewhere.
	Line dictionaries built from a parsed pileup line hold strings, and the
	callers in this file int() these results, so the values are presumably
	strings; with strings, '+=' would concatenate digits instead of adding.
	Confirm against that helper.
	"""
	base_list = ['A', 'C', 'G', 'T']
	base_list.remove(reference_base)
	forward_polymorphism_count_list = None
	reverse_polymorphism_count_list = None
	for base in base_list:
		base_lc = base.lower()
		# build fresh integer lists so the helper's own lists are never modified
		forward_base_count_list = [int(count) for count in select_values_from_dictionary_list(line_dictionary_list, base)]
		reverse_base_count_list = [int(count) for count in select_values_from_dictionary_list(line_dictionary_list, base_lc)]
		if forward_polymorphism_count_list is None:
			# if this is the first polymorphism base we are considering
			forward_polymorphism_count_list = forward_base_count_list
			reverse_polymorphism_count_list = reverse_base_count_list
		else:
			for i in range(0, len(forward_base_count_list)):
				forward_polymorphism_count_list[i] += forward_base_count_list[i]
				reverse_polymorphism_count_list[i] += reverse_base_count_list[i]
	return forward_polymorphism_count_list, reverse_polymorphism_count_list

def get_refbase_orientation_bias(line_dictionary):
	"""Return (forward reference reads + 1) / (reverse reference reads + 1).

	Forward reads are taken from the "comma" field and reverse reads from the
	"dot" field of line_dictionary. The result is a float.
	"""
	# adding 1 to both counts (rule of succession) keeps the denominator non-zero
	forward_plus_one = int(line_dictionary["comma"]) + 1
	reverse_plus_one = int(line_dictionary["dot"]) + 1
	return float(forward_plus_one) / float(reverse_plus_one)

def get_sum_of_coverage(line_dictionary_list):
	"""Return the total of the "coverage" fields (as integers) over all line dictionaries.

	An empty list gives 0.
	"""
	return sum(int(entry["coverage"]) for entry in line_dictionary_list)

def print_parsed_pileup_lines_for_line_dictionary_list(line_dictionary_list):
	"""Print each line dictionary as one tab-delimited line on standard output.

	The fields are written in the order given by global_header_field_list.
	Every value must already be a string.
	"""
	for line_dictionary in line_dictionary_list:
		# join once instead of growing the string one field at a time
		output_line = '\t'.join([line_dictionary[header_field] for header_field in global_header_field_list])
		# a single parenthesised argument prints identically under the print statement
		print(output_line)
	return

def print_candidates_for_gene__z_scores_method_or_alternate_method(candidate_list_for_gene):
	"""Print the "string" entry of every candidate dictionary, one per line, in list order."""
	for candidate in candidate_list_for_gene:
		candidate_text = candidate["string"]
		# a single parenthesised argument prints identically under the print statement
		print(candidate_text)

def sort_dictionary_list_by_field(dictionary_list, field):
	"""Return a new list of the dictionaries sorted in ascending order of dictionary[field].

	The input list is left untouched. The sort is stable: dictionaries with
	equal field values keep their relative input order, and the dictionaries
	themselves are never compared with each other.

	Raises:
		KeyError if any dictionary lacks 'field'
	"""
	return sorted(dictionary_list, key=lambda dictionary: dictionary[field])

def sort_string_list_by_length(string_list):
	"""Return a new list of the strings ordered by length, shortest first.

	Strings of equal length are ordered by ordinary string comparison.
	The input list is left untouched.
	"""
	return sorted(string_list, key=lambda text: (len(text), text))

# Script entry point: run find_mutations() on the positional command-line arguments.
# sys.argv[1], [2], [4], [5], [6] and [7] are forwarded individually; sys.argv[3], [8],
# [9] and [10] are not passed by this call. Everything from sys.argv[11] onward is
# passed together as a single list (the last argument).
# An IndexError is raised here if fewer than seven arguments follow the script name.
# NOTE(review): the call is not guarded by `if __name__ == "__main__":`, so it also
# runs whenever this file is imported -- confirm that this is intended.
find_mutations(sys.argv[1], sys.argv[2], sys.argv[4], sys.argv[5], sys.argv[6], sys.argv[7], sys.argv[11:])



