#!/usr/bin/env python

import sys, read_in_SNP_data__module
# Command-line check: the script needs exactly three arguments after its own
# name (two input SNP files and one output file). Anything else aborts with
# the usage text. The call form `raise IOError(...)` is used because the
# `raise IOError, msg` statement form is a syntax error on Python 3.
if len(sys.argv) != 4:
	raise IOError(
			'Usage: ./get_SNPs_between_the_two_other_alleles_from_two_SNP_files.py SNP_file_1 SNP_file_2 output_file\n'
			'Important:\n1. The \'reference allele\' must be the same across both SNP files!\n'
			'2. Each input SNP file is assumed to report both (a) positions with a SNP, i.e. A->C, and (b) positions with no SNP, i.e. A->A\n'
			'3. The created output SNP file will report only positions where there is a predicted SNP between the two \'other\' alleles')

def main(SNP_filename_1, SNP_filename_2, output_filename):
	"""Write the positions at which the two SNP files report different bases.

	Both input files are loaded with read_in_SNP_data__module.get_SNP_data,
	which is unpacked here into three per-file mappings, each indexed as
	mapping[chromosome][position] (presumably: called positions, reference
	base, and reported base -- confirm against that module).

	For every (chromosome, position) present in both files, the reference
	bases are compared first; then, if the base reported by file 1 differs
	from the base reported by file 2, one line is written to the output file
	with file 1's base in the 'reference' column and file 2's base in the
	'SNP' column. Positions present in only one file are skipped.

	Parameters:
		SNP_filename_1 -- path of the first input SNP file
		SNP_filename_2 -- path of the second input SNP file
		output_filename -- path of the SNP file to create (opened with 'w',
			so an existing file is overwritten)

	Raises:
		IOError -- if the two files disagree on the reference base at a
			shared position.
	"""
	get_SNP_data = read_in_SNP_data__module.get_SNP_data
	SNP_dict_1, SNP_to_refbase_dict_1, SNP_to_base_change_dict_1 = get_SNP_data(SNP_filename_1)
	SNP_dict_2, SNP_to_refbase_dict_2, SNP_to_base_change_dict_2 = get_SNP_data(SNP_filename_2)
	## write to new SNP file; the 'with' block closes the file even when the
	## reference-allele check below (or anything else) raises
	with open(output_filename, 'w') as output_file:
		for chromosome in SNP_dict_1:
			for position in SNP_dict_1[chromosome]:
				if not position_in_SNP_dict(chromosome, position, SNP_dict_2):
					## position is not reported in the second file: nothing to compare
					continue
				## we have either a SNP call or a 'no SNP' call at this position, in both files
				if SNP_to_refbase_dict_1[chromosome][position] != SNP_to_refbase_dict_2[chromosome][position]:
					raise IOError('The same reference allele must be used in the two SNP files')
				## file 1's base plays the role of the reference in the output,
				## file 2's base plays the role of the SNP
				refbase = SNP_to_base_change_dict_1[chromosome][position]
				SNPbase = SNP_to_base_change_dict_2[chromosome][position]
				if refbase != SNPbase:
					## there is only a SNP between the two 'other alleles' at the current position if the two base changes are different
					print_SNP(chromosome, position, refbase, SNPbase, output_file)
	return

def position_in_SNP_dict(chromosome, position, SNP_dict):
	"""Return True if SNP_dict has an entry for position on chromosome.

	SNP_dict is indexed first by chromosome and then by position. A
	chromosome that is missing from SNP_dict yields False instead of an
	error.
	"""
	if chromosome not in SNP_dict:
		return False
	return position in SNP_dict[chromosome]

def print_SNP(chromosome, position, refbase, SNPbase, output_file):
	"""Write one SNP record to output_file as a single tab-separated line.

	The columns are, in order: chromosome, position (formatted as an
	integer), refbase and SNPbase. output_file only needs a write() method.
	Nothing is returned.
	"""
	fields = (chromosome, position, refbase, SNPbase)
	record = '%s\t%d\t%s\t%s\n' % fields
	output_file.write(record)

## run the comparison on the three command-line arguments
## (SNP_file_1, SNP_file_2, output_file)
main(*sys.argv[1:4])
