#!/usr/bin/env python
import sys

## exactly four arguments are required (sys.argv[0] is the script name itself)
if len(sys.argv) != 5:
	## call-style raise: the statement form 'raise E, msg' is a syntax error in Python 3,
	## while this form raises the same IOError with the same message on Python 2 and 3
	raise IOError('Usage: ./get_SNP_file_with_negative_calls.py SNP_filename pileup_filename negative_call_coverage_threshold_string output_file')

def _parse_position(position_string):
	"""Return position_string as an int, or None if it is not a plain run of digits."""
	## isdigit() (unlike isalnum()) rejects letters, signs and whitespace up front
	if not position_string.isdigit():
		return None
	try:
		return int(position_string)
	except ValueError:
		## some digit-like characters (e.g. superscripts) pass isdigit() but not int()
		return None


def _copy_SNP_calls(SNP_file, output_file):
	"""Copy the SNP lines to output_file and return the set of (sequence, position) SNP locations."""
	SNP_locations = set()
	for line in SNP_file:
		line = line.rstrip('\n\r')
		if not line:
			## a blank line has no position column to read, so drop it
			continue
		field_list = line.split('\t')
		sequence = field_list[0]
		position_string = field_list[1].lower()

		## every SNP-file line (header included) is passed through unchanged
		output_file.write(line + '\n')

		## header lines describe no SNP location, so only data lines are remembered
		if position_string not in ('pos', 'position'):
			SNP_locations.add((sequence, int(position_string)))
	return SNP_locations


def _write_negative_calls(pileup_file, output_file, SNP_locations, coverage_threshold):
	"""Append a 'negative SNP call' for each sufficiently covered pileup location without a SNP."""
	for line in pileup_file:
		line = line.rstrip('\n\r')
		if not line:
			continue
		field_list = line.split('\t')
		sequence = field_list[0]
		position_string = field_list[1]

		## validate the position before touching the other columns, so that a header
		## or otherwise malformed line is skipped with a warning instead of crashing
		position = _parse_position(position_string)
		if position is None:
			location = '%s\t%s' % (sequence, position_string)
			## single-argument print(...) produces the same output on Python 2 and 3
			print("WARNING: Found sequence location '%s' where the value in the position column is not an integer.  Skipping location." % location)
			continue

		refbase = field_list[2]
		coverage = int(field_list[3])

		## compare on the integer position so that e.g. '007' matches a SNP at 7
		if (sequence, position) in SNP_locations:
			continue
		if coverage >= coverage_threshold:
			## there is probably no SNP there: report the reference base as the called base
			output_file.write('%s\t%d\t%s\t%s\n' % (sequence, position, refbase, refbase))


def main(SNP_filename, pileup_filename, negative_call_coverage_threshold_string, output_filename):
	"""Write the SNP file plus 'negative SNP calls' for well-covered non-SNP locations.

	Every non-blank line of the SNP file is copied to the output file.  Then, for
	each pileup line whose (sequence, position) is not a SNP location and whose
	coverage is at least the threshold, a tab-separated line
	'sequence, position, reference base, reference base' is appended.

	Arguments:
	SNP_filename -- tab-separated file; column 1 is the sequence name and column 2
		the position.  A line whose position column is 'pos' or 'position'
		(case-insensitive) is treated as a header.
	pileup_filename -- tab-separated file; columns 1-4 are read as sequence name,
		position, reference base and coverage.  Lines whose position is not a
		plain integer are skipped with a warning printed to standard output.
	negative_call_coverage_threshold_string -- minimum coverage (inclusive), as a string.
	output_filename -- file to create or overwrite.

	Raises ValueError if the threshold, a SNP position or a pileup coverage value is
	not an integer, IndexError if a non-blank line has too few columns, and
	IOError/OSError if a file cannot be opened.
	"""
	negative_call_coverage_threshold = int(negative_call_coverage_threshold_string)
	## 'with' closes every file even when a malformed line raises part-way through;
	## the SNP file is opened first so a missing input never truncates the output
	with open(SNP_filename, 'r') as SNP_file:
		with open(output_filename, 'w') as output_file:
			SNP_locations = _copy_SNP_calls(SNP_file, output_file)
			with open(pileup_filename, 'r') as pileup_file:
				_write_negative_calls(pileup_file, output_file, SNP_locations, negative_call_coverage_threshold)
	return

## run the conversion with the four command-line arguments, in the order main() expects
main(*sys.argv[1:5])
