#!/usr/bin/env python

import sys, random
from process_SNP_file__module import *

# Validate the command line as soon as the module is loaded: the script needs
# exactly three arguments after the program name.
if len(sys.argv) != 4:
	# Call-style raise is equivalent to the old "raise IOError, msg" form on
	# Python 2 and, unlike that form, is also valid syntax on Python 3.
	raise IOError('Usage: ./get_difference_between_SNP_files.py in_SNP_file not_in_SNP_file [ v | s ]')


def main(in_SNP_file_name, not_in_SNP_file_name, output_option):
	"""Print the SNPs found in the first SNP file but absent from the second.

	Both inputs are loaded with get_SNPs_from_file(). The keys of the
	returned mappings are split on tabs into four fields
	(chromosome, position, reference base, SNP base). The SNPs that occur
	only in the first mapping are printed to stdout, one tab-separated line
	each, sorted by chromosome (as a string), then numeric position, then
	reference base, then SNP base.

	Args:
		in_SNP_file_name: file name handed to get_SNPs_from_file() for the
			SNP set that is reported.
		not_in_SNP_file_name: file name handed to get_SNPs_from_file() for
			the SNP set that is excluded.
		output_option: 's' prints only the SNP lines; 'v' additionally
			prints summary counts and the percentage of first-set SNPs that
			are missing from the second set.

	Raises:
		IOError: if output_option is neither 'v' nor 's'.
		ValueError: if a key does not split into exactly four tab-separated
			fields, or its position field is not an integer.
	"""
	if output_option not in ('v', 's'):
		raise IOError('output option must be one of \'v\' or \'s\'')

	SNP_dict_1 = get_SNPs_from_file(in_SNP_file_name)
	SNP_dict_2 = get_SNPs_from_file(not_in_SNP_file_name)

	# Keys present in the first set but not in the second.
	only_in_first = [key for key in SNP_dict_1 if key not in SNP_dict_2]

	# Parse each key so positions sort numerically rather than as text.
	records = []
	for key in only_in_first:
		chrom, pos, refbase, SNPbase = key.split('\t')
		records.append((chrom, int(pos), refbase, SNPbase))
	records.sort()

	# Single-argument print(...) behaves the same on Python 2 and Python 3.
	for record in records:
		print('%s\t%d\t%s\t%s' % record)

	if output_option == 'v':
		num_in_first = len(SNP_dict_1)
		num_in_second = len(SNP_dict_2)
		num_only_in_first = len(only_in_first)
		# An empty first set has nothing missing; report 0% instead of
		# dividing by zero.
		if num_in_first:
			percent_missing = float(num_only_in_first) / float(num_in_first) * 100
		else:
			percent_missing = 0.0
		print('')
		print('SNPs: %d in dict1, %d in dict2, %d in dict1 but not dict2' % (num_in_first, num_in_second, num_only_in_first))
		print('%.2f%% of SNPs in dict1 are not present in dict2' % percent_missing)
		print('')
	return

# Script entry point: pass the three command-line arguments (SNP file to
# report, SNP file to exclude, output option) to main().  The argument-count
# check at the top of the file guarantees sys.argv has exactly four entries.
main(*sys.argv[1:4])
