#!/usr/bin/env python

import sys

## Validate the command line up front: the script name plus exactly four paths.
## The call form of raise is used because it is valid in both Python 2 and 3.
if len(sys.argv) != 5:
	raise IOError('Usage: ./create_final_output_file.py read_counts_file read_counts_per_allele_file differential_expression_results_file output_file')

def main(read_counts_filename, read_counts_per_allele_filename, differential_expression_results_filename, output_filename):
	"""Merge three locus-keyed tab-separated tables into one output table.

	Each input file is parsed with get_locus_and_key_to_value_dict(). The
	output starts with a header line ('locus_name' followed by the column
	names of the read-counts file, the per-allele read-counts file and the
	differential-expression file, in that order), followed by one line per
	locus in sorted locus order.

	Args:
		read_counts_filename: path of the read-counts table.
		read_counts_per_allele_filename: path of the per-allele read-counts table.
		differential_expression_results_filename: path of the differential-expression table.
		output_filename: path of the merged table; opened in 'w' mode, so an
			existing file is overwritten.
	"""
	locus_and_file_to_read_count_dict, file_list = get_locus_and_key_to_value_dict(read_counts_filename)
	locus_and_file_to_read_count_per_allele_dict, file_plus_allele_list = get_locus_and_key_to_value_dict(read_counts_per_allele_filename)
	locus_and_DE_term_to_value_dict, DE_term_list = get_locus_and_key_to_value_dict(differential_expression_results_filename)

	## a locus is reported if it appears in at least one of the three inputs;
	## a set union works on both Python 2 and 3 (keys() + keys() does not)
	locus_set = set(locus_and_file_to_read_count_dict)
	locus_set.update(locus_and_DE_term_to_value_dict)
	locus_set.update(locus_and_file_to_read_count_per_allele_dict)

	all_keys_list = file_list + file_plus_allele_list + DE_term_list
	header_line = 'locus_name' + '\t' + ('\t'.join(all_keys_list))

	## (column names, lookup table) pairs, listed in output column order
	column_group_list = [
		(file_list, locus_and_file_to_read_count_dict),
		(file_plus_allele_list, locus_and_file_to_read_count_per_allele_dict),
		(DE_term_list, locus_and_DE_term_to_value_dict),
	]

	## print out information for each locus, in sorted order;
	## 'with' guarantees the output file is closed even if a lookup fails part-way
	with open(output_filename, 'w') as output_file:
		output_file.write(header_line + '\n')
		for locus in sorted(locus_set):
			output_line = locus
			for key_list, locus_and_key_to_value_dict in column_group_list:
				output_line = append_values_for_locus_to_output_line(output_line, locus, key_list, locus_and_key_to_value_dict)
			output_file.write(output_line + '\n')
	return

def get_locus_and_key_to_value_dict(input_filename):
	"""Parse a tab-separated table into a nested locus -> column -> value dict.

	The first line of the file is the header. On every following line the
	first field is the locus and the remaining fields are stored, as strings,
	under the corresponding header names. If a locus occurs on several lines,
	the last line replaces the earlier ones.

	Args:
		input_filename: path of the tab-separated file to read.

	Returns:
		A tuple (locus_and_key_to_value_dict, key_list), where key_list is the
		header without its first column.

	Raises:
		IOError: if a data line has a different number of fields than the
			header, or if at least one data line exists and the first header
			column is neither 'id' nor 'locus_name'.
	"""
	locus_and_key_to_value_dict = {}

	## 'with' closes the file even when one of the checks below raises
	with open(input_filename) as input_file:
		header_line = input_file.readline().rstrip('\n\r')
		key_list = header_line.split('\t')
		value_key_list = key_list[1:]
		## evaluated once instead of per line; it is still only enforced when a
		## data line is read, so a header-only (or empty) file is accepted
		first_column_is_expected = key_list[0] in ('id', 'locus_name')

		for line in input_file:
			field_list = line.rstrip('\r\n').split('\t')
			if len(field_list) != len(key_list):
				raise IOError('Expected same number of fields on each line')
			if not first_column_is_expected:
				raise IOError('Pipeline error: assumed first column would have header value \'id\' or \'locus_name\'')
			## field counts match, so zip pairs every column name with its value
			locus_and_key_to_value_dict[field_list[0]] = dict(zip(value_key_list, field_list[1:]))
	return locus_and_key_to_value_dict, value_key_list

def append_values_for_locus_to_output_line(output_line, locus, key_list, locus_and_key_to_value_dict, missing_value='NA'):
	"""Append the tab-separated values of one locus to an output line.

	Args:
		output_line: the line built so far. If it is the empty string, the
			values start the line and no leading tab is written.
		locus: the locus whose values are looked up.
		key_list: column names whose values are appended, in this order.
		locus_and_key_to_value_dict: nested dict, locus -> key -> string value.
		missing_value: text written for a column when the locus (or one of
			its keys) is absent from locus_and_key_to_value_dict. Callers pass
			the union of loci over several tables, so a locus may legitimately
			be missing from an individual table.

	Returns:
		output_line followed by one tab-separated field per entry of key_list;
		output_line unchanged when key_list is empty.
	"""
	value_dict = locus_and_key_to_value_dict.get(locus, {})
	field_list = [value_dict.get(key, missing_value) for key in key_list]
	if output_line != '':
		field_list.insert(0, output_line)
	## a single join keeps one field per key, including empty-string values
	return '\t'.join(field_list)

## Run the merge with the four command-line paths, passed through in the order given.
main(*sys.argv[1:5])

