#!/usr/bin/env python
import sys

def get_gene_list_from_gene_list_with_homologs_file(gene_list_with_homologs_filename):
	"""Return every gene name listed in a gene-list-with-homologs file, in file order.

	Blank lines and lines starting with 'Organism:' are skipped. Every other
	line is read as a whitespace-separated list of gene names, optionally
	wrapped in one pair of parentheses: (homolog1 homolog2 . . . homologN).

	Args:
		gene_list_with_homologs_filename: path of the text file to read.

	Returns:
		A list of gene-name strings in the order they appear in the file.

	Raises:
		IOError: if the file cannot be opened, if a line starts with '(' but
			does not end with ')', or if a gene name appears more than once.
	"""
	gene_list = []
	# Mirror of gene_list used only for the duplicate check, so each lookup is
	# a hash probe instead of a scan of the whole list.
	seen_genes = set()

	# 'with' closes the file even when one of the format errors below is raised.
	with open(gene_list_with_homologs_filename, 'r') as gene_list_with_homologs_file:
		for line in gene_list_with_homologs_file:
			line = line.strip()
			if (not line) or line.startswith('Organism:'):
				continue
			if line[0] == '(':
				if line[-1] != ')':
					raise IOError('Expected (homolog1 homolog2 . . . homologN)')
				# Drop the enclosing parentheses before splitting into names.
				line = line[1:-1]
			for homolog in line.split():
				if homolog in seen_genes:
					raise IOError('Unexpectedly found same gene (%s) appear more than once in gene_list_with_homologs_file' % homolog)
				seen_genes.add(homolog)
				gene_list.append(homolog)
	return gene_list

def read_parsed_pileup_file(parsed_pileup_filename, lines_to_skip, gene_name, header_field_list):
	"""Read a parsed pileup file and group the rows for one gene by position.

	The first lines_to_skip lines are discarded. Every remaining line is
	parsed with get_dictionary_for_parsed_pileup_line, so a malformed line
	raises even when it belongs to a different gene. Only rows whose "refseq"
	field equals gene_name are kept.

	Args:
		parsed_pileup_filename: path of the tab-separated file to read.
		lines_to_skip: number of leading lines to discard; passed through
			int(), so a numeric string is accepted.
		gene_name: value the "refseq" field must equal for a row to be kept.
		header_field_list: field names, one per tab-separated column; must
			include "refseq", "position" and "library".

	Returns:
		A dict mapping integer position to the list of row dictionaries at
		that position, as built by divide_line_dictionary_list_by_position.

	Raises:
		IOError: if the file cannot be opened or a line has the wrong number
			of fields.
	"""
	line_dictionary_list = []
	# 'with' guarantees the file is closed even if a malformed line raises.
	with open(parsed_pileup_filename, 'r') as parsed_pileup_file:
		for _ in range(int(lines_to_skip)):
			parsed_pileup_file.readline()
		for line in parsed_pileup_file:
			line_dictionary = get_dictionary_for_parsed_pileup_line(line, header_field_list)
			if line_dictionary["refseq"] == gene_name:
				line_dictionary_list.append(line_dictionary)
	return divide_line_dictionary_list_by_position(line_dictionary_list)

def get_dictionary_for_parsed_pileup_line(line, header_field_list):
	"""Convert one tab-separated line into a dict keyed by the header field names.

	Trailing newline and carriage-return characters are removed before the
	line is split on tabs. Trailing spaces are stripped from the "library"
	value; all other values are stored exactly as they appear.

	Args:
		line: one raw line of the parsed pileup file.
		header_field_list: field names, one per column; must contain "library".

	Returns:
		A dict mapping each header field name to the matching string value.

	Raises:
		IOError: if the line does not have exactly one field per header field.
		KeyError: if "library" is not among the header field names.
	"""
	line_field_list = line.rstrip('\n\r').split('\t')
	if len(line_field_list) != len(header_field_list):
		raise IOError('Each line must have one field for each field in the header')
	# Lengths are equal here, so zip pairs every header with its value.
	line_dictionary = dict(zip(header_field_list, line_field_list))
	line_dictionary["library"] = line_dictionary["library"].rstrip(" ")
	return line_dictionary

def divide_line_dictionary_list_by_position(line_dictionary_list):
	"""Group row dictionaries by the integer value of their "position" field.

	Args:
		line_dictionary_list: iterable of dicts, each with a "position" entry
			that int() can convert.

	Returns:
		A dict mapping each integer position to the list of row dictionaries
		at that position, in their original relative order.
	"""
	rows_by_position = {}
	for row in line_dictionary_list:
		# setdefault creates the bucket the first time a position is seen.
		rows_by_position.setdefault(int(row["position"]), []).append(row)
	return rows_by_position



