#!/usr/bin/env python

import sys

#######################################################
#################### Input/Output #####################
#######################################################

# Nucleic acid code table: maps each accepted one-letter symbol to the list
# of concrete bases (A, C, G, T) it can stand for. Single-base symbols map to
# themselves; the remaining symbols are ambiguity codes covering two, three
# or all four bases.
symbol_to_represented_nucleic_acids_dict = {
	# unambiguous bases
	'A': ['A'],
	'C': ['C'],
	'T': ['T'],
	'G': ['G'],
	# two-base ambiguity codes
	'M': ['A', 'C'],
	'R': ['A', 'G'],
	'W': ['A', 'T'],
	'S': ['C', 'G'],
	'Y': ['C', 'T'],
	'K': ['G', 'T'],
	# three-base ambiguity codes
	'V': ['A', 'C', 'G'],
	'H': ['A', 'C', 'T'],
	'D': ['A', 'G', 'T'],
	'B': ['C', 'G', 'T'],
	# any base
	'N': ['A', 'C', 'G', 'T'],
}



def request_fasta_sequence(sequence_name):
	"""Prompt for and read one FASTA-formatted sequence from standard input.

	The first line read must be a FASTA comment line (one starting with
	'>'). Sequence lines are then read until an empty line or the end of
	input is reached.

	sequence_name -- label inserted into the prompt shown to the user.

	Returns the sequence as a list of upper-case nucleic acid symbols.

	Raises IOError if the first line is not a FASTA comment line, or if a
	sequence line contains a symbol that is not a key of
	symbol_to_represented_nucleic_acids_dict.
	"""
	# A parenthesised single-argument print behaves the same on Python 2 and 3.
	print('Please enter the ' + sequence_name + ' sequence in FASTA format:')
	# read comment line
	header_line = sys.stdin.readline().rstrip('\n\r')
	if not header_line.startswith('>'):
		raise IOError('entered base sequence should begin with a FASTA-style commented line')
	# read base sequence
	base_sequence = []
	while True:
		line = sys.stdin.readline().rstrip('\n\r')
		# readline() returns '' at end of input, so EOF ends the sequence
		# exactly like an explicitly entered empty line does.
		if line == "":
			break
		# Drop surrounding whitespace (as get_sequence_from_file does) so that
		# stray spaces in pasted input are not rejected as invalid bases, then
		# reuse the shared validation/upper-casing helper.
		append_bases_from_line_to_sequence(line.strip(), base_sequence)
	return base_sequence

def get_sequence_from_file(gene_name, sequence_filename):
	"""Read the sequence recorded for one gene from a FASTA file.

	The file is scanned for the first comment line ('>' line) whose first
	whitespace-separated field equals gene_name. The lines after it are read
	as the sequence until an empty line, another comment line, or the end of
	the file.

	gene_name -- name to match against the first field of each comment line.
	sequence_filename -- path of the FASTA file to read.

	Returns the sequence as a list of upper-case nucleic acid symbols.

	Raises IOError if no comment line matches gene_name, or if a sequence
	line contains an invalid symbol (raised by
	append_bases_from_line_to_sequence).
	"""
	# The with-block closes the file on every exit path, including the
	# IOError raises below.
	with open(sequence_filename, 'r') as sequence_file:
		# find sequence for specified gene
		found_sequence_for_gene = False
		for line in sequence_file:
			line = line.strip()
			if not line.startswith('>'):
				continue
			# found FASTA-style comment - check gene name
			comment_field_list = line[1:].split()
			# A bare '>' line has no fields; skip it instead of indexing
			# into an empty list.
			if comment_field_list and comment_field_list[0] == gene_name:
				found_sequence_for_gene = True
				break
		if not found_sequence_for_gene:
			raise IOError('Did not find FASTA-style sequence for gene: ' + gene_name)

		# Continue reading from the line right after the matched comment.
		base_sequence = []
		for line in sequence_file:
			line = line.rstrip('\n\r')
			# an empty line or the start of another sequence ends this one
			if line == "" or line[0] == '>':
				break
			# remove extra whitespace before validating the bases
			append_bases_from_line_to_sequence(line.strip(), base_sequence)
	return base_sequence

def append_bases_from_line_to_sequence(line, base_sequence):
	"""Validate every character of line and append it to base_sequence.

	line -- string of nucleic acid symbols, in either case.
	base_sequence -- list that is extended in place with the upper-cased
		symbols.

	Returns None.

	Raises IOError on the first character whose upper-case form is not a
	key of symbol_to_represented_nucleic_acids_dict. Symbols that precede
	the offending character have already been appended by then.
	"""
	for original_base in line:
		# upper() leaves characters without a case unchanged, so no separate
		# isalpha() check is needed before normalising.
		base = original_base.upper()
		if base not in symbol_to_represented_nucleic_acids_dict:
			# report the character exactly as it appeared in the input
			raise IOError('Invalid base: \'' + original_base + '\'')
		base_sequence.append(base)
	return

def list_contains_duplicate_elements(list):
	"""Report whether any element of list occurs more than once.

	list -- iterable of elements; elements are compared pairwise with the
		'in' operator, so they do not need to be hashable.

	Returns True if some element also appears later in the sequence,
	False otherwise (including for empty input).
	"""
	# Take a snapshot so the scan can index and slice any iterable.
	snapshot = tuple(list)
	for position, candidate in enumerate(snapshot):
		# only look at what follows: earlier pairs were already compared
		if candidate in snapshot[position + 1:]:
			return True
	return False


