#!/usr/bin/env python
import sys

# The script needs exactly two command-line arguments after the program name:
# the name of the sequence to extract and the file to read it from.
if len(sys.argv) != 3:
	# Call-style raise is accepted by both Python 2 and Python 3; the old
	# "raise IOError, msg" form is a SyntaxError on Python 3.
	raise IOError('Usage: ./select_sequence_from_file.py sequence_name sequence_filename')

def select_sequence_from_file(sequence_name, sequence_filename):
	"""Read the sequence called sequence_name from sequence_filename and
	write it to standard output under a '>' header line.

	Errors raised while reading or validating the sequence propagate to
	the caller.
	"""
	write_sequence_to_stdout(
		get_sequence_from_file(sequence_name, sequence_filename),
		sequence_name)

def get_sequence_from_file(sequence_name, sequence_filename):
	"""Return the bases of the FASTA-style record named sequence_name.

	The file is scanned for a header line ('>' in the first column) whose
	first whitespace-separated field equals sequence_name. The lines that
	follow are collected until a blank line, another header line, or the
	end of the file is reached.

	Parameters:
		sequence_name: name to match against the first field of each header.
		sequence_filename: path of the file to read.

	Returns:
		A list of single-character base strings, as filled in by
		append_bases_from_line_to_sequence.

	Raises:
		IOError: if no header with the requested name exists, if the file
			cannot be opened, or if append_bases_from_line_to_sequence
			rejects a line.
	"""
	base_sequence = []
	# The with-block guarantees the file is closed even when an error is
	# raised part-way through.
	with open(sequence_filename, 'r') as sequence_file:
		# find sequence for specified gene
		found_sequence_for_gene = False
		for line in sequence_file:
			if not line.startswith('>'):
				continue
			# found FASTA-style comment - check gene name
			comment_field_list = line[1:].split()
			# A bare '>' header has no fields and therefore no name to
			# compare; skip it instead of failing on an empty list.
			if comment_field_list and comment_field_list[0] == sequence_name:
				found_sequence_for_gene = True
				break
		if not found_sequence_for_gene:
			raise IOError('Did not find FASTA-style sequence named \'' + sequence_name + '\'')

		# Keep reading from the same file position: these are the lines
		# that belong to the matched header.
		for line in sequence_file:
			line = line.rstrip('\r\n')
			if line == '' or line[0] == '>':
				# blank line or the start of another sequence ends this one
				break
			# remove extra whitespace before validating the bases
			append_bases_from_line_to_sequence(line.strip(), base_sequence)
	return base_sequence

# Upper-case base codes accepted in a sequence line: A, C, G, T plus the
# ambiguity codes M, R, W, S, Y, K, V, H, D, B and N.
_VALID_BASES = frozenset('ACTGMRWSYKVHDBN')

def append_bases_from_line_to_sequence(line, base_sequence):
	"""Append every base in line, upper-cased, to base_sequence.

	Parameters:
		line: string whose characters are the bases to add.
		base_sequence: list that is extended in place, one upper-case
			character per base.

	Returns:
		None; base_sequence is modified in place.

	Raises:
		IOError: on the first character that is not an accepted base code.
			The message quotes the character as it appeared in line.
			Bases that precede the offending character have already been
			appended when the error is raised.
	"""
	for original_base in line:
		base = original_base.upper()
		if base not in _VALID_BASES:
			raise IOError('Invalid base: \'' + original_base + '\'')
		base_sequence.append(base)

def write_sequence_to_stdout(base_sequence, sequence_name):
	"""Write the sequence to standard output in FASTA layout.

	The output is a '>' header line carrying sequence_name, followed by
	the bases wrapped at 80 characters per line. Every line, including
	the last one, ends with a newline so that several outputs can be
	concatenated without the next header running into the bases.

	Parameters:
		base_sequence: sequence of single-character base strings.
		sequence_name: name written after the '>' on the header line.
	"""
	line_width = 80
	output_lines = ['>' + sequence_name]
	for start in range(0, len(base_sequence), line_width):
		output_lines.append(''.join(base_sequence[start:start + line_width]))
	# One write for the whole record instead of one write per base.
	sys.stdout.write('\n'.join(output_lines) + '\n')

# Script entry point: the two arguments after the program name are the
# sequence name and the sequence file, in that order.
select_sequence_from_file(*sys.argv[1:3])

