#!/usr/bin/env python

import sys
import fasta_file_helper_functions

def get_num_reads_to_select_from_string(num_reads_to_select_string, sequence_length, read_length):
	"""Turn a read-count specification string into an integer number of reads.

	The specification is either a plain integer ("500") or a coverage value
	marked by a trailing 'x' / 'X' ("2.5x").  For a coverage value the count
	is round(coverage * sequence_length / read_length).

	Args:
		num_reads_to_select_string: the specification text; must be non-empty.
		sequence_length: length of the sequence being sampled (only used for
			the coverage form).
		read_length: length of each read (only used for the coverage form).

	Returns:
		The number of reads to select, as an int.

	Raises:
		IndexError: if the specification string is empty.
		ValueError: if the numeric part cannot be parsed.
		ZeroDivisionError: if a coverage is given and read_length is zero.
	"""
	## only the final character decides which of the two forms we were given
	suffix = num_reads_to_select_string[-1]
	if suffix != 'x' and suffix != 'X':
		## plain form: the string is already the total number of reads
		return int(num_reads_to_select_string)

	## coverage form: everything before the suffix is the fold coverage
	fold_coverage = float(num_reads_to_select_string[:-1])
	total_bases_needed = float(fold_coverage * sequence_length)
	return int(round(total_bases_needed / float(read_length)))

def print_n_random_reads_of_length_x_from_specific_sequence_to_output_file(num_reads_to_select_string, read_length, sequence_name, name_to_sequence_dict, output_filename, file_option):
	"""Sample random reads from one named sequence and write them to a file.

	Each read is written as a FASTQ-style four-line record:
	'@<id>', the read, '+<id>', and a quality line made of the constant
	character 'h' repeated read_length times.  Read IDs have the form
	'VICTOR-IN-SILICO-v1_<sequence_name>_<index>', with the index counting
	from 0 in selection order.

	Args:
		num_reads_to_select_string: read count or coverage specification,
			interpreted by get_num_reads_to_select_from_string.
		read_length: length of every read to generate.
		sequence_name: key of the sequence to sample from.
		name_to_sequence_dict: mapping from sequence name to sequence.
		output_filename: path of the file to write.
		file_option: mode string handed to open() (e.g. 'w' or 'a').

	Returns:
		None.

	Raises:
		KeyError: if sequence_name is not in name_to_sequence_dict.
	"""
	sequence = name_to_sequence_dict[sequence_name]
	sequence_length = len(sequence)

	num_reads_to_select = get_num_reads_to_select_from_string(num_reads_to_select_string, sequence_length, read_length)

	## randomly select specified number of reads
	random_reads_list = select_n_random_reads_of_length_x_from_sequence(num_reads_to_select, read_length, sequence, sequence_name)

	## the quality line is the same for every read, so build it only once
	base_quality_string = 'h' * read_length

	## write selected reads to output file; the with-block guarantees the
	## file is closed even if one of the writes fails part-way through
	## (the file is still opened when there are no reads to write)
	with open(output_filename, file_option) as output_file:
		for i, read in enumerate(random_reads_list):
			read_ID_string = "VICTOR-IN-SILICO-v1_%s_%d" % (sequence_name, i)
			output_file.write('@%s\n' % read_ID_string)
			output_file.write(read + '\n')
			output_file.write('+%s\n' % read_ID_string)
			output_file.write(base_quality_string + '\n')
	return

def select_n_random_reads_of_length_x_from_sequence(num_reads_to_select, read_length, sequence, sequence_name):
	"""Pick reads of a fixed length starting at random positions of a sequence.

	Every read is the slice of `sequence` that starts at an index obtained
	from fasta_file_helper_functions.get_random_index(num_possible_reads),
	where num_possible_reads = len(sequence) - read_length + 1.  Each read is
	drawn by a separate call, and nothing here prevents the same start
	position from being drawn more than once.

	Progress/debug lines and the "sequence too short" warning are printed to
	standard output.

	Args:
		num_reads_to_select: how many reads to draw.
		read_length: length of every read.
		sequence: the sequence to draw from (anything supporting len() and
			slicing).
		sequence_name: name of the sequence; only used in the warning text.

	Returns:
		A list with the selected reads; empty when the sequence is shorter
		than read_length.
	"""
	## NOTE: single-argument print(...) produces the same output under
	## Python 2 and Python 3, whereas the bare print statement is a syntax
	## error under Python 3
	sequence_length = len(sequence)
	print("DEBUG: sequence_length=%d" % sequence_length)

	random_reads_list = []
	if sequence_length < read_length:
		print('Warning: Not printing any reads for sequence \'%s\', since its length (%d) is shorter than our specified read length (%d)' % (sequence_name, sequence_length, read_length))
		return random_reads_list

	## number of distinct start positions at which a full-length read fits
	num_possible_reads = sequence_length - read_length + 1
	print("DEBUG: number of possible reads=%d" % num_possible_reads)

	for _ in range(num_reads_to_select):
		## randomly select read from sequence
		## (presumably get_random_index returns a value in
		## [0, num_possible_reads) -- confirm against the helper module)
		random_read_start_index = fasta_file_helper_functions.get_random_index(num_possible_reads)
		read_string = sequence[random_read_start_index:(random_read_start_index + read_length)]

		## and add it to the random reads list
		random_reads_list.append(read_string)
	return random_reads_list

