#!/bin/bash
#
# Filters loci by genome mapping bias.
#
# This script relies on arrays, which POSIX sh does not provide, so it must
# be interpreted by bash.
#
# Exactly 12 positional arguments are required; see the usage line below for
# their order.

if [ "$#" -ne 12 ]; then
	# Diagnostics go to stderr, and the script stops instead of running the
	# pipeline with missing arguments.
	printf '%s\n' "Usage: ./filter_loci_by_genome_mapping_bias.sh num_sampled_reads num_reads_threshold sequencing_depth max_num_allelic_reads_threshold mapping_bias_ratio_threshold read_length bowtie_index_basename condition_list sequence_fasta_file_list sequence_feature_annotations_file_list GMB_estimation_output_directory filtered_loci_output_file" >&2
	exit 1
fi

# Bind the 12 positional arguments to named variables (same order as the
# usage line).
num_sampled_reads=$1                  # reads per locus for the ortholog mappings
num_reads_threshold=$2                # passed to get_good_orthologs.py
sequencing_depth=$3                   # coverage for the mapping-bias mappings
max_num_allelic_reads_threshold=$4    # passed to get_orthologs_with_low_genome_mapping_bias.py
mapping_bias_ratio_threshold=$5       # passed to filter_loci_by_genome_mapping_bias_ratio.py
read_length=$6
bowtie_index_basename=$7
condition_list_string=$8              # comma-separated list
sequence_fasta_file_list_string=$9    # comma-separated list
sequence_feature_annotations_file_list_string=${10}   # comma-separated list
GMB_estimation_output_directory=${11} # receives all intermediate files
filtered_loci_output_file=${12}

# -p keeps a re-run into an existing directory from erroring; quoting keeps
# paths with spaces intact. Nothing downstream can work without this
# directory, so stop if it cannot be created.
mkdir -p -- "$GMB_estimation_output_directory" || exit 1


# Split a comma-separated string into an array.
# Arguments:
#   $1 - name of the array variable to fill
#   $2 - comma-separated list
# Commas and whitespace both act as separators and empty items are dropped.
# The items are read with `read`, so they are never subject to pathname
# expansion or to echo's option parsing.
split_comma_list() {
	# -d '' makes read consume the whole string; it then reports EOF with a
	# non-zero status even though the array was filled, hence `|| true`.
	IFS=$' \t\n' read -r -d '' -a "$1" <<< "${2//,/ }" || true
}

split_comma_list condition_list "${condition_list_string}"
split_comma_list sequence_fasta_file_list "${sequence_fasta_file_list_string}"
split_comma_list sequence_feature_annotations_file_list "${sequence_feature_annotations_file_list_string}"

## assuming an ordering of ref/refP/otherP for the above lists:
ref=${condition_list[0]}
refP=${condition_list[1]}
otherP=${condition_list[2]}



## Identify Strong Orthologs
#
# Runs genome_mapping_bias.sh once for every ordered (query, target) pair of
# distinct conditions, sampling ${num_sampled_reads} reads per locus, and then
# passes the resulting mapping files to get_good_orthologs.py together with
# ${num_reads_threshold}. The result path is left in ${ortholog_file}.

# hardcoded arguments
query_sequences_to_select_file=NA
num_query_sequences_to_select=all
unspecified_sequencing_depth=NA
method=tophat
PE_reads=false
sequencing_noise_rate=NA
insert_length=NA

# warning: the ordering of the indices is important
# Files are collected in loop order: 0->1, 0->2, 1->0, 1->2, 2->0, 2->1.
# NOTE(review): confirm get_good_orthologs.py expects exactly these six
# cross-condition files (no self-mappings) in this order.
first_mapping_files=()
for query_index in 0 1 2; do
	for target_index in 0 1 2; do
		# Compare the index values (not the variable names) so that a
		# condition is never mapped onto itself.
		if [ "$query_index" != "$target_index" ]; then
			query_condition=${condition_list[query_index]}
			target_condition=${condition_list[target_index]}
			mapping_directory=${GMB_estimation_output_directory}/mapping_${num_sampled_reads}_reads_per_locus_from_${query_condition}_to_${target_condition}
			query_fasta_file=${sequence_fasta_file_list[query_index]}
			target_sequence_feature_annotations_file=${sequence_feature_annotations_file_list[target_index]}
			./genome_mapping_bias.sh \
				"$query_fasta_file" \
				"$query_sequences_to_select_file" \
				"$num_query_sequences_to_select" \
				"$num_sampled_reads" \
				"$unspecified_sequencing_depth" \
				"$read_length" \
				"$bowtie_index_basename" \
				"$target_sequence_feature_annotations_file" \
				"$mapping_directory" \
				"$method" \
				"$PE_reads" \
				"$sequencing_noise_rate" \
				"$insert_length"

			first_mapping_files+=("${mapping_directory}/read.mapping.information.txt")
		fi
	done
done

ortholog_file=${GMB_estimation_output_directory}/strong_orthologs_${num_reads_threshold}_out_of_${num_sampled_reads}_reads
get_good_orthologs.py "${first_mapping_files[@]}" "${num_reads_threshold}" "${ortholog_file}"



## Identify Loci with Strong Orthologs and Low Genome Mapping Bias
#
# Maps conditions 1 and 2 onto condition 0 at ${sequencing_depth}x coverage
# with genome_mapping_bias.sh, writes the output of
# get_orthologs_with_low_genome_mapping_bias.py to a ratio file, and filters
# that file with ${mapping_bias_ratio_threshold} into
# ${filtered_loci_output_file}. Reads ${ortholog_file} set by the previous
# section.

# hardcoded arguments
query_sequences_to_select_file=NA
num_query_sequences_to_select=all
unspecified_num_sampled_reads=NA
method=tophat
PE_reads=false
sequencing_noise_rate=NA
insert_length=NA

# warning: the ordering of the indices is important
# Files are collected in loop order: 1->0, then 2->0.
second_mapping_files=()
target_index=0
for query_index in 1 2; do
	query_condition=${condition_list[query_index]}
	target_condition=${condition_list[target_index]}
	mapping_directory=${GMB_estimation_output_directory}/mapping_${sequencing_depth}x_coverage_over_loci_from_${query_condition}_to_${target_condition}
	query_fasta_file=${sequence_fasta_file_list[query_index]}
	target_sequence_feature_annotations_file=${sequence_feature_annotations_file_list[target_index]}
	./genome_mapping_bias.sh \
		"$query_fasta_file" \
		"$query_sequences_to_select_file" \
		"$num_query_sequences_to_select" \
		"$unspecified_num_sampled_reads" \
		"$sequencing_depth" \
		"$read_length" \
		"$bowtie_index_basename" \
		"$target_sequence_feature_annotations_file" \
		"$mapping_directory" \
		"$method" \
		"$PE_reads" \
		"$sequencing_noise_rate" \
		"$insert_length"

	second_mapping_files+=("${mapping_directory}/read.mapping.information.txt")
done

unspecified_mapping_bias_ratio_threshold=NA
genome_mapping_bias_ratio_file=${GMB_estimation_output_directory}/genome_mapping_bias_ratio_for_each_locus_with_strong_orthology
# Truncate rather than append: a re-run into the same output directory must
# not stack a second copy of the rows onto the previous run's results.
./get_orthologs_with_low_genome_mapping_bias.py \
	"$ortholog_file" \
	"${second_mapping_files[@]}" \
	"$unspecified_mapping_bias_ratio_threshold" \
	"$max_num_allelic_reads_threshold" \
	> "$genome_mapping_bias_ratio_file"

./filter_loci_by_genome_mapping_bias_ratio.py "$genome_mapping_bias_ratio_file" "$mapping_bias_ratio_threshold" "$filtered_loci_output_file"

