#!/bin/bash
# find_mutations.sh

#$ -cwd
#$ -S /bin/bash

# Eight positional arguments are mandatory (everything up to and including
# gene_name); homolog names after them are optional. On misuse, report on
# stderr and exit non-zero so the caller can tell the run did not happen.
if [ $# -lt 8 ]
then
	echo "Usage: ./find_mutations.sh parsed_pileup_file organism (\"standard_output\" | \"parsed_pileup_subset\") tilling_seq_file is_contamination_test method_to_use threshold_for_method gene_name [ homolog1 homolog2 ... ]" >&2
	exit 1
fi

## global variables and parameters

# Required positional arguments, in the order listed in the usage message.
parsed_pileup_file="${1}"
organism="${2}"
output_option="${3}"
tilling_sequences_file="${4}"
is_contamination_test="${5}"
method_to_use="${6}"
threshold_for_method="${7}"
gene_name="${8}"

# Each parsed pileup file always has exactly one header line to skip.
lines_to_skip=1


# get the string of homologs for the specified gene

# Every argument after the eight required ones ($9 onwards) is a homolog name.
# "${@:9}" walks the positional parameters themselves, so an earlier argument
# that contains whitespace or glob characters cannot shift the position at
# which the homologs start.
homolog_string=
for homolog in "${@:9}"
do
	#echo "Found homolog: ${homolog}"
	homolog_string="${homolog_string} ${homolog}"
done

# Word-splitting is intentional here: homolog_string is a space-separated list
# and the rest of the script consumes it unquoted, so count names the same way.
homolog_list=( $homolog_string )
if [ ${#homolog_list[@]} -ne 0 ]
then
	#echo "Homolog string: '${homolog_string}'"
	align_homologs=1
else
	align_homologs=0
fi

## local variables

# Paths (relative to the working directory) of the files this script produces
# while preparing the homolog alignment.
clustal_extra_output_logfile="intermediate_files/clustal_extra_output_logfile"
clustal_homolog_alignment_intermediate_file="intermediate_files/clustal_homolog_alignment_output"
tilling_sequences_for_specified_gene_and_its_homologs_intermediate_file="intermediate_files/tilling_sequences_for_specified_gene_and_its_homologs.fasta"


## remove old intermediate files

# Clear leftovers from a previous run; -f keeps rm quiet when a file is absent.
rm -f \
	"$tilling_sequences_for_specified_gene_and_its_homologs_intermediate_file" \
	"$clustal_homolog_alignment_intermediate_file" \
	"$clustal_extra_output_logfile"
#rm -f intermediate_files/native_fraction_of_base_reads_log 


## get multiple alignment between specified gene and homologs

# create a file containing the tilling sequences for the specified gene and its homologs
# Build the intermediate .fasta file: the output of select_sequence_from_file.py
# for the specified gene first, then for each homolog, each homolog entry
# preceded by an empty line. All expansions are quoted so that names or paths
# containing whitespace or glob characters reach the helper as single arguments.
{
	./select_sequence_from_file.py "$gene_name" "$tilling_sequences_file"
	#echo "got tilling seq"
	# homolog_string is intentionally unquoted: it is a space-separated list.
	for homolog in ${homolog_string}
	do
		echo
		./select_sequence_from_file.py "$homolog" "$tilling_sequences_file"
	done
} > "$tilling_sequences_for_specified_gene_and_its_homologs_intermediate_file"

# run clustal to align the tilling sequences of the specified gene and its
# homologs; skipped when no homologs were given. Whatever clustal prints on
# stdout is appended to the log file.
if [ "$align_homologs" -ne 0 ]
then
	clustalw-2.0.10/src/clustalw2 -INFILE="$tilling_sequences_for_specified_gene_and_its_homologs_intermediate_file" -ALIGN -TYPE=DNA -OUTFILE="$clustal_homolog_alignment_intermediate_file" -QUIET >> "$clustal_extra_output_logfile"
fi


## identify mutation candidates

# Hand everything to find_mutations.py. Each single-valued argument is quoted
# so a value containing whitespace or glob characters cannot shift the
# positional arguments; homolog_string is intentionally left unquoted because
# it is a space-separated list that must expand to one argument per homolog.
./find_mutations.py "$parsed_pileup_file" "$lines_to_skip" "$organism" "$output_option" "$tilling_sequences_file" "$gene_name" "$clustal_homolog_alignment_intermediate_file" "${is_contamination_test}" "${method_to_use}" "${threshold_for_method}" ${homolog_string}

# Remove the semaphore file for this gene/organism pair, whether or not the
# step above succeeded.
# NOTE(review): the script's exit status is that of this rm, not of
# find_mutations.py — confirm callers do not need the latter.
rm -f "semaphore/gene_${gene_name}_organism_${organism}"
