#!/pkg/bin/perl -w
#9/15/2000 I think this is the correct version to use. Not extract2.pl
#This reads in a file of the haplotype data generated by
#the Hudson program, and separates out the replicates
#into distinct haplotype files. It also generates a genotype by
#combining two lines at a time to create a genotype line.
#An exception to this is if the resulting genotype only has
#a single heterozygous site, then it leaves the two haplotypes
#alone, and essentially declares them to be two homozygous genotypes.
#
#This just shortcuts what haplo.c and clark.c would have done
#if given a single-locus heterozygote.
#The genotypes are written to a file which is used as input
#to haplo.c and to clark.c or clark2 or clarkX (some version above 2).
#
#The program also outputs a file (often called hapsavEXT.REP, where
#EXT is an extension that identifies the type of data, and REP is
#the replicate number) with
#the genotype followed by the two haplotypes
#that created it, so that we can later compare the results of
#clark and of the LP. In the latter two files, the homozygotes
#come first, followed by any heterozygotes.

# ---------------------------------------------------------------------
# Usage: <this script> INFILE OUTPREFIX
#
# What the code below actually does:
#   * Reads INFILE as alternating records: one line consumed as a
#     title, then the next line made up solely of the digits 0, 1 and 2,
#     which is stored as a genotype.  After a title, any line that is
#     not a pure 0/1/2 string is skipped until a genotype line is found.
#   * Writes OUTPREFIX.1: a header line "COUNT SITES" followed by one
#     genotype per line, every digit followed by a single space
#     (so each line ends with a trailing space).
#   * Writes a file named 'title' in the current directory containing
#     the line "1" once for every title line consumed.
# ---------------------------------------------------------------------
use strict;
use warnings;

die "usage: $0 infile outprefix\n" unless @ARGV >= 2;

my $infile     = $ARGV[0];
my $goutprefix = $ARGV[1];
chomp $goutprefix;

# Only a single output file, with suffix ".1", is ever produced here.
my $replicate = 1;
my $goutfile  = $goutprefix . '.' . $replicate;

# Open the input first so that a bad path fails before any output file
# is created or truncated.
open(my $in, '<', $infile)
    or die "cannot open '$infile' for reading: $!\n";
open(my $outg, '>', $goutfile)
    or die "cannot open '$goutfile' for writing: $!\n";
open(my $outti, '>', 'title')
    or die "cannot open 'title' for writing: $!\n";

my $expect_title = 1;    # true when the next line is consumed as a title
my $linelength   = 0;    # site count of the most recent genotype line
my @homarray;            # formatted genotype lines, in input order

while (my $line = <$in>) {
    chomp $line;
    $line =~ s/\r\z//;   # tolerate CRLF line endings

    if ($expect_title) {
        # The title text itself is not kept; only a "1" marker is logged.
        print {$outti} "1\n";
        $expect_title = 0;
        next;
    }

    # Still waiting for this title's genotype: ignore anything else.
    next unless $line =~ /\A[012]+\z/;

    my $sites = length $line;
    if (@homarray && $sites != $linelength) {
        warn "$infile line $.: genotype has $sites sites, "
           . "previous had $linelength\n";
    }
    $linelength = $sites;

    # One digit per column, each followed by a space.
    push @homarray, join('', map { "$_ " } split(//, $line));
    $expect_title = 1;
}
close($in);

# Header: number of genotypes and the site count of the last one read
# (0 when the input held no genotype lines).
print {$outg} scalar(@homarray), " $linelength\n";
print {$outg} "$_\n" for @homarray;

# Buffered write errors only surface at close, so check them.
close($outg)  or die "error closing '$goutfile': $!\n";
close($outti) or die "error closing 'title': $!\n";