#!/pkg/bin/perl -w

# Setup: read the whole input file into @lines, open the log and the first
# set of output files, and initialise the counters used by the main loop.
#
# Usage: <script> <infile> <genotype-output-prefix>
#   $ARGV[0]  input file to read
#   $ARGV[1]  prefix for the per-replicate genotype files ("<prefix>.<i>")
#
# Files opened for writing here:
#   exlog        progress log             (handle OUT)
#   <prefix>.1   genotype output          (handle OUTG)
#   hapsav.1     per-replicate summary    (handle OUTHS)
#   title        one line per pair        (handle OUTTI)
#
# All opens use the three-argument form so that characters in a file name
# can never be interpreted as an open mode, and every open is checked so a
# missing or unwritable file stops the run instead of silently producing
# empty output.
die "Usage: $0 <infile> <genotype-output-prefix>\n" unless @ARGV >= 2;

$infile = $ARGV[0];
open(OUT, '>', 'exlog') or die "Cannot open exlog for writing: $!\n";
open(IN, '<', $infile) or die "Cannot open $infile for reading: $!\n";
@lines = <IN>;
close(IN);

# Replicate counter; it is also the numeric suffix of the output file names.
$i = 1;

$goutprefix = $ARGV[1];
chomp($goutprefix);

$goutfile = $goutprefix . '.' . $i;
open(OUTG, '>', $goutfile) or die "Cannot open $goutfile for writing: $!\n";

$hsoutprefix = "hapsav";
$hsoutfile = $hsoutprefix . '.' . $i;
open(OUTHS, '>', $hsoutfile) or die "Cannot open $hsoutfile for writing: $!\n";

open(OUTTI, '>', 'title') or die "Cannot open title for writing: $!\n";

# $eo toggles between 0 and 1 as 0/1 rows are read; a pair is processed
# when it is 1 (i.e. on every second row).
$eo = 0;
$numlines = 0;     # number of 0/1 rows seen
$numtwos = 0;      # mismatching positions in the current pair
$totaltwos = 0;    # mismatching positions over the whole run
$nhom = $nhet = $thet = 0;

# Main pass over the input lines.
#
# For every line:
#   * "nsam: N"      -> $nsam = N/2 (presumably because two rows are merged
#                       into one genotype row -- confirm against the data)
#   * "segsites: N"  -> $segsites = N
#   * "<a>/<b>"      -> $totalreps = b
#   * a row made only of 0s and 1s is one member of a pair; on every second
#     such row the pair is merged position by position: equal characters are
#     kept, differing characters become '2'.
#   * a line containing "bgnseed" ends a replicate: the collected rows are
#     written to OUTG/OUTHS and, while more replicates are expected, the
#     output files are rotated to the next numeric suffix.
#
# Rows are stored in @homarray in groups of three: the merged row (characters
# separated by spaces) followed by either two more copies of it (no
# mismatches) or the two original rows.
foreach my $line (@lines) {
    chomp $line;

    if ($line =~ m/nsam: *(\d+)/) {
        $nsam = $1 / 2;
    }
    if ($line =~ m/segsites: *(\d+)/) {
        $segsites = $1;
    }

    if ($line =~ /\d+\/(\d+)/) {
        $totalreps = $1;
    }

    if ($line =~ /^[01]+$/) {
        $numlines++;

        if ($eo == 1) {
            # Second row of a pair: merge it with the previous row.
            my @first    = split(//, $lastline);
            my @second   = split(//, $line);
            my $genoline = "";
            $numtwos = 0;

            for my $pos (0 .. length($line) - 1) {
                my $char;
                if ($first[$pos] eq $second[$pos]) {
                    $char = $first[$pos];
                }
                else {
                    $char = '2';
                    $numtwos++;
                    $totaltwos++;
                    $reptwos++;
                }
                $genoline .= "$char ";
            }

            if ($numtwos == 0) {
                print OUT "found a true homozygote\n";
                # Store the merged row three times so that every entry
                # occupies one group of three slots in @homarray.
                for (1 .. 3) {
                    $homarray[$nhom++] = $genoline;
                }
            }
            else {
                $hetarray[$nhet++] = $genoline;
                $hetarray[$nhet++] = $lastline;
                $hetarray[$nhet++] = $line;
                $thet++;
                $homarray[$nhom++] = $genoline;
                $homarray[$nhom++] = $lastline;
                $homarray[$nhom++] = $line;
            }
            print OUTTI "1\n";

            $eo = 0;
        }
        else {
            # First row of a pair: just remember it.
            $eo = 1;
        }

        $lastline = $line;
    }

    if ($line =~ /bgnseed/) {
        print OUT "hit end of a replicate, and i is now $i\n";

        $numtwos = 0;
        if ($i > 0) {
            print OUTG "$nsam $segsites\n";
            my $hom = $nhom / 3;
            my $het = $nhet / 3;
            my $tot = $hom + $het;
            print OUTHS "$hom  $het $tot\n";

            # Walk @homarray in groups of three. "<" rather than "==" so the
            # loop still terminates if $nhom is ever not a multiple of 3.
            my $j = 0;
            while ($j < $nhom) {
                print OUTG "$homarray[$j]\n";
                for (1 .. 3) {
                    # Work on a copy: the stored row keeps its spaces and
                    # the input line being iterated is left untouched.
                    (my $row = $homarray[$j++]) =~ tr/ //d;
                    print OUTHS "$row\n";
                }
            }
        }

        # Rotate to the next pair of output files while replicates remain.
        # The defined() guard avoids comparing against undef when no
        # "<a>/<b>" line has been seen; the outcome (no rotation) is the
        # same as before.
        if (defined $totalreps && $i < $totalreps) {
            close(OUTG)  or die "Cannot close $goutfile: $!\n";
            close(OUTHS) or die "Cannot close $hsoutfile: $!\n";

            $i++;
            $eo = 0;
            $reptwos = 0;

            $goutfile = $goutprefix . '.' . $i;
            open(OUTG, '>', $goutfile)
                or die "Cannot open $goutfile for writing: $!\n";

            $hsoutfile = $hsoutprefix . '.' . $i;
            open(OUTHS, '>', $hsoutfile)
                or die "Cannot open $hsoutfile for writing: $!\n";
        }

        $nhom = $nhet = 0;
    }
}


 #$avgtwo =  $totaltwos/$thet;
 #print "there are $totaltwos two's generated.
 #This is an average of $avgtwo per ambiguous genotype generated\n";
 #$avgtwo = $totaltwos/($i * $tnsam);
 #print "Avg. number of mismatches in a mated pair of haplotypes is $avgtwo\n";

#print OUTH "There are $numlines lines detected\n";
#close (OUTH);
# Close every output handle explicitly and check the result: buffered write
# errors (e.g. a full disk) are only reported when the handle is closed.
close(OUTG)  or die "Cannot close $goutfile: $!\n";
close(OUTHS) or die "Cannot close $hsoutfile: $!\n";
close(OUTTI) or die "Cannot close title: $!\n";
close(OUT)   or die "Cannot close exlog: $!\n";
