# This computes the McMillan value of a code, and also the value based on the frequencies
# of each character in the code. If each of those values is less than or equal to one, it
# runs the UD algorithm to determine for sure if the code is UD.
# There are no guarantees that this is right - please report any errors.
# Dan Gusfield 2007
#
print "Input each character in the alphabet on a new line. End with end\n";
$tprob = 0;
$ciel = 10;

$char = <STDIN>;
chomp $char;
$alphasize = 0;

while ($char ne 'end') {
    $alphasize++;
    $pchar{$char} = 1; 
    $char = <STDIN>;
    chomp $char;
}
print "The alphabet size is $alphasize\n";
$up = 1/$alphasize;

print "Input the code words, each on a new line. End with the word end.\n";


$word = <STDIN>;
chomp $word;

while ($word ne 'end') {
    chomp $word;
    $pwords{$word} = 1;
    push (@code, $word);

    $word = <STDIN>;
    print "$word";
    chomp $word;
}

$ever = 0;
$maxpcode = 0;
while ($loop < 100) {
$tprob = 0;
foreach $char (keys %pchar) {
   $prob = 1 + int(rand($ciel));
    $tprob += $prob; 
    $pchar{$char} = $prob;
    # print "$char, $prob \n";
}

$ttprob = 0;
foreach $char (keys %pchar) {
 $ttprob += $pchar{$char} = $pchar{$char}/$tprob;
 # print "$char, $pchar{$char}, $ttprob\n";
}

$ecode = 0;
$pcode = 0;
%charfreq = ();
$chartotal = 0;
foreach $word (keys %pwords) {

    @chars = split (//,$word);
    # print "@chars\n";

    $eword = 1;
    $pword = 1;
    foreach $char (@chars) {
    $pword *= $pchar{$char};
    $eword *= $up;
    $charfreq{$char}++;
    $chartotal++;
    }
    # print "The probability of word $word is $pword\n";
    $pcode += $pword;
    $ecode += $eword;
} 

# print "The sum of the values in iteration $loop is $pcode\n";
  foreach $char (keys %charfreq) {
	    # print "The frequency of character $char is $charfreq{$char} out of $chartotal characters\n";
    }

    if ($maxpcode < $pcode) {
       $maxpcode = $pcode;
       $maxloop = $loop;
       }

    if ($pcode > 1) {
	    # print "XXXX pcode is greater than 1\n";
      $ever++;
      $loopsav = $loop;
      $pcodesav = $pcode;
    }
$loop++;
}

#print "The McMillan value is $ecode\n";
#print "The maxpcode value is $maxpcode and it occurred in iteration $maxloop \n";

if ($ever > 0) {
	# print "the code is not UD. The number of witnesses is $ever.
# The last witness was iteration $loopsav where pcode has value $pcodesav\n";
}
else {
	# print "the code passed all the necessary conditions and might be UD. Now we
#  run the algorithm to test if the code is UD for sure.\n";
}


#now we test if the code is in fact UD 


@current = @code;
$new = 1;
$UD = 1;

while ($new) {
print "\nStart of next iteration\n\n";
$new = 0;
@nextcurrent = ();

foreach $string1 (@current) {
  foreach $string2 (@code) {

$tail = '';
$tail = tail ($string1, $string2);

print "The test is for $string1 and $string2\n";

if ($tail ne '') {
 print "X $tail\n";
    if (!defined $tails{$tail}) {
    $tails{$tail} = 1;
    print "The tail $tail is new \n";
    if (defined $pwords{$tail}) {
       print "The code is not UD\n";
       $UD = 0;
    } 
    else { 
         push (@nextcurrent, $tail);
         $new = 1;
         }
    }
 }

else {
 print "no tail\n";
}

}}

@current = @nextcurrent;  
}


$pcode = 0;
foreach $word (keys %pwords) {

    @chars = split (//,$word);
    $pword = 1;
    foreach $char (@chars) {
    $pword *= $charfreq{$char}/$chartotal;
    }
    $pcode += $pword;
} 
#print "The pcode using character frequencies is $pcode\n";
#print "The maxpcode was $maxpcode\n";

if ($UD == 0) {
  print "The code is not UD\n";
  foreach $word (@code) {
  print "$word\n";
  }
  exit;
}

print "The code is UD\n";


#############
sub tail {
($string1, $string2) = @_; 

if (length($string1) > length($string2)) {
$prefix = substr($string1,0,length($string2));

if ($prefix eq $string2) {
    $suffix = substr($string1,length($string2));
    #print "$prefix\n";
    #print "$suffix\n";
    return $suffix;
    }
}


elsif (length($string2) > length($string1)) {
$prefix = substr($string2,0,length($string1));

if ($prefix eq $string1) {
   $suffix = substr($string2,length($string1));
   #print "$prefix\n";
   #print "$suffix\n";
   return $suffix;
   }
}

}
