# This computes the McMillan value of a code, and also the value based on the frequencies # of each character in the code. If each of those values is less than or equal to one, it # runs the UD algorithm to determine for sure if the code is UD. # There are no guarantees that this is right - please report any errors. # Dan Gusfield 2007 # print "Input each character in the alphabet on a new line. End with end\n"; $tprob = 0; $ciel = 10; $char = ; chomp $char; $alphasize = 0; while ($char ne 'end') { $alphasize++; $pchar{$char} = 1; $char = ; chomp $char; } print "The alphabet size is $alphasize\n"; $up = 1/$alphasize; print "Input the code words, each on a new line. End with the word end.\n"; $word = ; chomp $word; while ($word ne 'end') { chomp $word; $pwords{$word} = 1; push (@code, $word); $word = ; print "$word"; chomp $word; } $ever = 0; $maxpcode = 0; while ($loop < 100) { $tprob = 0; foreach $char (keys %pchar) { $prob = 1 + int(rand($ciel)); $tprob += $prob; $pchar{$char} = $prob; # print "$char, $prob \n"; } $ttprob = 0; foreach $char (keys %pchar) { $ttprob += $pchar{$char} = $pchar{$char}/$tprob; # print "$char, $pchar{$char}, $ttprob\n"; } $ecode = 0; $pcode = 0; %charfreq = (); $chartotal = 0; foreach $word (keys %pwords) { @chars = split (//,$word); # print "@chars\n"; $eword = 1; $pword = 1; foreach $char (@chars) { $pword *= $pchar{$char}; $eword *= $up; $charfreq{$char}++; $chartotal++; } # print "The probability of word $word is $pword\n"; $pcode += $pword; $ecode += $eword; } # print "The sum of the values in iteration $loop is $pcode\n"; foreach $char (keys %charfreq) { # print "The frequency of character $char is $charfreq{$char} out of $chartotal characters\n"; } if ($maxpcode < $pcode) { $maxpcode = $pcode; $maxloop = $loop; } if ($pcode > 1) { # print "XXXX pcode is greater than 1\n"; $ever++; $loopsav = $loop; $pcodesav = $pcode; } $loop++; } #print "The McMillan value is $ecode\n"; #print "The maxpcode value is $maxpcode and it occurred in iteration $maxloop \n"; if ($ever > 0) { # print "the code is not UD. The number of witnesses is $ever. # The last witness was iteration $loopsav where pcode has value $pcodesav\n"; } else { # print "the code passed all the necessary conditions and might be UD. Now we # run the algorithm to test if the code is UD for sure.\n"; } #now we test if the code is in fact UD @current = @code; $new = 1; $UD = 1; while ($new) { print "\nStart of next iteration\n\n"; $new = 0; @nextcurrent = (); foreach $string1 (@current) { foreach $string2 (@code) { $tail = ''; $tail = tail ($string1, $string2); print "The test is for $string1 and $string2\n"; if ($tail ne '') { print "X $tail\n"; if (!defined $tails{$tail}) { $tails{$tail} = 1; print "The tail $tail is new \n"; if (defined $pwords{$tail}) { print "The code is not UD\n"; $UD = 0; } else { push (@nextcurrent, $tail); $new = 1; } } } else { print "no tail\n"; } }} @current = @nextcurrent; } $pcode = 0; foreach $word (keys %pwords) { @chars = split (//,$word); $pword = 1; foreach $char (@chars) { $pword *= $charfreq{$char}/$chartotal; } $pcode += $pword; } #print "The pcode using character frequencies is $pcode\n"; #print "The maxpcode was $maxpcode\n"; if ($UD == 0) { print "The code is not UD\n"; foreach $word (@code) { print "$word\n"; } exit; } print "The code is UD\n"; ############# sub tail { ($string1, $string2) = @_; if (length($string1) > length($string2)) { $prefix = substr($string1,0,length($string2)); if ($prefix eq $string2) { $suffix = substr($string1,length($string2)); #print "$prefix\n"; #print "$suffix\n"; return $suffix; } } elsif (length($string2) > length($string1)) { $prefix = substr($string2,0,length($string1)); if ($prefix eq $string1) { $suffix = substr($string2,length($string1)); #print "$prefix\n"; #print "$suffix\n"; return $suffix; } } }