#!/pkg/bin/perl -w
use strict;
use warnings;
use diagnostics;    # this will generate more complete explanations of any errors
#
# program kmer4.pl
# This program expands on kmer2.pl which expands on kmer1.pl which expands
# on kmer.pl.
#
# This program finds all the overlapping k-mers of the input string. It builds
# an associative array where each key is one distinct k-mer in the string,
# and the associated value is a list of starting positions where that
# k-mer is found. For example, if the input is ACACTCA and k is 2, then
# one key is AC with a list of 1,3; another key is CA with a list of 2,6;
# another key is CT with a list of 4; and one key is TC with a list of 5.
#
# Note the use of dereferencing in order to build up each array in
# the hash of arrays, and its use again to join the array into a string for
# printing a comma separated list.
# dg

# find_kmers($dna, $k [, $on_kmer])
#
# Collects every overlapping window of length $k in $dna.
#
#   $dna      - the string to scan.
#   $k        - the window length; must be a positive integer.
#   $on_kmer  - optional code reference, called as $on_kmer->($kmer, $position)
#               once per window, in left-to-right order.
#
# Returns a reference to a hash whose keys are the distinct k-mers and whose
# values are references to arrays of 1-based starting positions, in ascending
# order. The hash is empty when $k is larger than the length of $dna.
#
# Dies when $dna is undefined or $k is not a positive integer. A window length
# of zero in particular would otherwise never let the scan terminate.
sub find_kmers {
    my ($dna, $k, $on_kmer) = @_;

    die "find_kmers: the sequence is undefined\n"
        unless defined $dna;
    die "find_kmers: the window length must be a positive integer\n"
        unless defined $k && $k =~ /\A[1-9][0-9]*\z/;

    my %positions_of;

    # Nothing fits; returning here also keeps the range below well-formed
    # for absurdly large window lengths.
    return \%positions_of if $k > length $dna;

    my $last_start = length($dna) - $k;    # 0-based start of the final window
    for my $start (0 .. $last_start) {
        # Index straight into the string rather than re-copying the remainder
        # of the sequence on every step.
        my $kmer     = substr($dna, $start, $k);
        my $position = $start + 1;         # positions are reported 1-based

        $on_kmer->($kmer, $position) if $on_kmer;

        # $positions_of{$kmer} is a reference to an array. Dereferencing it
        # with @{ ... } lets push extend that array; Perl creates the array
        # automatically the first time a given k-mer is seen.
        push @{ $positions_of{$kmer} }, $position;
    }

    return \%positions_of;
}

# main()
#
# Prompts for the string and the window length, prints each k-mer with its
# position as it is found, then prints one summary line per distinct k-mer
# (sorted by k-mer) listing all of its starting positions.
sub main {
    print "Input the string\n";
    my $dna = <>;
    if (!defined $dna) {
        warn "No string was supplied; nothing to do.\n";
        return;
    }
    chomp $dna;
    $dna =~ s/\r\z//;    # tolerate input with CRLF line endings

    print "Input the length of the window\n";
    my $k = <>;
    if (!defined $k) {
        warn "No window length was supplied; nothing to do.\n";
        return;
    }
    chomp $k;
    $k =~ s/\A\s+|\s+\z//g;    # ignore stray whitespace around the number

    die "The length of the window must be a positive integer, got '$k'\n"
        unless $k =~ /\A[1-9][0-9]*\z/;

    my $kmer = find_kmers(
        $dna, $k,
        sub {
            my ($found, $i) = @_;
            print "$found, $i \n";
        },
    );

    foreach my $kmerkey (sort keys %{$kmer}) {
        # Dereference the stored array to join it into a comma separated list.
        my $occrs = join(', ', @{ $kmer->{$kmerkey} });
        print "The occurrences of string $kmerkey are in positions $occrs \n";
    }

    return;
}

# Run only when executed as a script, so the file can also be loaded
# (e.g. with require) to call find_kmers directly.
main() unless caller;

1;