#! /usr/bin/perl # Weasel, written in the Perl language. # Copyright 1997 by Wesley R. Elsberry # Modified 1999 by don.lindsay@acm.org: # -Added a default for "target" and "pool" # -Added a printout about time # -Some cosmetics, text, and error checks. # This is a tool for exploring the issues of cumulative selection as an # improvement over exhaustive search. The phrase about weasels # is from Shakespeare. The basic idea was suggested by Richard Dawkins # in "The Blind Watchmaker", Norton, 1987, ISBN 0-393-30448-5 # Parameters # ---------- # Target - A string which will be "found" by cumulative selection. # Pool - The character set from which characters are selected. # Mutation rate - The % probability that each replication of a character # will have a (random) mutation # Population size - How many strings exist in each generation. # Default values of parameters # ---------------------------- $defpopsize = 100; $defmutrate = 6; $deftarget = "Methinks it is like a weasel."; $defpool = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ."; # Algorithm # --------- # # All character strings will be the same length as the Target you supply. # All characters will be members of the Pool (character set) you supply. # # An initial string is chosen randomly. This is generation zero. # # In each subsequent generation, the current string is replicated until # we have a full population of them. The replication is imperfect. # Specifically, sometimes a copy becomes a mutant. # # After a population has been generated, we select out the one string # which is "closest" to the target string you specified. This string # is then the parent of the next generation. # # The program creates successive generations until eventually the # "closest" string is in fact identical to the target string. If you # chose a very high mutation rate, this may never happen, because a # "closest" string may be even worse than its parent. If this happens, # the program will eventually notice, and give up. # # The mutation rule is that each character in a string separately risks # mutation. If a character is mutated, it is in fact just replaced by a # random choice from the pool. # Suggestions for the student # --------------------------- # # True Darwinian selection doesn't have a "target" that is known to the # participants. Can you make this program behave that way? Why not? # Try changing the winner-takes-all rule. Currently each generation is # "descended" from the single most winning string. Change this so # that each string has some number of children, and the number is chosen # according to the parent's distance from the target. Remember to keep # the population size in check. # Try adding sex. Choose two winners instead of one, and create children # by hybridizing them. # For example, for each character in each descendant, choose randomly # as to which parent's character to inherit. Try three sexes. # Try making the mutants be variants, instead of random choices. That is, # the choice of the mutant character should be influenced by the # specific value that is being miscopied. # Try a different measurement of the distance between two strings. # Try doing many runs with the same parameter settings, and then do a # histogram that shows how much the results varied. Is the histogram # better if you instead use the logarithms of the values? # Try doing many runs with the same parameter settings, and getting an average. # Then get an average with another parameter setting, and another. Draw # a graph showing how the average was controlled by the way you chose # your parameters. # Try allowing the population size to grow from generation to generation. # Try writing up your findings, and posting them on a web page, or on Usenet's # newsgroup "talk.origins". srand; open(OUTFIL,">>weasel.log"); &snag_parameters(); $mutrate = $mutrate/100.0; # convert from % to a true probability $start_time = time; $ratio = &weasel_run($target,$pool,$mutrate,$popsize); &print_time_info( $start_time, $ratio ); close(OUTFIL); ######################################################################## # # snag_parameters # # Sets globals $target, $pool, $mutrate, $popsize. # May exit. sub snag_parameters { while ($ARGV = shift) { push(@args,$ARGV); } # Interactive single run if (-1 >= $#args) { print "Population size ($defpopsize): "; chop($popsize = ); if (0 >= $popsize) { $popsize = $defpopsize; } print "Mutation rate, expressed as % chance ($defmutrate): "; chop($mutrate = ); if (0 >= $mutrate) { $mutrate = $defmutrate; } print "Target string ($deftarget): "; chop($target = ); if( $target eq "" ) { $target = $deftarget; } print "Pool of characters ($defpool): "; chop($pool = ); if( $pool eq "" ) { $pool = $defpool; } } elsif (3 == $#args) { # Assume the user knows what the user is doing # Grab parameters $popsize = $args[0]; $mutrate = $args[1]; $pool = $args[2]; $target = $args[3]; # Check for picking up the pool from a file $_ = $pool; if (/\.wsl$/) { print "Found a weasel in the pool.\n"; open(INFIL,"<$_"); $_ = ; close(INFIL); chop; $pool = $_; print "The pool is $pool\n"; } # Check for picking up the target from a file $_ = $target; if (/\.wsl$/) { print "Found a weasel at the target.\n"; # Check for a target length if (/:/) { ($tlen,$tfile) = split(':',$_); } else { $tfile = $target; } open(INFIL,"<$tfile"); $_ = ; close(INFIL); chop; if (0 < $tlen) { $target = substr($_,0,$tlen); } else { $target = $_; } print "The target is $target\n"; } } else { print "WEASEL. Copyright 1997 by W.R. Elsberry\n"; print "Usage: WEASEL [popsize mut_rate {pool|file.wsl}" ." {target|[length:]file.wsl}]\n"; exit 1; } # Check $target does not have characters outside $pool. $_ = $target; eval "tr/$pool//d"; die $@ if $@; # complain if that didn't evaluate if(length) { die "Target uses characters not in the pool.\n"; } } ######################################################################## # # weasel_prob # Prints the probability that random search would find the target string. # Arguments: Target (string), Pool (string), Totalcand (scalar), # Uniqcand (scalar) # Returns the ratio of search space size to search size. sub weasel_prob { local($target,$pool,$totalcand,$uniqcand) = @_; local($totalrandprob,$searchspace,$weaselprob,$space_ratio); # Find search space size # Assumptions: each character in pool is unique, # each character in pool is equiprobable. $searchspace = length($pool) ** length($target); $weaselprob = $uniqcand / $searchspace; $space_ratio = $searchspace / $totalcand; print "\nTarget = \"$target\"\n"; print "Pool = \"$pool\"\n"; printf "Search space size = %8.4g\n", $searchspace; print "Number of candidates considered = $totalcand\n"; printf "Fraction of search space actually explored = %8.4g\n", $weaselprob; printf "Weasel search improvement over random search = %8.4g\n",$space_ratio; print OUTFIL "\"LOG\",$searchspace,$totalcand,$uniqcand,$weaselprob,$space_ratio,"; return $space_ratio; } ######################################################################## # # weasel_run # Does one run of the genetic string matching, and prints results. # Arguments: Target (string), Pool (string), Mutation rate (scalar), # Populatiion size (scalar) sub weasel_run { local($target,$pool,$mutrate,$popsize) = @_; local($gencnt,$best,$bdiff,$old_bdiff,$cdiff,$ii,$candcnt,@pop); local(%cand,$uniqcand); $gencnt = 0; $best = &replicate($target,$pool,100); # Random starting point $old_bdiff = &distance($best,$target); for(;;) { $bdiff = &distance($best,$target); print "Generation $gencnt: distance $bdiff: \"$best\"\n"; if( $bdiff == 0 ) { last; } # stop the loop $gencnt++; if( $old_bdiff < $bdiff && $gencnt > 1200 ) { # The distance just got worse, not better. # And, we've been at this for quite a while. # This could be a batch run, filling up someone's disk. Have pity. die "Giving up: this isn't converging.\n"; } $old_bdiff = $bdiff; # Generate a population of $popsize mutations of $best for ($ii=0; $ii < $popsize; $ii++) { $pop[$ii] = &replicate($best,$pool,$mutrate); $cand{$pop[$ii]}++; } # Search the new population for its best member $best = &pick_best_d($target,$popsize,@pop); }; $candcnt = $gencnt*$popsize + 1; @keys = (keys %cand); $uniqcand = $#keys + 1; # Index of last key plus 1 (base zero) print OUTFIL "$popsize,$mutrate,\"$target\",\"$pool\"\n"; return &weasel_prob($target,$pool,$candcnt,$uniqcand); } ######################################################################## # # replicate # Returns a new string created by mutating a template string. # Each character in the template separately risks being mutated. # If not mutated, the new character is the same as in the template. # If mutated, the new character is a random choice from the pool. # Arguments: Subject (string), Pool (string), Mutrate (scalar) sub replicate { local($subject,$pool,$mutrate) = @_; local($newsub,$ii,$jj,$mutate); $newsub = ""; for ($ii=0; $ii < length($subject); $ii++) { if ( rand() < $mutrate ) { # Pull character from pool $jj = int(rand(length($pool))); $newsub = $newsub . substr($pool,$jj,1); } else { # Just copy from the subject $newsub = $newsub . substr($subject,$ii,1); } } return($newsub); } ######################################################################## # # pick_best_d # Arguments: Target (string), Popsize (scalar), Pop (array of string) # Searches Pop and returns the entry in Pop which distance() says # is the closest one to Target. sub pick_best_d { local($target,$popsize,@pop) = @_; local($best,$bdiff,$cdiff,$ii); $best = $pop[0]; # seed the best slot with a candidate $bdiff = &distance($best,$target); # find the distance for the seed # print("PBD: $best $bdiff $pop[0]\n"); for ($ii=0; $ii < $popsize; $ii++) { $_ = $pop[$ii]; $cdiff = &distance($_,$target); # find the distance for the current string if ($cdiff < $bdiff) { $best = $_; $bdiff = $cdiff; } # print("PBD: $best $bdiff $pop[$ii]\n"); } return($best); } ######################################################################## # # distance # Arguments: Subject (string), Target (string) # Returns a number indicating the degree of dissimilarity between Subject # and Target. # (Specifically, at each character position, computes the distance between # the two character values. Returns the sum of those differences.) sub distance { local ($subject, $target) = @_; # Split parameters into local copies local ($cnt) = 0; local ($ii, $diff); for ($ii = 0; $ii < length($subject); $ii++) { $diff = ord(substr($target,$ii,1)) - ord(substr($subject,$ii,1)); if (0 < $diff) { $cnt = $cnt + $diff; } else { $cnt = $cnt - $diff; } } return($cnt); } ######################################################################## # # mismatch # Arguments: Subject (string), Target (string) # Returns the number of character positions in which Subject differs # from Target. sub mismatch { local ($subject, $target) = @_; # Split parameters into local copies local ($cnt) = 0; local ($ii, $diff); for ($ii = 0; $ii < length($subject); $ii++) { $diff = ord(substr($target,$ii,1)) - ord(substr($subject,$ii,1)); if (0 != $diff) { $cnt = $cnt + 1; } } return($cnt); } ######################################################################## # # print_time_info # Arguments: Start_time (scalar), search space Ratio (scalar) sub print_time_info { local ($start_time, $ratio) = @_; local ($end_time); $end_time = time; # in seconds $elapsed = $end_time - $start_time; print "Weasel search took $elapsed seconds.\n"; $one_year = 60*60*24*365.24; # (approximate) in seconds $years = $elapsed * $ratio / $one_year; printf "A complete search would have taken %8.4g years.\n", $years; } ########################################################################