/* NAME * gastring - breed strings with a genetic algorithm * NOTES * None. * MISCELLANY * The fitness function works in three steps. First, the number of * correct characters is tallied and denoted the raw fitness. The * scaled fitness is then set to pow(PBASE, raw fitness - string * len). The normalized fitness is then set to the scaled fitness * divided by the sum of the scaled fitnesses. Thus the sum of the * normalized fitnesses must be equal to one while a string with one * letter more correct than another string is PBASE times as likely * to reproduce, where PBASE is the value supplied with the -pbase * option. * BUGS * No sanity checks are performed to make sure that any of the * options make sense. * AUTHOR * Copyright (c) 1997, Gary William Flake. * * Permission granted for any use according to the standard GNU * ``copyleft'' agreement provided that the author's comments are * neither modified nor removed. No warranty is given or implied. */ #include #include #include #include "misc.h" int size = 500, steps = 50, seed = 0; double crate = 0.75, mrate = 0.01, pbase = 2; char *target = "furious green ideas sweat profusely"; char help_string[] = "\ Use a genetic algorithm to breed strings that match a user-specified \ target string. This program illustrates how GAs can perform a type of \ stochastic search in a space of discrete objects. Reproduction of \ strings entails crossover and mutation with strings being selected \ based on fitness.\ "; OPTION options[] = { { "-target", OPT_STRING, &target, "Target string." }, { "-size", OPT_INT, &size, "Population size." }, { "-steps", OPT_INT, &steps, "Number of generations." }, { "-seed", OPT_INT, &seed, "Random seed." }, { "-crate", OPT_DOUBLE, &crate, "Crossover rate." }, { "-mrate", OPT_DOUBLE, &mrate, "Mutation rate." }, { "-pbase", OPT_DOUBLE, &pbase, "Power base for fitness." }, { NULL, OPT_NULL, NULL, NULL } }; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Generate a random letter or space with equal probability. */ int random_letter_or_space(void) { int letter; letter = (random() % ('z' - 'a' + 2)) + 'a'; if(letter > 'z') letter = ' '; return(letter); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Compute the fitness of each string. */ void compute_fitness(int tlen, char **pop, int *correct, double *fit) { int i, j, count; double sum; sum = 0; /* For each member of the popluation... */ for(i = 0; i < size; i++) { /* Count the number of letters that are correct. */ count = 0; for(j = 0; j < tlen; j++) if(pop[i][j] == target[j]) count++; correct[i] = count; /* Compute pbase raised to the (no. correct - len) power. * Thus, having one more letter correct is pbase times * as good. */ fit[i] = pow(pbase, correct[i] - tlen); /* Sum up the powers so that they can be normalized below. */ sum += fit[i]; } for(i = 0; i < size; i++) fit[i] /= sum; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Perform random roulette selection according to normalized fitness. */ int select_one(double *normfit) { int i; double x, sum; sum = 0; x = random_range(0, 1); for(i = 0; i < size; i++) { sum += normfit[i]; /* Accept a choice based on cumulative sum of fitness (which * should be equal to 1 if done over all strings). */ if(x <= sum) return(i); } /* Just in case there was a subtle numerical error. */ return(size - 1); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Make love not war. Optionally cross parent A (PA) with parent P (PB). Children may also be affected by gamma rays. Index refers to where the two new children should be placed in the new population. */ void reproduce(int tlen, char **oldpop, char **newpop, int pa, int pb, int index) { int i, cpoint; /* Pick a crossover point. Note that a choice of 0 or tlen * does nothing. */ cpoint = (random_range(0, 1) < crate) ? (random() % (tlen - 1)) + 1 : tlen; /* Copy over the first cpoint characters. */ for(i = 0; i < cpoint; i++) { newpop[index][i] = oldpop[pa][i]; newpop[index + 1][i] = oldpop[pb][i]; } /* Copy over the remaining characters, but this time * swap the DNA from the two parents. */ for(i = cpoint; i < tlen; i++) { newpop[index][i] = oldpop[pb][i]; newpop[index + 1][i] = oldpop[pa][i]; } /* Optionally mutate the children. */ for(i = 0; i < tlen; i++) { if(random_range(0, 1) < mrate) newpop[index][i] = random_letter_or_space(); if(random_range(0, 1) < mrate) newpop[index + 1][i] = random_letter_or_space(); } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Ugly-print some statistics. */ void dump_stats(int time, int tlen, double *fit, int *correct, char **pop) { int i, besti = -1; double best = -1, ave = 0; /* Find the best match and average the scores. */ for(i = 0; i < size; i++) { if(fit[i] > best) { besti = i; best = fit[i]; } ave += correct[i]; } ave /= size; ave /= tlen; printf("---\ntime = %d\n", time); printf("average %% letters correct = %f\n", ave); printf("best %% letters correct = %f\n", correct[besti] / (double)tlen); printf("best = \"%s\"\n", pop[besti]); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ int main(int argc, char **argv) { int i, j, t, targetlen, parent_a, parent_b, *numcorrect; char **swap, **newpop, **oldpop; double *normfit; get_options(argc, argv, options, help_string); srandom(seed); /* Force the size to be even. */ size += (size / 2 * 2 != size); /* Initialize the population. */ targetlen = strlen(target); newpop = xmalloc(sizeof(char *) * size); oldpop = xmalloc(sizeof(char *) * size); numcorrect = xmalloc(sizeof(int) * size); normfit = xmalloc(sizeof(double) * size); for(i = 0; i < size; i++) { newpop[i] = xmalloc(sizeof(char) * targetlen + 1); oldpop[i] = xmalloc(sizeof(char) * targetlen + 1); for(j = 0; j < targetlen; j++) oldpop[i][j] = random_letter_or_space(); oldpop[i][targetlen] = 0; newpop[i][targetlen] = 0; } /* For each time step... */ for(t = 0; t < steps; t++) { compute_fitness(targetlen, oldpop, numcorrect, normfit); dump_stats(t, targetlen, normfit, numcorrect, oldpop); /* Pick two parents by fitness and mate them until the * next generation has been made. */ for(i = 0; i < size; i += 2) { parent_a = select_one(normfit); parent_b = select_one(normfit); reproduce(targetlen, oldpop, newpop, parent_a, parent_b, i); } /* Make everything old new again. */ swap = newpop; newpop = oldpop; oldpop = swap; } exit(0); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */