/* NAME * zcs - run a zeroth level classifier system on a terrain * NOTES * None. * MISCELLANY * The file format for the specifications files must contain the * width and height of the world followed by the details of the * world in ASCII format, where '.' is an empty cell, 'O' is a rock, * and 'F' represent food. See any of the examples in the 'data' * subdirectory of this distribution for more details. * * The log file, 'zcs.log', contains one line per classifier with * the condition string, the action string, and the strength of the * classifier. The classifiers are sorted so the the strongest * classifiers are printed first. * * The ASCII output of the program shows the most recent, windowed * average and the total average for the number of steps needed to * find food. * HINTS * See the author's book, "The Computational Beauty of Nature," for * more details. * BUGS * No sanity checks are performed to make sure that any of the * options make sense. * AUTHOR * Copyright (c) 1997, Gary William Flake. * * Permission granted for any use according to the standard GNU * ``copyleft'' agreement provided that the author's comments are * neither modified nor removed. No warranty is given or implied. */ #include #include #include #include "misc.h" /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ int width, height, avelen = 50; int size = 400, steps = 5000, seed = 0, mag = 10, invert = 1; double sinit = 20, lrate = 0.2, drate = 0.71, trate = 0.1, crate = 0.5; double mrate = 0.002, grate = 0.25, cover = 0.5, wild = 0.33; char *term = NULL, *specs = "data/woods1.txt"; char help_string[] = "\ Train a zeroth level classifier system (ZCS) to traverse a \ two-dimensional terrain, avoid obstacles, and find food with the \ implicit bucket brigade algorithm and a genetic algorithm. At the \ beginning of each step the ZCS is placed at a random location of it's \ world. It interacts with its environment until it finds food, which \ yields a reward. The simulation then restarts with the ZCS placed at \ a new random location. The progress of the ZCS is continuously \ plotted, while the statistics on the time to find food are calculated \ and displayed. At the end of the simulation the classifiers that \ make up the final ZCS are saved to a log file. \ "; OPTION options[] = { { "-specs", OPT_STRING, &specs, "World specification file." }, { "-steps", OPT_INT, &steps, "Number of simulated trials." }, { "-seed", OPT_INT, &seed, "Random seed for initial state." }, { "-size", OPT_INT, &size, "Population size." }, { "-sinit", OPT_DOUBLE, &sinit, "Initial classifier strength." }, { "-lrate", OPT_DOUBLE, &lrate, "BB learning rate." }, { "-drate", OPT_DOUBLE, &drate, "BB discount rate." }, { "-trate", OPT_DOUBLE, &trate, "Tax rate for strength reduce." }, { "-crate", OPT_DOUBLE, &crate, "GA crossover rate." }, { "-mrate", OPT_DOUBLE, &mrate, "GA mutation rate." }, { "-grate", OPT_DOUBLE, &grate, "GA invocation rate." }, { "-cover", OPT_DOUBLE, &cover, "Covering factor." }, { "-wild", OPT_DOUBLE, &wild, "Probability of # in cover." }, { "-avelen", OPT_INT, &avelen, "Length of windowed average." }, { "-inv", OPT_SWITCH, &invert, "Invert all colors?" }, { "-mag", OPT_INT, &mag, "Magnification factor." }, { "-term", OPT_STRING, &term, "How to plot points." }, { NULL, OPT_NULL, NULL, NULL } }; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* These define how the objects are represented internally and also the color that objects are drawn in: food = 11, rock = 10, empty = 00. */ #define EMPTY 0 #define FOOD 1 #define ROCK 2 #define ME 3 /* How much to reward the CS when food is found. */ #define FOOD_REWARD 1000 /* Macros for handy conversions. */ #define CHAR2NUM(c) (c == 'F' ? FOOD : c == 'O' ? ROCK : EMPTY) #define NUM2BIN1(n) (n == EMPTY ? '0' : '1') #define NUM2BIN2(n) (n == FOOD ? '1' : '0') /* The length of a classifier and the action bit strings. */ #define CLEN 16 #define ALEN 3 /* A type declaration for a single classifier: */ typedef struct CLASSIFIER { double str; /* Strength. */ char cond[CLEN+1]; /* Condition. */ char act[ALEN+1]; /* Action. */ } CLASSIFIER; /* A list structure for classifiers. */ typedef struct CLIST { CLASSIFIER *class; struct CLIST *next; } CLIST; /* Globals to minimize parameter passing: an array of classifiers, A two-dimensional array to hold the state of the world, and our ZCS's (x, y) coordinates. */ CLASSIFIER *pop; char **world; int me_w, me_h; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* A function to compare classifiers based on strength. */ int classcomp(const void *a, const void *b) { const CLASSIFIER *ac = a, *bc = b; return(ac->str < bc->str ? 1 : ac->str > bc->str ? -1 : 0); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Read in the world specification and initialize memory. */ void initialize(void) { FILE *fp; SCANNER *scan; char *str; int i, j; /* Read world specifications. */ if(specs == NULL || (fp = fopen(specs, "r")) == NULL) { fprintf(stderr, "Cannot open specs file \"%s\".\n", specs); exit(1); } /* The characters 'F', 'O', and '.' are tokens. '#' is for comments. */ scan = scan_init(fp, "FO.", " \t\n", "#"); /* Width and height are read in first. */ if((str = scan_get(scan)) == NULL) goto BADFILE; width = atoi(str); if((str = scan_get(scan)) == NULL) goto BADFILE; height = atoi(str); /* Make space for the world. */ world = xmalloc(sizeof(char *) * height); for(i = 0; i < height; i++) { world[i] = xmalloc(sizeof(char) * width); for(j = 0; j < width; j++) { if((str = scan_get(scan)) == NULL) goto BADFILE; world[i][j] = CHAR2NUM(*str); } } /* Find an empty place to initially reside. */ while(1) { me_h = random() % height; me_w = random() % width; if(world[me_h][me_w] == EMPTY) break; } world[me_h][me_w] = ME; /* Initialize classifier system. */ pop = xmalloc(size * sizeof(CLASSIFIER)); for(i = 0; i < size; i++) { pop[i].str = sinit; for(j = 0; j < CLEN; j++) { pop[i].cond[j] = '0' + (random() % 3); pop[i].cond[j] = (pop[i].cond[j] == '2') ? '#' : pop[i].cond[j]; } pop[i].cond[CLEN] = 0; for(j = 0; j < ALEN; j++) pop[i].act[j] = '0' + (random() % 2); pop[i].act[ALEN] = 0; } return; BADFILE: fprintf(stderr, "Problem found in specs file.\n"); exit(1); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Redraw everything. */ void draw_world(void) { int i, j; for(i = 0; i < height; i++) for(j = 0; j < width; j++) plot_point(j, i, world[i][j]); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Restart the simulation after food has been found. */ void restart(void) { /* Since the previous run only ends when food is found, place food * on the current location. */ world[me_h][me_w] = FOOD; plot_point(me_w, me_h, FOOD); /* Find a new empty location to start from. */ while(1) { me_h = random() % height; me_w = random() % width; if(world[me_h][me_w] == EMPTY) break; } world[me_h][me_w] = ME; plot_point(me_w, me_h, ME); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Builds a new list from a new front element and a sublist. */ CLIST *cons(CLASSIFIER *class, CLIST *list) { CLIST *new; new = xmalloc(sizeof(CLIST)); new->class = class; new->next = list; return(new); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Destroy a list and free up memory. */ void delete(CLIST *list) { CLIST *next; while(list) { next = list->next; free(list); list = next; } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Encode the ZCS's environment into a string. */ void environment(char *env) { /* This order contains the x and y offsets for moving clockwise * around the current position, starting from the northern-most * position. { 0, -1 } is north because for plotting (0, 0) is * the upper-left cell. */ int order[8][2] = { { 0, -1}, { 1, -1}, { 1, 0}, { 1, 1}, { 0, 1}, {-1, 1}, {-1, 0}, {-1, -1} }; int i, x, y; for(i = 0; i < 8; i++) { x = (me_w + order[i][0] + width) % width; y = (me_h + order[i][1] + height) % height; /* Covert the world state at this position to two bits. */ env[2 * i] = NUM2BIN1(world[y][x]); env[2 * i + 1] = NUM2BIN2(world[y][x]); } env[CLEN] = 0; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Checks if two strings are "equal" considering while card characters. */ int condeq(char *cond, char *env) { int i; for(i = 0; i < CLEN; i++) if(cond[i] != '#' && cond[i] != env[i]) return(0); return(1); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Steps through the conditions of all classifiers and forms a match list of all classifiers that match the environment. */ CLIST *matchlist(char *env) { int i; CLIST *mlist = NULL; for(i = 0; i < size; i++) if(condeq(pop[i].cond, env)) mlist = cons(&pop[i], mlist); return(mlist); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Performs random roulette selection based on the normalized strengths of the classifiers. */ int picklarge(int skip) { int i; double x, sum, runsum; /* Calculate the sum of strengths of everything but the * skip'th classifier. */ sum = 0; for(i = 0; i < size; i++) if(i != skip) sum += pop[i].str; runsum = 0; x = random_range(0, 1); for(i = 0; i < size; i++) { if(i == skip) continue; runsum += pop[i].str / sum; /* Accept a choice based on cumulative sum of strengths (which * should be equal to 1 if done over all strings). */ if(x <= runsum) return(i); } /* Just in case there was a subtle numerical error. */ return(size - 1); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Performs random roulette selection based on the normalized inverse strengths of the classifiers. */ int picksmall(int skip) { int i; double x, sum, runsum; /* Calculate the sum of inverse strengths of everything but the * skip'th classifier. */ sum = 0; for(i = 0; i < size; i++) if(i != skip) sum += 1 / pop[i].str; runsum = 0; x = random_range(0, 1); for(i = 0; i < size; i++) { if(i == skip) continue; runsum += (1 / pop[i].str) / sum; /* Accept a choice based on cumulative sum of inverse strengths * (which should be equal to 1 if done over all strings). */ if(x <= runsum) return(i); } /* Just in case there was a subtle numerical error. */ return(size - 1); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Optionally perform covering, which entails building a new classifier if none match the current environment. */ CLIST *covering(CLIST *mlist, char *env) { CLIST *l; int i, replace; double str = 0, mean = 0; /* Get the total strength of the matchlist. Note that this will * be zero if the match list is empty. */ for(l = mlist; l != NULL; l = l->next) str += l->class->str; /* Get the average strength of all classifiers. */ for(i = 0; i < size; i++) mean += pop[i].str; mean /= size; /* Check first bailout condition: total strength is greater * than mean of math list times cover constant. */ if(str > (mean * cover)) return(mlist); /* Pick something very weak from all classifiers. */ replace = picksmall(-1); /* Copy in the current environment. */ strcpy(pop[replace].cond, env); /* Sprinkle in some wildcards. */ for(i = 0; i < CLEN; i++) if(random_range(0, 1) < wild) pop[replace].cond[i] = '#'; /* Set the action of the new classifier to some random string. */ for(i = 0; i < ALEN; i++) pop[replace].act[i] = '0' + (random() % 2); /* Give it the mean fitness. */ pop[replace].str = mean; /* Add it to the match list. */ return(cons(&pop[replace], mlist)); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Compute an action list from a match list. */ CLIST *actlist(CLIST *mlist) { CLIST *l, *alist = NULL; CLASSIFIER *pick = NULL; double sum, runsum, x; /* Get the sum of strengths of the match list. */ sum = 0; for(l = mlist; l != NULL; l = l->next) sum += l->class->str; /* Do random roulette selection based on strengths. */ runsum = 0; x = random_range(0, 1); for(l = mlist; l != NULL; l = l->next) { runsum += l->class->str / sum; /* Accept a choice based on cumulative sum of strengths (which * should be equal to 1 if done over all strings). */ if(x <= runsum) { pick = l->class; break; } } /* If (l == NULL), then the above loop hit a strange statistical * burp. Pick the first thing in the match list to fix. */ if(l == NULL) pick = mlist->class; /* Form a list of every member of the match list that advocates * the same action and put them into an action list. */ for(l = mlist; l != NULL; l = l->next) if(strcmp(pick->act, l->class->act) == 0) alist = cons(l->class, alist); return(alist); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Move based on the action string. */ int move(char *act) { /* This order contains the x and y offsets for moving clockwise * around the current position, starting from the northern-most * position. { 0, -1 } is north because for plotting (0, 0) is * the upper-left cell. */ int order[8][2] = { { 0, -1}, { 1, -1}, { 1, 0}, { 1, 1}, { 0, 1}, {-1, 1}, {-1, 0}, {-1, -1} }; int i, index = 0, new_w, new_h, reward = 0; /* Convert the action into an integer between 0 and 2^ALEN - 1. */ for(i = 0; i < ALEN; i++) index = index * 2 + (act[i] - '0'); /* Take a step in the advocated position. */ new_w = (me_w + order[index][0] + width) % width; new_h = (me_h + order[index][1] + height) % height; /* Accept the move only if it doesn't put us into a rock. */ if(world[new_h][new_w] != ROCK) { /* Yummy! */ if(world[new_h][new_w] == FOOD) reward = FOOD_REWARD; plot_point(me_w, me_h, EMPTY); world[me_h][me_w] = EMPTY; me_w = new_w; me_h = new_h; world[me_h][me_w] = ME; plot_point(me_w, me_h, ME); } return(reward); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Perform one step of the implicit bucket brigade. */ void update(int reward, CLIST *mlist, CLIST *alist, CLIST *alistold) { int sz = 0; double hold = 0; CLIST *l; /* Sum of the hold amount, decay the strengths of the * classifiers in the action list, and count the size. */ for(l = alist; l != NULL; l = l->next) { hold += lrate * l->class->str; l->class->str -= lrate * l->class->str; sz++; } /* Pass out rewards to the action list. */ for(l = alist; l != NULL; l = l->next) l->class->str += lrate * reward / sz; /* Share the wealth with the previous action list. */ if(alistold) { sz = 0; for(l = alistold; l != NULL; l = l->next) sz++; for(l = alistold; l != NULL; l = l->next) l->class->str += drate * hold / sz; } /* Tax all classifiers in the match list that advocated a * different action. */ for(l = mlist; l != NULL; l = l->next) if(strcmp(l->class->act, alist->class->act) != 0) { l->class->str -= trate * l->class->str; } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Do one step of the GA to weed out the weaklings. */ void ga(void) { int pa, pb, oa, ob; int i, cindex, t; double ave; /* Pick two parents by strength that are guaranteed to be different. */ pa = picklarge(-1); pb = picklarge(pa); /* Pick two classifiers by inverse strength that are guaranteed to be * different. */ oa = picksmall(-1); ob = picksmall(oa); /* Halve the strength of the parents and pass on to * the children. */ pop[pa].str /= 2; pop[oa] = pop[pa]; pop[pb].str /= 2; pop[ob] = pop[pb]; /* Optionally cross the two children. */ if(random_range(0, 1) < crate) { /* Do crossover on the condition. */ if((random() % (CLEN + ALEN)) < CLEN) { cindex = (random() % CLEN) + 1; for(i = 0; i < cindex; i++) { t = pop[oa].cond[i]; pop[oa].cond[i] = pop[ob].cond[i]; pop[ob].cond[i] = t; } } /* Do crossover on the action. */ else { cindex = (random() % ALEN) + 1; for(i = 0; i < cindex; i++) { t = pop[oa].act[i]; pop[oa].act[i] = pop[ob].act[i]; pop[ob].act[i] = t; } } /* Blur the strengths. */ ave = (pop[oa].str + pop[ob].str) / 2; pop[oa].str = pop[ob].str = ave; } /* Optionally mutate condition. */ for(i = 0; i < CLEN; i++) { if(random_range(0, 1) < mrate) { pop[oa].cond[i] = '0' + (random() % 3); pop[oa].cond[i] = (pop[oa].cond[i] == '2') ? '#' : pop[oa].cond[i]; } if(random_range(0, 1) < mrate) { pop[ob].cond[i] = '0' + (random() % 3); pop[ob].cond[i] = (pop[ob].cond[i] == '2') ? '#' : pop[ob].cond[i]; } } /* Optionally mutate action. */ for(i = 0; i < ALEN; i++) { if(random_range(0, 1) < mrate) pop[oa].act[i] = '0' + (random() % 2); if(random_range(0, 1) < mrate) pop[ob].act[i] = '0' + (random() % 2); } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ int main(int argc, char **argv) { FILE *fp; extern int plot_mag; extern int plot_inverse; CLIST *mlist, *alist, *alistold; int t, reward, cnt, *counts, i; double ave = 0, totcount = 0; char env[CLEN + 1]; get_options(argc, argv, options, help_string); srandom(seed); initialize(); counts = xmalloc(sizeof(int) * avelen); plot_inverse = invert; plot_mag = mag; plot_init(width, height, 4, term); plot_set_all(0); mlist = alist = alistold = NULL; draw_world(); /* For each time step... */ for(t = 0; t < steps; t++) { reward = cnt = 0; /* Keep going until some a reward is earned. */ while(reward == 0) { /* Build an environment string. */ environment(env); /* Form the match list. */ mlist = matchlist(env); /* Do covering. */ mlist = covering(mlist, env); /* Make the action list. */ alist = actlist(mlist); /* Move the little guy. */ reward = move(alist->class->act); /* Do implicit BB. */ update(reward, mlist, alist, alistold); /* Optionally perform GA step. */ if(random_range(0, 1) < grate) ga(); /* Clean up excess baggage. */ delete(mlist); delete(alistold); alistold = alist; cnt++; } /* Clean up excess baggage. */ delete(alistold); alistold = NULL; /* Take a windowed moving average of the number of steps * needed to complete the previous trials. */ if(t >= avelen) ave = ((ave * avelen) - counts[t % avelen] + cnt) / avelen; counts[t % avelen] = cnt; totcount += cnt; /* If we've completed enough trials, calculate the average. */ if(t == avelen - 1) { for(i = 0; i < avelen; i++) ave += counts[i]; ave /= avelen; } /* Dump out stats. */ if(t >= avelen) printf("%d\t%f\t%f\n", cnt, ave, totcount / (t + 1)); /* Restart in a new position. */ restart(); } /* Simulation is complete, so print out classifiers to log file. */ if((fp = fopen("zcs.log", "w")) != NULL) { qsort(pop, size, sizeof(CLASSIFIER), classcomp); for(i = 0; i < size; i++) fprintf(fp, "%s : %s : %.5f\n", pop[i].cond, pop[i].act, pop[i].str); } plot_finish(); exit(0); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */