/* NAME * gaipd - breed IPD strategies with a genetic algorithm * NOTES * See the STRINGS section for information on how the strategies * are encoded as strings. Also note that the dnaindex[] array * allows you to index the first location of a strategy string * that corresponds to a sub-strategy for a particular history * length. * PAYOFFS * The payoff matrix for the Prisoner's Dilemma game is usually * expressed as: * Player B's Move * +-----------+-----------+ * Player A's Move | cooperate | defect | * +-----------+-----------+-----------+ * | cooperate | CC, CC | CD, DC | * +-----------+-----------+-----------+ * | defect | DC, CD | DD, DD | * +-----------+-----------+-----------+ * * where the table entries are (A's payoff, B's payoff) and * CC, CD, DC, and DD are the reward, sucker, temptation, * and punish payoffs, respectively. For each of these four * outcomes you will probably want the payoffs to reflect the * relationships: * * (DC > CC > DD > CD) and ((CD + DC) / 2 < CC). * GENERATIONS * A single generation proceeds as follows. Each member of the * population must play several bouts with randomly selected * opponents. For each opponent, several rounds are played. The * total score after these bouts is a strategy's raw fitness score. * STRINGS * Since population strings may be optionally displayed at the end * of the simulation, this section describes the format of these * strings. Given two players, A and B, and the current time, t, * and letting cooperation be denoted by 0 and defection by 1, form * a bit string such as: * * A(t-1) B(t-1) A(t-2) B(t-2) ... A(t-H) B(t-H) * * where A(T) and B(T) are A and B's moves from time T, * respectively, and H is the number of time steps "remembered" by * each player. This bit string can take 2^(2 * H) values. To * define a complete strategy, we must have a H + 1 separate tables * of this form to describe each possible history. Thus, the rule * table string used internally in the program and displayed at the * end have as their first entry the move to make with no previous * history, followed by four entries for H equals to 1, followed by * sixteen entries for when H equals 2, and so on. * * As an annotated example, Tit-for-Tat is encoded as "CCDCD" so * that the first "C" indicates that the first move should be C * while the last for characters indicate what to play if * A(t-1) and B(t-1) is equal to (C, C), (C, D), (D, C), and (D, D), * respectively. * HINTS * Without any parameters (and assuming you have an uncorrupted * version if this source code) running this program without any * parameters will probably result in Tit-for-Tat, "CCDCD", * dominating. If you run it with the -noise 0.1 option, then * Pavlov, "CCDDC", will probably win in the end since it is more * resistant to noise. * * See the author's book, "The Computational Beauty of Nature," for * more details. * MISCELLANY * The fitness function relies on two steps done after the raw * fitness scores are calculated. The raw fitness score from the * previous step is divided by the total number of PD rounds played. * (Note that this may vary among population members since opponents * are selected at random.) This yields a scaled fitness score. * The normalized fitness is then set to the scaled fitness divided * by the sum of the scaled fitnesses. Thus the sum of the * normalized fitnesses must be equal to one. * BUGS * No sanity checks are performed to make sure that any of the * options make sense. * AUTHOR * Copyright (c) 1997, Gary William Flake. * * Permission granted for any use according to the standard GNU * ``copyleft'' agreement provided that the author's comments are * neither modified nor removed. No warranty is given or implied. */ #include #include #include #include "misc.h" double DC = 5, CC = 4, DD = 1, CD = 0; int size = 100, gens = 50, bouts = 50; int rounds = 20, hlen = 1, seed = 0, dump = 0; double crate = 0.25, mrate = 0.001, noise = 0.0; char help_string[] = "\ Use a genetic algorithm to evolve IPD strategies according to \ user-specified constraints. This program illustrates how GAs can \ demonstrate co-evolution since IPD strategies can only be successful \ within the context of their likely opponents. Reproduction of \ strategies entails crossover and mutation with strategies being \ selected based on fitness.\ "; OPTION options[] = { { "-size", OPT_INT, &size, "Population size." }, { "-gens", OPT_INT, &gens, "Number of generations." }, { "-bouts", OPT_INT, &bouts, "Bouts per generation." }, { "-rounds", OPT_INT, &rounds, "Rounds per bout." }, { "-hlen", OPT_INT, &hlen, "History length." }, { "-seed", OPT_INT, &seed, "Random seed." }, { "-crate", OPT_DOUBLE, &crate, "Crossover rate." }, { "-mrate", OPT_DOUBLE, &mrate, "Mutation rate." }, { "-noise", OPT_DOUBLE, &noise, "Chance of mistake in transaction." }, { "-CC", OPT_DOUBLE, &CC, "Reward Payoff." }, { "-CD", OPT_DOUBLE, &CD, "Sucker Payoff." }, { "-DC", OPT_DOUBLE, &DC, "Temptation Payoff." }, { "-DD", OPT_DOUBLE, &DD, "Punish Payoff." }, { "-dump", OPT_SWITCH, &dump, "Print entire population at end?" }, { NULL, OPT_NULL, NULL, NULL } }; /* These are global to avoid excessive parameter passing. */ double *fitness; int *dnaindex, *score, *roundbout, *hista, *histb; char **oldpop, **newpop; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Given two strategies, calculate the respective scores for a single round of the Prisoner's Dilemma. */ void pd(int strata, int stratb, int *scorea, int *scoreb) { if(strata == 0 && stratb == 0) *scorea = *scoreb = CC; else if(strata == 1 && stratb == 0) { *scorea = DC; *scoreb = CD; } else if(strata == 0 && stratb == 1) { *scorea = CD; *scoreb = DC; } else *scorea = *scoreb = DD; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Perform a single bout of the IPD. */ void ipd(int strata, int stratb, int time, int *scorea, int *scoreb) { int i, movea, moveb, indexa, indexb, hindex, h, t; /* Compute the amount of history to consider. */ t = (time > hlen) ? hlen : time; /* Get the current index in the history array. */ hindex = time % hlen; indexa = indexb = 0; /* For all previous times that we "remember" ... */ for(i = 0; i < t; i++) { /* Get the index of the time step (i + 1) units in the past. */ h = (hindex - i - 1 + hlen) % hlen; /* Compute the DNA indices that corresponds to this history. */ indexa = indexa * 2 + hista[h]; indexa = indexa * 2 + histb[h]; indexb = indexb * 2 + histb[h]; indexb = indexb * 2 + hista[h]; } /* Now grab the move from the DNA. */ movea = oldpop[strata][dnaindex[t] + indexa]; moveb = oldpop[stratb][dnaindex[t] + indexb]; /* Optionally add noise to a move. */ if(random_range(0,1) < noise) movea = random() % 2; if(random_range(0,1) < noise) moveb = random() % 2; /* Get the actual scores. */ pd(movea, moveb, scorea, scoreb); /* Save the move for the next round in this bout. */ hista[hindex] = movea; histb[hindex] = moveb; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Compute the fitness of each member of the population. */ void compute_fitness(void) { int i, j, k, opponent, scorea, scoreb; double sum; /* Zero out the initial scores and the number of games played. */ for(i = 0; i < size; i++) roundbout[i] = score[i] = 0; /* For each member of the popluation... */ for(i = 0; i < size; i++) /* Perform a bunch of bouts... */ for(j = 0; j < bouts; j++) { /* Get a random opponent. */ opponent = random() % size; scorea = scoreb = 0; /* Perform the IPD for a bunch of rounds. */ for(k = 0; k < rounds; k++) { ipd(i, opponent, k, &scorea, &scoreb); /* Tally the cumulative scores. */ score[i] += scorea; score[opponent] += scoreb; /* Keep track of the number of rounds played by each player. */ roundbout[i]++; roundbout[opponent]++; } } /* Normalize the scores by the number of rounds * bouts * and sum up all scores so that we can normalize them * by the total raw fitness of the population below. */ sum = 0; for(i = 0; i < size; i++) { fitness[i] = score[i] / (double) roundbout[i]; sum += fitness[i]; } for(i = 0; i < size; i++) fitness[i] /= sum; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Perform random roulette selection according to normalized fitness. */ int select_one(double *normfit) { int i; double x, sum; sum = 0; x = random_range(0, 1); for(i = 0; i < size; i++) { sum += normfit[i]; /* Accept a choice based on cumulative sum of fitness (which * should be equal to 1 if done over all string). */ if(x <= sum) return(i); } /* Just in case there was a subtle numerical error. */ return(size - 1); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Make love not war. Optionally cross parent A (PA) with parent P (PB). Children may also be affected by gamma rays. Childindex refers to where the two new children should be placed in the new population. */ void reproduce(int parenta, int parentb, int childindex) { int i, cpoint, dnalen; dnalen = dnaindex[hlen + 1]; /* Pick a crossover point. Note that a choice of 0 or tlen * does nothing. */ cpoint = (random_range(0, 1) < crate) ? (random() % (dnalen - 1)) + 1 : dnalen; /* Copy over the first cpoint characters. */ for(i = 0; i < cpoint; i++) { newpop[childindex][i] = oldpop[parenta][i]; newpop[childindex + 1][i] = oldpop[parentb][i]; } /* Copy over the remaining characters, but this time * swap the DNA from the two parents. */ for(i = cpoint; i < dnalen; i++) { newpop[childindex][i] = oldpop[parentb][i]; newpop[childindex + 1][i] = oldpop[parenta][i]; } /* Optionally mutate the children. */ for(i = 0; i < dnalen; i++) { if(random_range(0, 1) < mrate) newpop[childindex][i] = random() % 2; if(random_range(0, 1) < mrate) newpop[childindex + 1][i] = random() % 2; } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Ugly-print some statistics. */ void dump_stats(int time) { int i, besti = -1; double best = -1, ave = 0; /* Find the best match and average the scores. */ for(i = 0; i < size; i++) { if(fitness[i] > best) { besti = i; best = fitness[i]; } ave += score[i] / (double) roundbout[i]; } ave /= size; fprintf(stderr, "---\ntime = %d\n", time); fprintf(stderr, "average score = %f\n", ave); fprintf(stderr, "best average score = %f\n", score[besti] / (double)roundbout[besti]); fprintf(stderr, "best = "); for(i = 0; i < dnaindex[hlen + 1]; i++) fputc(oldpop[besti][i] + 'C', stderr); fputc('\n', stderr); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Initialize space for the scores, round * bount counts, fitness and history for each IPD player. */ void inititalize_population(void) { int i, j; score = xmalloc(sizeof(int) * size); roundbout = xmalloc(sizeof(int) * size); fitness = xmalloc(sizeof(double) * size); hista = xmalloc(sizeof(int) * hlen); histb = xmalloc(sizeof(int) * hlen); /* Dnaindex[] is a special array that simplifies how we determine * the next move based on prior moves. It is indexed by a time * parameter and returns a value that indicates where in the * a population member's DNA the "lookup table" for that prior * history is contained. As a special case, dnaindex[hlen + 1] * returns the total length of the DNA. */ dnaindex = xmalloc(sizeof(int) * (hlen + 2)); dnaindex[0] = 0; for(i = 1; i < hlen + 2; i++) dnaindex[i] = dnaindex[i - 1] + pow(2, (i - 1) * 2); oldpop = xmalloc(sizeof(char *) * size); newpop = xmalloc(sizeof(char *) * size); for(i = 0; i < size; i++) { oldpop[i] = xmalloc(sizeof(char) * dnaindex[hlen + 1]); newpop[i] = xmalloc(sizeof(char) * dnaindex[hlen + 1]); /* Start of with random DNA. */ for(j = 0; j < dnaindex[hlen + 1]; j++) oldpop[i][j] = random() % 2; } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ int main(int argc, char **argv) { int t, i, j, parent_a, parent_b; char **swap; get_options(argc, argv, options, help_string); srandom(seed); /* Force the size to be even. */ size += (size / 2 * 2 != size); /* Initialize the population. */ inititalize_population(); /* For each time step... */ for(t = 0; t < gens; t++) { compute_fitness(); dump_stats(t); /* Pick two parents by fitness and mate them until the * next generation has been made. */ for(i = 0; i < size; i += 2) { parent_a = select_one(fitness); parent_b = select_one(fitness); reproduce(parent_a, parent_b, i); } /* Make everything old new again. */ swap = newpop; newpop = oldpop; oldpop = swap; } /* Dump out all strategies to stdout for posterity. */ if(dump) for(j = 0; j < size; j++) { for(i = 0; i < dnaindex[hlen + 1]; i++) fputc(oldpop[j][i] + 'C', stdout); fputc('\n', stdout); } exit(0); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */