/* NAME
* gaipd - breed IPD strategies with a genetic algorithm
* NOTES
* See the STRINGS section for information on how the strategies
* are encoded as strings. Also note that the dnaindex[] array
* allows you to index the first location of a strategy string
* that corresponds to a sub-strategy for a particular history
* length.
* PAYOFFS
* The payoff matrix for the Prisoner's Dilemma game is usually
* expressed as:
* Player B's Move
* +-----------+-----------+
* Player A's Move | cooperate | defect |
* +-----------+-----------+-----------+
* | cooperate | CC, CC | CD, DC |
* +-----------+-----------+-----------+
* | defect | DC, CD | DD, DD |
* +-----------+-----------+-----------+
*
* where the table entries are (A's payoff, B's payoff) and
* CC, CD, DC, and DD are the reward, sucker, temptation,
* and punish payoffs, respectively. For each of these four
* outcomes you will probably want the payoffs to reflect the
* relationships:
*
* (DC > CC > DD > CD) and ((CD + DC) / 2 < CC).
* GENERATIONS
* A single generation proceeds as follows. Each member of the
* population must play several bouts with randomly selected
* opponents. For each opponent, several rounds are played. The
* total score after these bouts is a strategy's raw fitness score.
* STRINGS
* Since population strings may be optionally displayed at the end
* of the simulation, this section describes the format of these
* strings. Given two players, A and B, and the current time, t,
* and letting cooperation be denoted by 0 and defection by 1, form
* a bit string such as:
*
* A(t-1) B(t-1) A(t-2) B(t-2) ... A(t-H) B(t-H)
*
* where A(T) and B(T) are A and B's moves from time T,
* respectively, and H is the number of time steps "remembered" by
* each player. This bit string can take 2^(2 * H) values. To
* define a complete strategy, we must have a H + 1 separate tables
* of this form to describe each possible history. Thus, the rule
* table string used internally in the program and displayed at the
* end have as their first entry the move to make with no previous
* history, followed by four entries for H equals to 1, followed by
* sixteen entries for when H equals 2, and so on.
*
* As an annotated example, Tit-for-Tat is encoded as "CCDCD" so
* that the first "C" indicates that the first move should be C
* while the last for characters indicate what to play if
* A(t-1) and B(t-1) is equal to (C, C), (C, D), (D, C), and (D, D),
* respectively.
* HINTS
* Without any parameters (and assuming you have an uncorrupted
* version if this source code) running this program without any
* parameters will probably result in Tit-for-Tat, "CCDCD",
* dominating. If you run it with the -noise 0.1 option, then
* Pavlov, "CCDDC", will probably win in the end since it is more
* resistant to noise.
*
* See the author's book, "The Computational Beauty of Nature," for
* more details.
* MISCELLANY
* The fitness function relies on two steps done after the raw
* fitness scores are calculated. The raw fitness score from the
* previous step is divided by the total number of PD rounds played.
* (Note that this may vary among population members since opponents
* are selected at random.) This yields a scaled fitness score.
* The normalized fitness is then set to the scaled fitness divided
* by the sum of the scaled fitnesses. Thus the sum of the
* normalized fitnesses must be equal to one.
* BUGS
* No sanity checks are performed to make sure that any of the
* options make sense.
* AUTHOR
* Copyright (c) 1997, Gary William Flake.
*
* Permission granted for any use according to the standard GNU
* ``copyleft'' agreement provided that the author's comments are
* neither modified nor removed. No warranty is given or implied.
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "misc.h"
/* Payoffs, named <own move><opponent's move>: DC is the temptation,
 * CC the reward, DD the punishment and CD the sucker's payoff.  Each
 * can be overridden through the option of the same name.
 */
double DC = 5;
double CC = 4;
double DD = 1;
double CD = 0;

/* Dimensions of the simulation. */
int size = 100;        /* population size */
int gens = 50;         /* number of generations */
int bouts = 50;        /* bouts per population member per generation */
int rounds = 20;       /* rounds per bout */
int hlen = 1;          /* history length: time steps each player remembers */
int seed = 0;          /* value handed to srandom() */
int dump = 0;          /* nonzero: print the entire population at the end */

/* Genetic-operator rates and the chance of a mistaken move. */
double crate = 0.25;   /* crossover rate */
double mrate = 0.001;  /* mutation rate, applied per gene */
double noise = 0.0;    /* probability that a move is replaced at random */
/* One-paragraph description of the program.  main() passes it to
 * get_options() together with the options[] table; get_options() is
 * declared in misc.h, so presumably this is the text shown as help --
 * confirm against misc.h.  The backslash-newline pairs splice the
 * literal into a single line without embedded newlines.
 */
char help_string[] = "\
Use a genetic algorithm to evolve IPD strategies according to \
user-specified constraints. This program illustrates how GAs can \
demonstrate co-evolution since IPD strategies can only be successful \
within the context of their likely opponents. Reproduction of \
strategies entails crossover and mutation with strategies being \
selected based on fitness.\
";
/* Command-line option table handed to get_options() in main().  Each
 * entry lists, in order: the flag, a type tag (OPT_INT, OPT_DOUBLE or
 * OPT_SWITCH), the address of the global that receives the value, and
 * a one-line description.  The all-NULL entry terminates the table.
 * The OPTION type and the OPT_* tags come from misc.h and are not
 * visible here.
 */
OPTION options[] = {
{ "-size", OPT_INT, &size, "Population size." },
{ "-gens", OPT_INT, &gens, "Number of generations." },
{ "-bouts", OPT_INT, &bouts, "Bouts per generation." },
{ "-rounds", OPT_INT, &rounds, "Rounds per bout." },
{ "-hlen", OPT_INT, &hlen, "History length." },
{ "-seed", OPT_INT, &seed, "Random seed." },
{ "-crate", OPT_DOUBLE, &crate, "Crossover rate." },
{ "-mrate", OPT_DOUBLE, &mrate, "Mutation rate." },
{ "-noise", OPT_DOUBLE, &noise, "Chance of mistake in transaction." },
{ "-CC", OPT_DOUBLE, &CC, "Reward Payoff." },
{ "-CD", OPT_DOUBLE, &CD, "Sucker Payoff." },
{ "-DC", OPT_DOUBLE, &DC, "Temptation Payoff." },
{ "-DD", OPT_DOUBLE, &DD, "Punish Payoff." },
{ "-dump", OPT_SWITCH, &dump, "Print entire population at end?" },
{ NULL, OPT_NULL, NULL, NULL }
};
/* These are global to avoid excessive parameter passing. */
/* All of them are allocated by inititalize_population(). */
double *fitness;   /* normalized fitness, one entry per population member */
int *dnaindex;     /* start of each history length's table within a DNA string */
int *score;        /* raw score accumulated this generation, per member */
int *roundbout;    /* number of rounds played this generation, per member */
int *hista;        /* circular buffer of player A's most recent moves */
int *histb;        /* circular buffer of player B's most recent moves */
char **oldpop;     /* current generation: one DNA array per member */
char **newpop;     /* next generation, filled in by reproduce() */
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* Given two strategies, calculate the respective scores for a single
   round of the Prisoner's Dilemma. */
/* strata and stratb are the two players' moves for this round: 0 is
 * cooperate and 1 is defect.  Any pair other than (0,0), (1,0) or (0,1)
 * is scored as mutual defection.  Player A's payoff is stored in
 * *scorea and player B's in *scoreb.  The payoffs CC, CD, DC and DD are
 * doubles, so storing them through the int pointers drops any
 * fractional part.
 */
void pd(int strata, int stratb, int *scorea, int *scoreb)
{
  const int a_cooperates = (strata == 0);
  const int b_cooperates = (stratb == 0);

  /* Mutual cooperation: both players collect the reward. */
  if (a_cooperates && b_cooperates) {
    *scorea = CC;
    *scoreb = CC;
    return;
  }

  /* A defects on a cooperating B: temptation for A, sucker for B. */
  if (strata == 1 && b_cooperates) {
    *scorea = DC;
    *scoreb = CD;
    return;
  }

  /* The mirror image: A is the sucker. */
  if (a_cooperates && stratb == 1) {
    *scorea = CD;
    *scoreb = DC;
    return;
  }

  /* Everything else counts as mutual defection. */
  *scorea = DD;
  *scoreb = DD;
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* Perform a single bout of the IPD. */
/* More precisely, each call plays ONE round; compute_fitness() calls
 * this once per round of a bout.
 *
 *   strata, stratb  indices into oldpop[] of players A and B
 *   time            round number within the current bout, from 0
 *   scorea, scoreb  receive the payoffs of A and B for this round
 *
 * The moves are looked up in each player's DNA from the last
 * min(time, hlen) rounds of shared history, optionally replaced by a
 * random move with probability `noise`, scored with pd(), and then
 * recorded in hista[]/histb[] for the following rounds.
 *
 * A history length of 0 (memoryless strategies, DNA of one gene) is
 * handled explicitly: the original computed "time % hlen", which is a
 * division by zero, and stored into a zero-length history buffer.
 */
void ipd(int strata, int stratb, int time, int *scorea, int *scoreb)
{
  int i, movea, moveb, indexa, indexb, hindex, h, t;

  /* Compute the amount of history to consider. */
  t = (time > hlen) ? hlen : time;

  /* Get the current index in the history array. */
  /* Without any history there is no slot to compute; pin it to 0. */
  hindex = (hlen > 0) ? time % hlen : 0;

  indexa = indexb = 0;

  /* For all previous times that we "remember" ... */
  for (i = 0; i < t; i++) {

    /* Get the index of the time step (i + 1) units in the past. */
    h = (hindex - i - 1 + hlen) % hlen;

    /* Compute the DNA indices that corresponds to this history. */
    /* Each player sees its own move first, then the opponent's. */
    indexa = indexa * 2 + hista[h];
    indexa = indexa * 2 + histb[h];
    indexb = indexb * 2 + histb[h];
    indexb = indexb * 2 + hista[h];
  }

  /* Now grab the move from the DNA. */
  movea = oldpop[strata][dnaindex[t] + indexa];
  moveb = oldpop[stratb][dnaindex[t] + indexb];

  /* Optionally add noise to a move. */
  /* The order of the random draws is kept as in the original so that a
   * given seed reproduces the same run.
   */
  if (random_range(0, 1) < noise) {
    movea = random() % 2;
  }
  if (random_range(0, 1) < noise) {
    moveb = random() % 2;
  }

  /* Get the actual scores. */
  pd(movea, moveb, scorea, scoreb);

  /* Save the move for the next round in this bout. */
  if (hlen > 0) {
    hista[hindex] = movea;
    histb[hindex] = moveb;
  }
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* Compute the fitness of each member of the population. */
/* Every member plays `bouts` bouts of `rounds` rounds against randomly
 * drawn opponents (possibly itself).  Both participants' entries in
 * score[] and roundbout[] are updated, so a member's totals also
 * include the bouts in which it was drawn as somebody's opponent.
 * Afterwards fitness[i] holds member i's average score per round,
 * divided by the sum of those averages over the population.
 *
 * Two degenerate cases used to yield 0/0 (NaN) fitness values and are
 * now handled: a member that played no rounds gets a scaled fitness of
 * 0, and if the scaled fitnesses sum to 0 every member gets the same
 * fitness 1/size, so the normalized values still sum to one.
 */
void compute_fitness(void)
{
  int i, j, k, opponent, scorea, scoreb;
  double sum;

  /* Zero out the initial scores and the number of games played. */
  for (i = 0; i < size; i++) {
    roundbout[i] = score[i] = 0;
  }

  /* For each member of the popluation... */
  for (i = 0; i < size; i++) {

    /* Perform a bunch of bouts... */
    for (j = 0; j < bouts; j++) {

      /* Get a random opponent. */
      opponent = random() % size;
      scorea = scoreb = 0;

      /* Perform the IPD for a bunch of rounds. */
      for (k = 0; k < rounds; k++) {
        ipd(i, opponent, k, &scorea, &scoreb);

        /* Tally the cumulative scores. */
        score[i] += scorea;
        score[opponent] += scoreb;

        /* Keep track of the number of rounds played by each player. */
        roundbout[i]++;
        roundbout[opponent]++;
      }
    }
  }

  /* Normalize the scores by the number of rounds * bouts
   * and sum up all scores so that we can normalize them
   * by the total raw fitness of the population below.
   */
  sum = 0;
  for (i = 0; i < size; i++) {
    fitness[i] = (roundbout[i] > 0)
      ? score[i] / (double) roundbout[i]
      : 0.0;
    sum += fitness[i];
  }

  if (sum != 0.0) {
    for (i = 0; i < size; i++) {
      fitness[i] /= sum;
    }
  } else {
    /* Nothing distinguishes the members: select uniformly. */
    for (i = 0; i < size; i++) {
      fitness[i] = 1.0 / size;
    }
  }
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* Perform random roulette selection according to normalized fitness. */
/* normfit must have `size` entries.  A value is drawn with
 * random_range(0, 1) (a misc.h helper, presumably uniform on that
 * interval) and the function returns the first index at which the
 * running total of normfit[] reaches that value.
 */
int select_one(double *normfit)
{
  double target = random_range(0, 1);
  double cumulative = 0;
  int pick = 0;

  while (pick < size) {
    cumulative += normfit[pick];

    /* Accept a choice based on cumulative sum of fitness (which
     * should be equal to 1 if done over all string).
     */
    if (target <= cumulative) {
      return pick;
    }
    pick++;
  }

  /* Just in case there was a subtle numerical error. */
  return size - 1;
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* Make love not war. Optionally cross parent A (PA) with parent P (PB).
   Children may also be affected by gamma rays. Childindex refers to
   where the two new children should be placed in the new population. */
/* parenta and parentb index oldpop[]; the two children are written to
 * newpop[childindex] and newpop[childindex + 1].  With probability
 * `crate` a single crossover point is chosen and the parents' tails are
 * exchanged; otherwise the children are plain copies.  Every gene of
 * each child is then replaced by a random move with probability `mrate`.
 *
 * A one-gene DNA (hlen == 0) has no interior crossover point.  The
 * original evaluated "random() % (dnalen - 1)" in that case, which is a
 * division by zero; crossover is now skipped instead.
 */
void reproduce(int parenta, int parentb, int childindex)
{
  int i, cpoint, dnalen;

  dnalen = dnaindex[hlen + 1];

  /* Pick a crossover point. Note that a choice of 0 or tlen
   * does nothing.
   */
  /* ("tlen" is the DNA length, dnalen.)  The random_range() draw is
   * made unconditionally, as before, so the sequence of random numbers
   * is unchanged for any DNA longer than one gene.
   */
  cpoint = dnalen;
  if (random_range(0, 1) < crate && dnalen > 1) {
    cpoint = (random() % (dnalen - 1)) + 1;
  }

  /* Copy over the first cpoint characters. */
  for (i = 0; i < cpoint; i++) {
    newpop[childindex][i] = oldpop[parenta][i];
    newpop[childindex + 1][i] = oldpop[parentb][i];
  }

  /* Copy over the remaining characters, but this time
   * swap the DNA from the two parents.
   */
  for (i = cpoint; i < dnalen; i++) {
    newpop[childindex][i] = oldpop[parentb][i];
    newpop[childindex + 1][i] = oldpop[parenta][i];
  }

  /* Optionally mutate the children. */
  for (i = 0; i < dnalen; i++) {
    if (random_range(0, 1) < mrate) {
      newpop[childindex][i] = random() % 2;
    }
    if (random_range(0, 1) < mrate) {
      newpop[childindex + 1][i] = random() % 2;
    }
  }
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* Ugly-print some statistics. */
/* Writes to stderr, for generation number `time`: the population's
 * mean per-round score, the per-round score of the member with the
 * highest normalized fitness, and that member's DNA as a string of
 * 'C' and 'D' characters.  Relies on score[], roundbout[] and fitness[]
 * as left by compute_fitness().
 *
 * Nothing is printed for an empty population.  If no fitness value
 * compares greater than -1 (which happens when they are all NaN),
 * member 0 is reported; the original indexed score[] and oldpop[] with
 * -1 in both situations.
 */
void dump_stats(int time)
{
  int i, besti = -1;
  double best = -1, ave = 0;

  if (size <= 0) {
    return;
  }

  /* Find the best match and average the scores. */
  for (i = 0; i < size; i++) {
    if (fitness[i] > best) {
      besti = i;
      best = fitness[i];
    }
    ave += score[i] / (double) roundbout[i];
  }
  ave /= size;

  /* Keep the index valid even when no comparison above succeeded. */
  if (besti < 0) {
    besti = 0;
  }

  fprintf(stderr, "---\ntime = %d\n", time);
  fprintf(stderr, "average score = %f\n", ave);
  fprintf(stderr, "best average score = %f\n", score[besti] /
          (double) roundbout[besti]);
  fprintf(stderr, "best = ");
  /* Genes are stored as 0/1; adding 'C' prints them as 'C'/'D'. */
  for (i = 0; i < dnaindex[hlen + 1]; i++) {
    fputc(oldpop[besti][i] + 'C', stderr);
  }
  fputc('\n', stderr);
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* Initialize space for the scores, round * bount counts, fitness
and history for each IPD player. */
void inititalize_population(void)
{
int i, j;
score = xmalloc(sizeof(int) * size);
roundbout = xmalloc(sizeof(int) * size);
fitness = xmalloc(sizeof(double) * size);
hista = xmalloc(sizeof(int) * hlen);
histb = xmalloc(sizeof(int) * hlen);
/* Dnaindex[] is a special array that simplifies how we determine
* the next move based on prior moves. It is indexed by a time
* parameter and returns a value that indicates where in the
* a population member's DNA the "lookup table" for that prior
* history is contained. As a special case, dnaindex[hlen + 1]
* returns the total length of the DNA.
*/
dnaindex = xmalloc(sizeof(int) * (hlen + 2));
dnaindex[0] = 0;
for(i = 1; i < hlen + 2; i++)
dnaindex[i] = dnaindex[i - 1] + pow(2, (i - 1) * 2);
oldpop = xmalloc(sizeof(char *) * size);
newpop = xmalloc(sizeof(char *) * size);
for(i = 0; i < size; i++) {
oldpop[i] = xmalloc(sizeof(char) * dnaindex[hlen + 1]);
newpop[i] = xmalloc(sizeof(char) * dnaindex[hlen + 1]);
/* Start of with random DNA. */
for(j = 0; j < dnaindex[hlen + 1]; j++)
oldpop[i][j] = random() % 2;
}
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
int main(int argc, char **argv)
{
int t, i, j, parent_a, parent_b;
char **swap;
get_options(argc, argv, options, help_string);
srandom(seed);
/* Force the size to be even. */
size += (size / 2 * 2 != size);
/* Initialize the population. */
inititalize_population();
/* For each time step... */
for(t = 0; t < gens; t++) {
compute_fitness();
dump_stats(t);
/* Pick two parents by fitness and mate them until the
* next generation has been made.
*/
for(i = 0; i < size; i += 2) {
parent_a = select_one(fitness);
parent_b = select_one(fitness);
reproduce(parent_a, parent_b, i);
}
/* Make everything old new again. */
swap = newpop; newpop = oldpop; oldpop = swap;
}
/* Dump out all strategies to stdout for posterity. */
if(dump)
for(j = 0; j < size; j++) {
for(i = 0; i < dnaindex[hlen + 1]; i++)
fputc(oldpop[j][i] + 'C', stdout);
fputc('\n', stdout);
}
exit(0);
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */