
/* NAME
 *   zcs - run a zeroth level classifier system on a terrain
 * NOTES
 *   None.
 * MISCELLANY
 *   The file format for the specifications files must contain the
 *   width and height of the world followed by the details of the
 *   world in ASCII format, where '.' is an empty cell, 'O' is a rock,
 *   and 'F' represent food.  See any of the examples in the 'data'
 *   subdirectory of this distribution for more details.
 *   
 *   The log file, 'zcs.log', contains one line per classifier with
 *   the condition string, the action string, and the strength of the
 *   classifier.  The classifiers are sorted so the the strongest
 *   classifiers are printed first.
 *   
 *   The ASCII output of the program shows the most recent, windowed
 *   average and the total average for the number of steps needed to
 *   find food.
 * HINTS
 *   See the author's book, "The Computational Beauty of Nature," for
 *   more details.
 * BUGS
 *   No sanity checks are performed to make sure that any of the
 *   options make sense.
 * AUTHOR
 *   Copyright (c) 1997, Gary William Flake.
 *   
 *   Permission granted for any use according to the standard GNU
 *   ``copyleft'' agreement provided that the author's comments are
 *   neither modified nor removed.  No warranty is given or implied.
 */


#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "misc.h"

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

int width, height, avelen = 50;
int size = 400, steps = 5000, seed = 0, mag = 10, invert = 1;
double sinit = 20, lrate = 0.2, drate = 0.71, trate = 0.1, crate = 0.5;
double mrate = 0.002, grate = 0.25, cover = 0.5, wild = 0.33;
char *term = NULL, *specs = "data/woods1.txt";

char help_string[] = "\
Train a zeroth level classifier system (ZCS) to traverse a \
two-dimensional terrain, avoid obstacles, and find food with the \
implicit bucket brigade algorithm and a genetic algorithm.  At the \
beginning of each step the ZCS is placed at a random location of it's \
world.  It interacts with its environment until it finds food, which \
yields a reward.  The simulation then restarts with the ZCS placed at \
a new random location.  The progress of the ZCS is continuously \
plotted, while the statistics on the time to find food are calculated \
and displayed.  At the end of the simulation the classifiers that \
make up the final ZCS are saved to a log file. \
";

OPTION options[] = {
  { "-specs",  OPT_STRING,  &specs,  "World specification file." },
  { "-steps",  OPT_INT,     &steps,  "Number of simulated trials." },
  { "-seed",   OPT_INT,     &seed,   "Random seed for initial state." },
  { "-size",   OPT_INT,     &size,   "Population size." },
  { "-sinit",  OPT_DOUBLE,  &sinit,  "Initial classifier strength." },
  { "-lrate",  OPT_DOUBLE,  &lrate,  "BB learning rate." },
  { "-drate",  OPT_DOUBLE,  &drate,  "BB discount rate." },
  { "-trate",  OPT_DOUBLE,  &trate,  "Tax rate for strength reduce." },
  { "-crate",  OPT_DOUBLE,  &crate,  "GA crossover rate." },
  { "-mrate",  OPT_DOUBLE,  &mrate,  "GA mutation rate." },
  { "-grate",  OPT_DOUBLE,  &grate,  "GA invocation rate." },
  { "-cover",  OPT_DOUBLE,  &cover,  "Covering factor." },
  { "-wild",   OPT_DOUBLE,  &wild,   "Probability of # in cover." },
  { "-avelen", OPT_INT,     &avelen, "Length of windowed average." },
  { "-inv",    OPT_SWITCH,  &invert, "Invert all colors?" },
  { "-mag",    OPT_INT,     &mag,    "Magnification factor." },
  { "-term",   OPT_STRING,  &term,   "How to plot points." },
  { NULL,      OPT_NULL,    NULL,    NULL }
};

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* These define how the objects are represented internally and
   also the color that objects are drawn in: food = 11, rock = 10,
   empty = 00. */

#define EMPTY  0
#define FOOD   1
#define ROCK   2
#define ME     3


/* How much to reward the CS when food is found. */

#define FOOD_REWARD 1000


/* Macros for handy conversions. */

#define CHAR2NUM(c) (c == 'F' ? FOOD : c == 'O' ? ROCK : EMPTY)
#define NUM2BIN1(n) (n == EMPTY ? '0' : '1')
#define NUM2BIN2(n) (n == FOOD ? '1' : '0')


/* The length of a classifier and the action bit strings. */

#define CLEN 16
#define ALEN 3

/* A type declaration for a single classifier: */

typedef struct CLASSIFIER {
  double str;         /* Strength. */
  char cond[CLEN+1];  /* Condition. */
  char act[ALEN+1];   /* Action. */
} CLASSIFIER;


/* A list structure for classifiers. */

typedef struct CLIST {
  CLASSIFIER *class;
  struct CLIST *next;
} CLIST;


/* Globals to minimize parameter passing: an array of classifiers,
   A two-dimensional array to hold the state of the world, and our
   ZCS's (x, y) coordinates. */

CLASSIFIER *pop;
char **world;
int me_w, me_h;

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* A function to compare classifiers based on strength. */

int classcomp(const void *a, const void *b)
{
  const CLASSIFIER *ac = a, *bc = b;
  
  return(ac->str < bc->str ? 1 : ac->str > bc->str ? -1 : 0);
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* Read in the world specification and initialize memory. */

void initialize(void)
{
  FILE *fp;
  SCANNER *scan;
  char *str;
  int i, j;

  /* Read world specifications. */

  if(specs == NULL || (fp = fopen(specs, "r")) == NULL) {
    fprintf(stderr, "Cannot open specs file \"%s\".\n", specs);
    exit(1);
  }

  /* The characters 'F', 'O', and '.' are tokens.  '#' is for comments. */
  scan = scan_init(fp, "FO.", " \t\n", "#");

  /* Width and height are read in first. */
  if((str = scan_get(scan)) == NULL) goto BADFILE;
  width = atoi(str);
  if((str = scan_get(scan)) == NULL) goto BADFILE;
  height = atoi(str);
  
  /* Make space for the world. */
  world = xmalloc(sizeof(char *) * height);
  for(i = 0; i < height; i++) {
    world[i] = xmalloc(sizeof(char) * width);
    for(j = 0; j < width; j++) {
      if((str = scan_get(scan)) == NULL) goto BADFILE;
      world[i][j] = CHAR2NUM(*str);
    }
  }

  /* Find an empty place to initially reside. */
  while(1) {
    me_h = random() % height;
    me_w = random() % width;
    if(world[me_h][me_w] == EMPTY) break;
  }
  world[me_h][me_w] = ME;

  /* Initialize classifier system. */
  pop = xmalloc(size * sizeof(CLASSIFIER));
  for(i = 0; i < size; i++) {
    pop[i].str = sinit;
    for(j = 0; j < CLEN; j++) {
      pop[i].cond[j] = '0' + (random() % 3);
      pop[i].cond[j] = (pop[i].cond[j] == '2') ? '#' : pop[i].cond[j];
    }
    pop[i].cond[CLEN] = 0;
    for(j = 0; j < ALEN; j++)
      pop[i].act[j] = '0' + (random() % 2);
    pop[i].act[ALEN] = 0;
  }

  return;
BADFILE:
  fprintf(stderr, "Problem found in specs file.\n");
  exit(1);  
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* Redraw everything. */

void draw_world(void)
{
  int i, j;

  for(i = 0; i < height; i++)
    for(j = 0; j < width; j++)
      plot_point(j, i, world[i][j]);
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* Restart the simulation after food has been found. */

void restart(void)
{
  /* Since the previous run only ends when food is found, place food
   * on the current location.
   */
  world[me_h][me_w] = FOOD;
  plot_point(me_w, me_h, FOOD);
  
  /* Find a new empty location to start from. */
  while(1) {
    me_h = random() % height;
    me_w = random() % width;
    if(world[me_h][me_w] == EMPTY) break;
  }
  world[me_h][me_w] = ME;
  plot_point(me_w, me_h, ME);
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* Builds a new list from a new front element and a sublist. */

CLIST *cons(CLASSIFIER *class, CLIST *list)
{
  CLIST *new;
  
  new = xmalloc(sizeof(CLIST));
  new->class = class;
  new->next = list;
  return(new);
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* Destroy a list and free up memory. */

void delete(CLIST *list)
{
  CLIST *next;

  while(list) {
    next = list->next;
    free(list);
    list = next;
  }
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* Encode the ZCS's environment into a string. */

void environment(char *env)
{
  /* This order contains the x and y offsets for moving clockwise
   * around the current position, starting from the northern-most
   * position.  { 0, -1 } is north because for plotting (0, 0) is
   * the upper-left cell.
   */
  int order[8][2] = { { 0, -1}, { 1, -1}, { 1,  0}, { 1,  1}, 
                      { 0,  1}, {-1,  1}, {-1,  0}, {-1, -1} };
  int i, x, y;
  
  for(i = 0; i < 8; i++) {
    x = (me_w + order[i][0] + width) % width;
    y = (me_h + order[i][1] + height) % height;
    /* Covert the world state at this position to two bits. */
    env[2 * i] = NUM2BIN1(world[y][x]);
    env[2 * i + 1] = NUM2BIN2(world[y][x]);
  }
  env[CLEN] = 0;
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* Checks if two strings are "equal" considering while card characters. */

int condeq(char *cond, char *env)
{
  int i;

  for(i = 0; i < CLEN; i++)
    if(cond[i] != '#' && cond[i] != env[i]) return(0);
  return(1);
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* Steps through the conditions of all classifiers and forms a match
   list of all classifiers that match the environment. */

CLIST *matchlist(char *env)
{
  int i;
  CLIST *mlist = NULL;

  for(i = 0; i < size; i++)
    if(condeq(pop[i].cond, env))
      mlist = cons(&pop[i], mlist);
  return(mlist);
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* Performs random roulette selection based on the normalized strengths
   of the classifiers. */

int picklarge(int skip)
{
  int i;
  double x, sum, runsum;
  
  /* Calculate the sum of strengths of everything but the
   * skip'th classifier.
   */
  sum = 0;
  for(i = 0; i < size; i++)
    if(i != skip) sum += pop[i].str;

  runsum = 0;
  x = random_range(0, 1);
  for(i = 0; i < size; i++) {
    if(i == skip) continue;
    runsum += pop[i].str / sum;

    /* Accept a choice based on cumulative sum of strengths (which
     * should be equal to 1 if done over all strings).
     */
    if(x <= runsum)
      return(i);
  }
  /* Just in case there was a subtle numerical error. */
  return(size - 1);
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* Performs random roulette selection based on the normalized inverse
   strengths of the classifiers. */

int picksmall(int skip)
{
  int i;
  double x, sum, runsum;

  /* Calculate the sum of inverse strengths of everything but the
   * skip'th classifier.
   */
  sum = 0;
  for(i = 0; i < size; i++)
    if(i != skip) sum += 1 / pop[i].str;
  runsum = 0;
  x = random_range(0, 1);
  for(i = 0; i < size; i++) {
    if(i == skip) continue;
    runsum += (1 / pop[i].str) / sum;

    /* Accept a choice based on cumulative sum of inverse strengths
     * (which should be equal to 1 if done over all strings).
     */
    if(x <= runsum)
      return(i);
  }
  /* Just in case there was a subtle numerical error. */
  return(size - 1);
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* Optionally perform covering, which entails building a new
   classifier if none match the current environment. */

CLIST *covering(CLIST *mlist, char *env)
{
  CLIST *l;
  int i, replace;
  double str = 0, mean = 0;

  /* Get the total strength of the matchlist.  Note that this will
   * be zero if the match list is empty.
   */
  for(l = mlist; l != NULL; l = l->next)
    str += l->class->str;

  /* Get the average strength of all classifiers. */
  for(i = 0; i < size; i++)
    mean += pop[i].str;
  mean /= size;

  /* Check first bailout condition: total strength is greater
   * than mean of math list times cover constant.
   */
  if(str > (mean * cover)) return(mlist);

  /* Pick something very weak from all classifiers. */
  replace = picksmall(-1);
  /* Copy in the current environment. */
  strcpy(pop[replace].cond, env);
  /* Sprinkle in some wildcards. */
  for(i = 0; i < CLEN; i++)
    if(random_range(0, 1) < wild)
      pop[replace].cond[i] = '#';

  /* Set the action of the new classifier to some random string. */
  for(i = 0; i < ALEN; i++)
    pop[replace].act[i] = '0' + (random() % 2);

  /* Give it the mean fitness. */
  pop[replace].str = mean;

  /* Add it to the match list. */
  return(cons(&pop[replace], mlist));
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* Compute an action list from a match list. */

CLIST *actlist(CLIST *mlist)
{
  CLIST *l, *alist = NULL;
  CLASSIFIER *pick = NULL;
  double sum, runsum, x;
  
  /* Get the sum of strengths of the match list. */
  sum = 0;
  for(l = mlist; l != NULL; l = l->next)
    sum += l->class->str;

  /* Do random roulette selection based on strengths. */
  runsum = 0;
  x = random_range(0, 1);
  for(l = mlist; l != NULL; l = l->next) {
    runsum += l->class->str / sum;

    /* Accept a choice based on cumulative sum of strengths (which
     * should be equal to 1 if done over all strings).
     */
    if(x <= runsum) {
      pick = l->class;
      break;
    }
  }
  /* If (l == NULL), then the above loop hit a strange statistical
   * burp.  Pick the first thing in the match list to fix. 
   */
  if(l == NULL) pick = mlist->class;

  /* Form a list of every member of the match list that advocates
   * the same action and put them into an action list.
   */
  for(l = mlist; l != NULL; l = l->next)
    if(strcmp(pick->act, l->class->act) == 0)
      alist = cons(l->class, alist);
  return(alist);  
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* Move based on the action string. */

int move(char *act)
{
  /* This order contains the x and y offsets for moving clockwise
   * around the current position, starting from the northern-most
   * position.  { 0, -1 } is north because for plotting (0, 0) is
   * the upper-left cell.
   */  
  int order[8][2] = { { 0, -1}, { 1, -1}, { 1,  0}, { 1,  1}, 
                      { 0,  1}, {-1,  1}, {-1,  0}, {-1, -1} };
  int i, index = 0, new_w, new_h, reward = 0;

  /* Convert the action into an integer between 0 and 2^ALEN - 1. */
  for(i = 0; i < ALEN; i++)
    index = index * 2 + (act[i] - '0');
  
  /* Take a step in the advocated position. */
  new_w = (me_w + order[index][0] + width) % width;
  new_h = (me_h + order[index][1] + height) % height;

  /* Accept the move only if it doesn't put us into a rock. */
  if(world[new_h][new_w] != ROCK) {
    /* Yummy! */
    if(world[new_h][new_w] == FOOD) reward = FOOD_REWARD;
    plot_point(me_w, me_h, EMPTY);
    world[me_h][me_w] = EMPTY;
    me_w = new_w; me_h = new_h;
    world[me_h][me_w] = ME;
    plot_point(me_w, me_h, ME);
  }
  return(reward);
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* Perform one step of the implicit bucket brigade. */

void update(int reward, CLIST *mlist, CLIST *alist, CLIST *alistold)
{
  int sz = 0;
  double hold = 0;
  CLIST *l;

  /* Sum of the hold amount, decay the strengths of the
   * classifiers in the action list, and count the size.
   */
  for(l = alist; l != NULL; l = l->next) {
    hold += lrate * l->class->str;
    l->class->str -= lrate * l->class->str;
    sz++;
  }

  /* Pass out rewards to the action list. */
  for(l = alist; l != NULL; l = l->next)
    l->class->str += lrate * reward / sz;

  /* Share the wealth with the previous action list. */
  if(alistold) {
    sz = 0;
    for(l = alistold; l != NULL; l = l->next)
      sz++;
    for(l = alistold; l != NULL; l = l->next)
      l->class->str += drate * hold / sz;
  }
  
  /* Tax all classifiers in the match list that advocated a
   * different action.
   */
  for(l = mlist; l != NULL; l = l->next)
    if(strcmp(l->class->act, alist->class->act) != 0) {
      l->class->str -= trate * l->class->str;
    }
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

/* Do one step of the GA to weed out the weaklings. */

void ga(void)
{
  int pa, pb, oa, ob;
  int i, cindex, t;
  double ave;

  /* Pick two parents by strength that are guaranteed to be different. */
  pa = picklarge(-1);
  pb = picklarge(pa);

  /* Pick two classifiers by inverse strength that are guaranteed to be
   * different.
   */
  oa = picksmall(-1);
  ob = picksmall(oa);

  /* Halve the strength of the parents and pass on to
   * the children.
   */
  pop[pa].str /= 2;
  pop[oa] = pop[pa];
  pop[pb].str /= 2;
  pop[ob] = pop[pb];

  /* Optionally cross the two children. */
  if(random_range(0, 1) < crate) {
    /* Do crossover on the condition. */
    if((random() % (CLEN + ALEN)) < CLEN) {
      cindex = (random() % CLEN) + 1;
      for(i = 0; i < cindex; i++) {
        t = pop[oa].cond[i];
        pop[oa].cond[i] = pop[ob].cond[i];
        pop[ob].cond[i] = t;
      }
    }
    /* Do crossover on the action. */
    else {
      cindex = (random() % ALEN) + 1;
      for(i = 0; i < cindex; i++) {
        t = pop[oa].act[i];
        pop[oa].act[i] = pop[ob].act[i];
        pop[ob].act[i] = t;
      }
    }
    /* Blur the strengths. */
    ave = (pop[oa].str + pop[ob].str) / 2;
    pop[oa].str = pop[ob].str = ave;
  }

  /* Optionally mutate condition. */
  for(i = 0; i < CLEN; i++) {
    if(random_range(0, 1) < mrate) {
      pop[oa].cond[i] = '0' + (random() % 3);
      pop[oa].cond[i] = (pop[oa].cond[i] == '2') ? '#' : pop[oa].cond[i];
    }
    if(random_range(0, 1) < mrate) {
      pop[ob].cond[i] = '0' + (random() % 3);
      pop[ob].cond[i] = (pop[ob].cond[i] == '2') ? '#' : pop[ob].cond[i];
    }
  }
  /* Optionally mutate action. */
  for(i = 0; i < ALEN; i++) {
    if(random_range(0, 1) < mrate)
      pop[oa].act[i] = '0' + (random() % 2);
    if(random_range(0, 1) < mrate)
      pop[ob].act[i] = '0' + (random() % 2);
  }
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

int main(int argc, char **argv)
{
  FILE *fp;
  extern int plot_mag;
  extern int plot_inverse;
  CLIST *mlist, *alist, *alistold;
  int t, reward, cnt, *counts, i;
  double ave = 0, totcount = 0;
  char env[CLEN + 1];

  get_options(argc, argv, options, help_string);
  srandom(seed);
  initialize();
  
  counts = xmalloc(sizeof(int) * avelen);

  plot_inverse = invert;
  plot_mag = mag;
  plot_init(width, height, 4, term);
  plot_set_all(0);

  mlist = alist = alistold = NULL;

  draw_world();
  
  /* For each time step... */
  for(t = 0; t < steps; t++) {
    reward = cnt = 0;
    /* Keep going until some a reward is earned. */
    while(reward == 0) {
      /* Build an environment string. */
      environment(env);
      /* Form the match list. */
      mlist = matchlist(env);
      /* Do covering. */
      mlist = covering(mlist, env);
      /* Make the action list. */
      alist = actlist(mlist);
      /* Move the little guy. */
      reward = move(alist->class->act);
      /* Do implicit BB. */
      update(reward, mlist, alist, alistold);
      /* Optionally perform GA step. */ 
      if(random_range(0, 1) < grate) ga();
      /* Clean up excess baggage. */
      delete(mlist);
      delete(alistold);
      alistold = alist;
      cnt++;
    }
    /* Clean up excess baggage. */
    delete(alistold); alistold = NULL;

    /* Take a windowed moving average of the number of steps
     * needed to complete the previous trials.
     */
    if(t >= avelen)
      ave = ((ave * avelen) - counts[t % avelen] + cnt) / avelen;

    counts[t % avelen] = cnt;
    totcount += cnt;

    /* If we've completed enough trials, calculate the average. */
    if(t == avelen - 1) {
      for(i = 0; i < avelen; i++)
        ave += counts[i];
      ave /= avelen;
    }
   
    /* Dump out stats. */
    if(t >= avelen)
      printf("%d\t%f\t%f\n", cnt, ave, totcount / (t + 1));

    /* Restart in a new position. */
    restart();
  }

  /* Simulation is complete, so print out classifiers to log file. */
  if((fp = fopen("zcs.log", "w")) != NULL) {
    qsort(pop, size, sizeof(CLASSIFIER), classcomp);
    for(i = 0; i < size; i++)
      fprintf(fp, "%s : %s : %.5f\n", pop[i].cond, pop[i].act, pop[i].str);
  }

  plot_finish();
  exit(0);
}

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

