#include "rb.h", and then compile the program with:
gcc -I/blugreen/homes/plank/cs360/include
Also, when you link your object files to make an executable, you need to include /blugreen/homes/plank/cs360/objs/rb.o.
The makefile in this directory does both of these things for you.
Rb-trees are data structures based on balanced binary trees. You don't need to know how they work -- just that they do work, and all operations are in O(log(n)) time, where n is the number of elements in the tree. (If you really want to know more about red-black trees, let me know and I can point you to some texts on them).
The main struct for rb-trees is the Rb_node. Like dlists, all rb-trees have a header node. You create an rb-tree by calling make_rb(), which returns a pointer to the header node of an empty rb-tree. This header points to the main body of the rb-tree, which you don't need to care about, and to the first and last external nodes of the tree. These external nodes are hooked together with flink and blink pointers, so that you can view rb-trees as being dlists with the property that they are sorted.
The Rb-tree data structure is a bit confusing with all the unions. The only fields that you need to care about are:
You use rb_insert(r, k, v) to create a new node with key k (where k is a character string) and val v, and insert it into the tree r in lexicographic order (i.e. it uses strcmp to compare strings).
Thus, in jh.c, we create a tree and insert two strings ("Jim" and "Heather") into it. Since "Heather" is lexicographically less than "Jim", it will be the first node in the tree, and "Jim" will be the last. We print out the tree by traversing the external node list using the c.list.flink pointer:
#include < stdio.h >
#include "rb.h"
main()
{
Rb_node r, tmp;
r = make_rb();
rb_insert(r, "Jim", NULL);
rb_insert(r, "Heather", NULL);
for (tmp = r->c.list.flink; tmp != r; tmp = tmp->c.list.flink) {
printf("%s\n", tmp->k.key);
}
}
UNIX> jh
Heather
Jim
UNIX>
To make things more readable, there are the following macros defined in rb.h:
#define rb_first(n) (n->c.list.flink)
#define rb_last(n) (n->c.list.blink)
#define rb_next(n) (n->c.list.flink)
#define rb_prev(n) (n->c.list.blink)
#define rb_empty(t) (t->c.list.flink == t)
Thus, the above for loop can be written:
for (tmp = rb_first(r); tmp != r; tmp = rb_next(tmp) {
printf("%s\n", tmp->k.key);
}
Since tree-traversal is something you tend to do a lot, I have also defined
the macro rb_traverse, in which the for loop is put into a #define:
#define rb_traverse(ptr, lst) \
  for((ptr) = rb_first((lst)); (ptr) != (lst); (ptr) = rb_next((ptr)))
Thus, the above for loop can be written:
/* rb_traverse(tmp, r) expands to a for-loop header, so the braces below
   are the loop body, executed once per node with tmp as the cursor. */
rb_traverse(tmp, r) {
printf("%s\n", tmp->k.key);
}
#include < stdio.h >
#include < string.h >
#include "rb.h"
#include "fields.h"
main()
{
IS is;
char *copy;
Rb_node sorted_lines, tmp;
sorted_lines = make_rb();
is = new_inputstruct(NULL);
while(get_line(is) >= 0) {
copy = strdup(is->text1);
rb_insert(sorted_lines, copy, NULL);
}
rb_traverse(tmp, sorted_lines) {
printf("%s", tmp->k.key);
}
}
UNIX> head randfile
13 hkrob
13 isofq
15 lninv
0 ezvpy
8 xxgxs
18 wzypq
19 jatzg
16 vrbdg
3 kkwfb
0 bbvhy
UNIX> head randfile | mysort
0 bbvhy
0 ezvpy
13 hkrob
13 isofq
15 lninv
16 vrbdg
18 wzypq
19 jatzg
3 kkwfb
8 xxgxs
UNIX>
UNIX> head randfile | mysort2
8 xxgxs
3 kkwfb
19 jatzg
18 wzypq
16 vrbdg
15 lninv
13 isofq
13 hkrob
0 ezvpy
0 bbvhy
UNIX>
UNIX> cat > jfile
Jim
Jim
Heather
UNIX> sort jfile
Heather
Jim
Jim
UNIX> sort -u jfile
Heather
Jim
UNIX>
One way we can do this is to read all lines into an rb-tree as in mysort.c, and then only print out a line if it is different from the previous line in the rb-tree. Since the tree is sorted, duplicate lines will be adjacent to each other in the rb-tree, so this algorithm will indeed work, and is in mysortu0.c. Note that in the body of the rb_traverse loop there is a check to see if a node is the first one in the list. If so, then it prints that line. Otherwise, it checks to see if the node's line is equal to the previous one and only prints it if not. The reason we need that extra check is because we don't know what the value of sorted_lines->k.key is. It could cause strcmp() to dump core. Thus we must take care not to call strcmp() on it.
#include < string.h >
#include < stdio.h >
#include "fields.h"
#include "rb.h"
main()
{
IS is;
char *copy;
Rb_node sorted_lines, tmp;
int found;
sorted_lines = make_rb();
is = new_inputstruct(NULL);
while(get_line(is) >= 0) {
copy = strdup(is->text1);
rb_insert(sorted_lines, copy, NULL);
}
rb_traverse(tmp, sorted_lines) {
if (tmp == rb_first(sorted_lines) ||
strcmp(tmp->k.key, tmp->c.list.blink->k.key) != 0)
printf("%s", tmp->k.key);
}
}
/* NOTE: this variant is intentionally incorrect -- it is the exercise
   posed in the text that follows.  Do not "fix" it in place. */
s2 = NULL;
rb_traverse(tmp, sorted_lines) {
if (strcmp(tmp->k.key, s2) != 0) printf("%s", tmp->k.key);
s2 = tmp->k.key;
}
This doesn't work (try it -- it dumps core). Why? I'll let you figure it
out. How would you fix it?
rb_find_key_n(Rb_node t, char *k, int *f)
This works as follows: If there is a node with key k in the tree, then rb_find_key_n sets *f to be 1, and returns a pointer to that node. If there is no node with key k in the tree, then rb_find_key_n sets *f to zero and returns a pointer to the Rb_node in the tree whose value is the smallest value greater than k. If there is no value in the tree greater than or equal to k, then the root of the tree is returned. Like rb_insert(), rb_find_key_n() works on character strings, and works in O(log(n)) time, where n is the number of elements in the tree. So, to implement 'sort -u', we first check to see if a string is in the tree already. If so, then we do nothing. If not, then we insert it into the tree. Mysortu1.c does this:
#include < string.h >
#include < stdio.h >
#include "fields.h"
#include "rb.h"
main()
{
IS is;
char *copy;
Rb_node sorted_lines, tmp;
int found;
sorted_lines = make_rb();
is = new_inputstruct(NULL);
while(get_line(is) >= 0) {
/* Insert the line into the tree only if it is not there already */
(void) rb_find_key_n(sorted_lines, is->text1, &found);
if (!found) {
copy = strdup(is->text1);
rb_insert(sorted_lines, copy, NULL);
}
}
rb_traverse(tmp, sorted_lines) {
printf("%s", tmp->k.key);
}
}
#include < string.h >
#include < stdio.h >
#include "fields.h"
#include "rb.h"
#define talloc(type, size) (type *) malloc(sizeof(type)*(size))
main()
{
IS is;
char *copy;
Rb_node sorted_lines, tmp, r;
int found;
int *count;
sorted_lines = make_rb();
is = new_inputstruct(NULL);
while(get_line(is) >= 0) {
r = rb_find_key_n(sorted_lines, is->text1, &found);
/* If the line is already in the tree, then just increment its count */
if (found) {
count = (int *) r->v.val;
*count = *count + 1;
/* Otherwise, insert the line into the tree with a count of one */
} else {
count = talloc(int, 1);
*count = 1;
copy = strdup(is->text1);
rb_insert(sorted_lines, copy, (char *)count);
}
}
rb_traverse(tmp, sorted_lines) {
count = (int *) tmp->v.val;
printf("%6d\t%s", *count, tmp->k.key);
}
}
#include < stdio.h >
#include < string.h >
#include "fields.h"
#include "rb.h"
main()
{
IS is;
char *copy;
Rb_node sorted_lines, tmp;
int i;
sorted_lines = make_rb();
is = new_inputstruct(NULL);
while(get_line(is) >= 0) {
copy = strdup(is->text1);
i = atoi(is->text1);
rb_inserti(sorted_lines, i, copy);
}
rb_traverse(tmp, sorted_lines) {
printf("%s", tmp->v.val);
}
}
UNIX> head randfile
13 hkrob
13 isofq
15 lninv
0 ezvpy
8 xxgxs
18 wzypq
19 jatzg
16 vrbdg
3 kkwfb
0 bbvhy
UNIX> head randfile | mysort
0 bbvhy
0 ezvpy
13 hkrob
13 isofq
15 lninv
16 vrbdg
18 wzypq
19 jatzg
3 kkwfb
8 xxgxs
UNIX> head randfile | mysorti
0 bbvhy
0 ezvpy
3 kkwfb
8 xxgxs
13 isofq
13 hkrob
15 lninv
16 vrbdg
18 wzypq
19 jatzg
UNIX>