#include #include #include #include #include #include #include #include #include #include #include #include using namespace std; /* Calculate the altered djb hash of the first *size* characters in *s*. This is done using the algorithm of djb_altered_2, rather than djb_altered. They are equivalent. */ unsigned int djb_hash(const string &s, int size) { int i; unsigned int h; unsigned int c; unsigned int rv; int shift; shift = 0; rv = 0; for (i = size-1; i >= 0; i--) { c = s[i]; if (shift == 0) { rv ^= c; } else { rv ^= ((c << shift) ^ (c >> (32-shift))); } shift += 5; shift %= 32; } h = 5381; if (shift != 0) { h = ((h << shift) ^ (h >> (32-shift))); } return h ^ rv; } int main(int argc, char **argv) { string pattern, file, s; uint32_t hash, hs, tmp; uint32_t sh; size_t i, j; long long l; int shift; if (argc != 2) { fprintf(stderr, "usage: pattern < file\n"); exit(1); } while (getline(cin, s)) { if (file.size() != 0) file += " "; file += s; } pattern = argv[1]; /* Hash the pattern and exit if the pattern is too big. */ hash = djb_hash(pattern.c_str(), pattern.size()); if (file.size() < pattern.size()) { printf("file is smaller than pattern\n"); return 0; } /* Calculate the DJB hash of the first pattern.size() characters of the file, without the h term. */ sh = 0; for (i = 0; i < pattern.size(); i++) { sh = (sh << 5) | (sh >> (32-5)); sh ^= file[i]; } /* Calculate hs */ l = pattern.size() * 5; shift = l % 32; hs = 5381; if (shift != 0) hs = ((hs << shift) ^ (hs >> (32-shift))); /* Do the Rabin_Karp algorithm i is the next character, j is the first character. */ j = 0; while (i < file.size()) { sh = (sh << 5) | (sh >> (32-5)); // Rolling hash -- do the circular shift. sh ^= file[i]; // Add in the next character. tmp = file[j]; // Subtract the first character. if (shift != 0) tmp = ((tmp << shift) ^ (tmp >> (32-shift))); sh ^= tmp; if ((sh^hs) == hash) { // If the hashes match, then verify if (strncmp(pattern.c_str(), file.c_str()+j+1, pattern.size()) == 0) { printf("%lu\n", j+1); return 0; } } i++; j++; } return 0; }