/* jaccard-set-linear: Program to calculate Jaccard similarity using sets and two iterators.

   Each file named on the command line is read into a set of its distinct
   lines.  For every ordered pair of files (including a file with itself),
   the program prints one line holding the two file names and the Jaccard
   similarity of their line sets.

   James S. Plank
   CS494/CS594 - Advanced Algorithms and Programming
   October, 2017 */

#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <set>
#include <string>
#include <vector>
using namespace std;

/* Print the usage line to stderr, followed by the message s on its own line
   when s is non-empty, and then exit with status 1.  Does not return. */
void usage(const string s)
{
  fprintf(stderr, "usage: jaccard-set-linear files\n");
  if (!s.empty()) fprintf(stderr, "%s\n", s.c_str());
  exit(1);
}

/* Return |a intersect b| / |a union b|.

   Both sets iterate in ascending order, so a single simultaneous walk finds
   the common elements: on a match both iterators advance, otherwise only the
   iterator holding the smaller string advances.  Every step advances at
   least one iterator, so the walk takes at most a.size() + b.size() steps.

   When both sets are empty the union is empty and the ratio would be 0/0;
   that case is defined here as 1.0, treating two empty sets as identical. */
static double jaccard_similarity(const set<string> &a, const set<string> &b)
{
  set<string>::const_iterator ai = a.begin();
  set<string>::const_iterator bi = b.begin();
  size_t shared = 0;

  while (ai != a.end() && bi != b.end()) {
    /* One three-way comparison instead of separate == and < tests. */
    const int order = ai->compare(*bi);
    if (order == 0) {
      ++shared;
      ++ai;
      ++bi;
    } else if (order < 0) {
      ++ai;
    } else {
      ++bi;
    }
  }

  /* |A union B| = |A| + |B| - |A intersect B| */
  const size_t united = a.size() + b.size() - shared;
  if (united == 0) return 1.0;

  return static_cast<double>(shared) / static_cast<double>(united);
}

/* Usage: jaccard-set-linear files

   With no file arguments, prints the usage message and exits with status 1.
   If a file cannot be opened, reports it with perror() and exits with
   status 1.  Otherwise prints one result line per ordered pair of files and
   returns 0. */
int main(int argc, char **argv)
{
  vector< set<string> > sets;
  string line;

  if (argc == 1) usage("");

  /* Read the data sets into sets: sets[k] holds the distinct lines of argv[k+1]. */

  sets.resize(argc - 1);
  for (int arg = 1; arg < argc; arg++) {
    ifstream f(argv[arg]);
    if (f.fail()) {
      perror(argv[arg]);
      exit(1);
    }
    while (getline(f, line)) sets[arg - 1].insert(line);
  }

  /* For each pair of sets, calculate the Jaccard similarity directly. */

  for (size_t i = 0; i < sets.size(); i++) {
    for (size_t j = 0; j < sets.size(); j++) {
      printf("%-30s %-30s %.6lf\n", argv[i + 1], argv[j + 1],
             jaccard_similarity(sets[i], sets[j]));
    }
  }

  return 0;
}