/* jaccard-set-lazy: Program to calculate Jaccard similarity using sets and find().

   James S. Plank
   CS494/CS594 - Advanced Algorithms and Programming
   October, 2017 */

#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <set>
#include <string>
#include <vector>
using namespace std;

/* Print the usage line to stderr, followed by the message s on its own line
   when s is non-empty, and then exit with status 1.  Never returns. */

void usage(const string &s)
{
  fprintf(stderr, "usage: jaccard-set-lazy files\n");
  if (s != "") fprintf(stderr, "%s\n", s.c_str());
  exit(1);
}

/* Return the Jaccard similarity of a and b: the size of their intersection
   divided by the size of their union.  The intersection is counted by looking
   up every element of b in a with find(). */

static double jaccard_similarity(const set<string> &a, const set<string> &b)
{
  set<string>::const_iterator it;
  size_t common;
  size_t combined;

  common = 0;
  for (it = b.begin(); it != b.end(); ++it) {
    if (a.find(*it) != a.end()) common++;
  }

  /* |A union B| = |A| + |B| - |A intersect B|. */

  combined = a.size() + b.size() - common;

  /* Both sets are empty, so the ratio would be 0/0 (printed as "nan").
     Two empty sets are identical, so report a similarity of 1. */

  if (combined == 0) return 1.0;

  return static_cast<double>(common) / static_cast<double>(combined);
}

/* Read each file named on the command line into a set of its lines, then
   print one line per ordered pair of files (including each file paired with
   itself) giving the two file names and their Jaccard similarity.
   Exits with status 1 on a usage error or if a file cannot be opened, and
   returns 0 on success. */

int main(int argc, char **argv)
{
  vector < set <string> > sets;
  ifstream f;
  string s;
  size_t i, j;
  int a;

  /* argc < 2 also covers argc == 0, where argc-1 would be negative. */

  if (argc < 2) usage("");

  /* Read the data sets into set.  sets[k] holds the lines of argv[k+1]. */

  sets.resize(argc-1);

  for (a = 1; a < argc; a++) {
    f.clear();               /* Reset the EOF/fail state left over from the previous file. */
    f.open(argv[a]);
    if (f.fail()) { perror(argv[a]); exit(1); }
    while (getline(f, s)) sets[a-1].insert(s);
    f.close();
  }

  /* For each pair of sets, calculate the Jaccard similarity directly. */

  for (i = 0; i < sets.size(); i++) {
    for (j = 0; j < sets.size(); j++) {
      printf("%-30s %-30s %.6lf\n", argv[i+1], argv[j+1],
             jaccard_similarity(sets[i], sets[j]));
    }
  }

  /* Successful completion must report status 0, not 1. */

  return 0;
}