diff --git a/DESIGN b/DESIGN index e32164c4..9a41d062 100644 --- a/DESIGN +++ b/DESIGN @@ -423,54 +423,37 @@ Index: -Filter: (lib/filter.cpp) +Filter: Filtering is used to generate a subset of songs displayed by the UI that users can choose from. The inverted index is generated at startup so there is no need for a remove() function, since it will be wiped the next time the application starts. -- Index: - Database filter_index; - map lowercase_cache; - unsigned int lowercase_cache_hits; - - Parsing: - 1) Convert the provided string into a list of words, using whitespace - and the following characters as delimiters: \/,;()_-~+" - - For each word: - 2) Check the lowercase_cache to see if we have seen the word before, - a) If we have, return the stored string - b) Convert the string to lowercase and strip out remaining - special characters. Add the result to the lowercase_cache; + - Scan over the input text to create a list of words using whitespace and the + following characters as delimiters: \/,;()_-~+" + - While scanning, convert the string to lowercase and strip out any + other special characters. - API: - void filter :: add(string, track_id); - Parse the string into substrings following the "Parsing" - section (above). Add each (substring, track_id) pair to the - filter_index. + std::string filter :: add(const std::string &key, unsigned int track_id); + Parse the key into words following the "Parsing" section above. + Generate substrings for each word and add each (substring, + track_id) pair to the index. Return the lowercased text to the + caller. To generate substrings, iterate over the word starting from the front. For example: "goron" would contain the substrings {g, go, gor, goro, goron}. - void filter :: search(string, set &); - Parse the string into substrings following the "Parsing" - section (above). We want to find track_ids that match ALL - substrings, so take the intersection of all sets returned by - the filter_index for a given substring. 
+ std::string filter :: lowercase(const std::string &text); + Parse the text into lowercased words following the "Parsing" + section above. Return the lowercased string to the caller. - std::string filter :: to_lowercase(const std::string &string); - Split the string into words following step 1 of "Parsing" - (above). Assemble and return a result string using the lower - case cache to convert each term to lowercase. - - void filter :: print_cache_stats(); - Print cache hit and size information. - - void filter :: get_index(); - This function only exists if CONFIG_TEST is enabled. - Return the index storing all the filter data. + void filter :: search(const std::string &text, std::set &res); + This function finds all track_ids matching the input text. + Parse the string into substrings and take the intersection of + all sets returned by the index for each substring. diff --git a/include/filter.h b/include/filter.h index fd23ac52..a955ac06 100644 --- a/include/filter.h +++ b/include/filter.h @@ -9,11 +9,9 @@ namespace filter { - void add(const std::string &, unsigned int); + std::string add(const std::string &, unsigned int); void search(const std::string &, std::set &); - std::string to_lowercase(const std::string &); - - void print_cache_stats(); + std::string lowercase(const std::string &); }; diff --git a/include/tabs.h b/include/tabs.h index 73bba0df..aae9a158 100644 --- a/include/tabs.h +++ b/include/tabs.h @@ -6,7 +6,7 @@ #include #include - +#include class Tab { private: diff --git a/lib/filter.cpp b/lib/filter.cpp index 2cba8164..b1aa2b43 100644 --- a/lib/filter.cpp +++ b/lib/filter.cpp @@ -8,13 +8,10 @@ #include #include -#include static Index filter_index("", false); -static std::map lowercase_cache; -static unsigned int lowercase_cache_hits = 0; -static void split_text(const std::string &text, std::list &ret) +static void parse_text(const std::string &text, std::list &ret) { std::string word; char c; @@ -22,6 +19,17 @@ static void split_text(const 
std::string &text, std::list &ret) for (unsigned int i = 0; i < text.size(); i++) { c = text[i]; + if ( (c >= 'a') && (c <= 'z') ) { + word += c; + continue; + } else if ( (c >= 'A') && (c <= 'Z') ) { + word += (c + ('a' - 'A')); + continue; + } else if ( (c >= '0') && (c <= '9') ) { + word += c; + continue; + } + switch (c) { case '\\': case '/': @@ -40,9 +48,8 @@ static void split_text(const std::string &text, std::list &ret) ret.push_back(word); word = ""; } - break; default: - word += c; + break; }; } @@ -50,42 +57,6 @@ static void split_text(const std::string &text, std::list &ret) ret.push_back(word); } -static void lower_text(const std::string &text, std::list &ret) -{ - char c; - std::string word; - std::map::iterator it = lowercase_cache.find(text); - - if (it != lowercase_cache.end()) { - lowercase_cache_hits++; - ret.push_back(it->second); - return; - } - - for (unsigned int i = 0; i < text.size(); i++) { - c = text[i]; - if ( (c >= 'a') && (c <= 'z') ) - word += c; - else if ( (c >= 'A') && (c <= 'Z') ) - word += (c + ('a' - 'A')); - else if ( (c >= '0') && (c <= '9') ) - word += c; - } - - lowercase_cache[text] = word; - ret.push_back(word); -} - -static void parse_text(const std::string &text, std::list &ret) -{ - std::list split; - std::list::iterator it; - - split_text(text, split); - for (it = split.begin(); it != split.end(); it++) - lower_text(*it, ret); -} - static void add_substrings(const std::string &text, unsigned int track_id) { std::string substr; @@ -95,7 +66,21 @@ static void add_substrings(const std::string &text, unsigned int track_id) } } -void filter :: add(const std::string &text, unsigned int track_id) +static std::string reassemble_text(std::list text) +{ + std::string res; + std::list::iterator it = text.begin(); + + if (it == text.end()) + return res; + + res += *it; + for (it++; it != text.end(); it++) + res += " " + *it; + return res; +} + +std::string filter :: add(const std::string &text, unsigned int track_id) { std::list 
parsed; std::list::iterator it; @@ -103,6 +88,7 @@ void filter :: add(const std::string &text, unsigned int track_id) parse_text(text, parsed); for (it = parsed.begin(); it != parsed.end(); it++) add_substrings(*it, track_id); + return reassemble_text(parsed); } static void find_intersection(std::string &text, std::set &res) @@ -136,24 +122,9 @@ void filter :: search(const std::string &text, std::set &res) find_intersection(*it, res); } -std::string filter :: to_lowercase(const std::string &text) +std::string filter :: lowercase(const std::string &text) { - std::string res = ""; std::list parsed; - std::list::iterator it; - parse_text(text, parsed); - for (it = parsed.begin(); it != parsed.end(); it++) { - if (it != parsed.begin()) - res += " "; - res += *it; - } - - return res; -} - -void filter :: print_cache_stats() -{ - print("Lowercase cache size: %u\n", lowercase_cache.size()); - print("Lowercase cache hits: %u\n", lowercase_cache_hits); + return reassemble_text(parsed); } diff --git a/lib/library.cpp b/lib/library.cpp index fe1ecf41..c9492322 100644 --- a/lib/library.cpp +++ b/lib/library.cpp @@ -47,7 +47,7 @@ library :: AGInfo :: AGInfo(DB_Type type, TagLib :: Tag *tag) else throw -E_INVAL; - key_lower = filter :: to_lowercase(name); + key_lower = filter :: lowercase(name); } library :: AGInfo :: AGInfo(DB_Type type, const std::string &str) @@ -55,7 +55,7 @@ library :: AGInfo :: AGInfo(DB_Type type, const std::string &str) { if ((db_type == DB_ARTIST) || (db_type == DB_GENRE)) { name = str; - key_lower = filter :: to_lowercase(name); + key_lower = filter :: lowercase(name); } else throw -E_INVAL; } @@ -68,7 +68,7 @@ const std::string library :: AGInfo :: primary_key() void library :: AGInfo :: read(File &f) { name = f.getline(); - key_lower = filter :: to_lowercase(name); + key_lower = filter :: lowercase(name); } void library :: AGInfo :: write(File &f) @@ -91,13 +91,13 @@ library :: Album :: Album(TagLib :: Tag *tag, unsigned int artist) : 
name(tag->album().stripWhiteSpace().to8Bit(true)), year(tag->year()), artist_id(artist) { - name_lower = filter :: to_lowercase(name); + name_lower = filter :: lowercase(name); } library :: Album :: Album(const std::string &str, unsigned int yr, unsigned int artist) : name(str), year(yr), artist_id(artist) { - name_lower = filter :: to_lowercase(name); + name_lower = filter :: lowercase(name); } const std::string library :: Album :: primary_key() @@ -111,7 +111,7 @@ void library :: Album :: read(File &f) { f >> artist_id >> year; name = f.getline(); - name_lower = filter :: to_lowercase(name); + name_lower = filter :: lowercase(name); } void library :: Album :: write(File &f) @@ -176,7 +176,7 @@ library :: Track :: Track(TagLib :: Tag *tag, TagLib :: AudioProperties *audio, full_path = path; filepath = path.substr(library_db.at(library_id)->root_path.size() + 1); - title_lower = filter :: to_lowercase(title); + title_lower = filter :: lowercase(title); minutes = length / 60; seconds = length % 60; @@ -200,7 +200,7 @@ library :: Track :: Track(struct ImportData *data, unsigned int lib, full_path = data->filepath; filepath = full_path.substr(library_db.at(library_id)->root_path.size() + 1); - title_lower = filter :: to_lowercase(title); + title_lower = filter :: lowercase(title); minutes = length / 60; seconds = length % 60; @@ -224,7 +224,7 @@ void library :: Track :: read(File &f) length_str = f.getline(); title = f.getline(); filepath = f.getline(); - title_lower = filter :: to_lowercase(title); + title_lower = filter :: lowercase(title); full_path = library_db.at(library_id)->root_path + "/" + filepath; library_db.at(library_id)->size++; } diff --git a/tests/Sconscript b/tests/Sconscript index 18fdc894..a9a42932 100644 --- a/tests/Sconscript +++ b/tests/Sconscript @@ -8,8 +8,8 @@ if sys.argv.count("tests") > 0: src = SConscript("src/Sconscript") -tests = [ "version", "file", "db_entry", "database", "index" ] -#scripts = [ "filter", "idle", "playlist", "library", 
"playqueue", "deck", "audio", "gui" ] +tests = [ "version", "file", "db_entry", "database", "index", "filter" ] +#scripts = [ "idle", "playlist", "library", "playqueue", "deck", "audio", "gui" ] prev = None diff --git a/tests/filter b/tests/filter new file mode 100755 index 00000000..fa2cad0f --- /dev/null +++ b/tests/filter @@ -0,0 +1,71 @@ +#!/bin/bash +# Copyright 2014 (c) Anna Schumaker + +. $(dirname $0)/_functions + +function test_add +{ + test_equal "./src/filter.run -a $1" "$2" +} + +function test_lowercase +{ + test_equal "./src/filter.run -l $1" "$2" +} + +function test_text +{ + test_add "$1" "$2" + test_lowercase "$1" "$2" +} + +function test_search +{ + num=$(cat -b $DATA_DIR/filter.txt | tail -n 1 | awk '{print $1}') + let num=$num-1 + test_equal "./src/filter.run -s $num $1" "$2" +} + + + +new_test "Filter Add and Lowercase Test" + +test_text " " "" +test_text " test +text" "test text" +test_text "test text" "test text" +test_text "Test Text" "test text" +test_text "Test? Text!" "test text" +test_text "Test?123 Text!456" "test123 text456" +test_text "Test?123 Text!456" "test123 text456" +test_text "Test(text);123-456" "test text 123 456" +test_text "Test((text));;123--456" "test text 123 456" + + + +echo +new_test "Filter Search Test" + +file=$DATA_DIR/filter.txt +mkdir -p $DATA_DIR + +echo "0" > $file +echo "It's dangerous to go alone! Take this..." >> $file +echo "DODONGO DISLIKES SMOKE." >> $file +echo "I am Error." >> $file +echo "Error knows a secret." >> $file +echo "Hey, you pay, then you can open the chests!" >> $file +echo "And the Master Sword sleeps again... FOREVER!" >> $file +echo "Link checked the chest. Wow! This is a nice chest!" >> $file +echo "Hey! Listen! Hey! Listen! Watch out!" >> $file +echo "You killed the Deku Tree? How could you?!" >> $file +echo "You've met with a terrible fate, haven't you?" >> $file +echo "Believe in your strengths... Believe..." >> $file +echo "Tingle! Tingle! Kooloo-Limpah!" 
>> $file +echo "Well excuse me, Princess!" >> $file + +test_search "error" "2 3" +test_search "the" "4 5 6 8" +test_search "the ch" "4 6" +test_search "the CH" "4 6" +test_search "the ch y" "4" diff --git a/tests/filter/Sconscript b/tests/filter/Sconscript deleted file mode 100644 index ea122ae8..00000000 --- a/tests/filter/Sconscript +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/python -Import("Test", "CONFIG") - -CONFIG.FILTER = True - -Test("filter", "filter.cpp") diff --git a/tests/filter/filter.cpp b/tests/filter/filter.cpp deleted file mode 100644 index 015e1670..00000000 --- a/tests/filter/filter.cpp +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright 2013 (c) Anna Schumaker. - */ -#include -#include -#include - -std::string quotes [] = { - "What heroes like best is themselves.", - "The sun rose slowly, as if wasn't sure it was worth all the effort", - "Of course I'm sane, when trees start talking to me, I don't talk back", - "Darkness isn't the opposite of light, it's simply its absence", - "Time passed, which, basically, is its job", - "Million-to-one chances crop up nine times out of ten", - "CATS ARE NICE", - "Death isn't cruel - merely terribly, terribly good at his job", - "Thunder rolled ... it rolled a six", - "DROP THE SCYTHE, AND TURN AROUND SLOWLY", - "Time is like a drug. Too much of it kills you", - "Gravity is a habit that is hard to shake off", - "You do not ask people like that what they are thinking about in case " - "they turn around very slowly and say 'You'", - "Do unto others before they do unto you", - "Not a man to mince words. People, yes. But not words", - "An elf's strength lay in persuading others they were weak", - "May you live in interesting times", - "WITH HIM HERE, EVEN UNCERTAINTY IS UNCERTAIN. AND I'M NOT SURE EVEN " - "ABOUT THAT", - "I AM DEATH, NOT TAXES. 
I TURN UP ONLY ONCE", - "All tribal myths are true, for a given value of 'true'", - "The Truth Shall Make Ye Fret", - "When you look into the abyss, it's not supposed to wave back", - "I have no use for people who have learned the limits of the possible", - "Speak softly and employ a huge man with a crowbar", - "Truly, the leopard can change his shorts", - "+++Divide By Cucumber Error, Please Reinstall Universe And Reboot+++", - "+++Whoops! Here comes the cheese! +++", - "Bring out yer dead, bring out yer living dead", - "1. ALL FUNGI ARE EDIBLE. 2. SOME FUNGI ARE NOT EDIBLE MORE THAN ONCE.", - "A lot of farming is about manure", - "I am very attached to my fingers, and I like to think of them as attached to me", - "There be a lot o' men who became heroes cuz they wuz too scared tae run", - "If only the pawns united, make talked the rooks round, the whole board " - "could've been a republic in a dozen moves", - "Always remember that the crowd that applauds your coronation is the same " - "crowd that will applaud your beheading. 
People like a show.", -}; - -static const unsigned int num_quotes = sizeof(quotes) / sizeof(std::string); - -void print_index(Database &db) -{ - db.print_keys(); - for (unsigned int i = db.first(); i <= db.last(); i = db.next(i)) { - print("index[%s] = ", db[i].primary_key.c_str()); - db[i].print(); - print("\n"); - } - print("\n"); -} - -void test_search(const std::string &text) -{ - std::set results; - std::set::iterator it; - - filter :: search(text, results); - - print("Search for: \"%s\" returned %u matches:\n", - text.c_str(), results.size()); - - for (it = results.begin(); it != results.end(); it++) - print("\t%s\n", quotes[*it].c_str()); - print("\n"); -} - -void test_lowercase(const std::string &text) -{ - std::string res = filter :: to_lowercase(text); - print("Lowercasing: \"%s\" returned: \"%s\"\n", text.c_str(), res.c_str()); -} - -void test_0() -{ - for (unsigned int i = 0; i < num_quotes; i++) - filter :: add(quotes[i], i); - print_index(filter :: get_index()); - filter :: print_cache_stats(); - print("\n"); -} - -void test_1() -{ - test_search(""); - test_search("Rincewind"); - test_search("Rincewind Twoflower Luggage"); - test_search("the"); - test_search("the is"); - test_search("THE IS"); - test_search("th i"); - test_search("th i even"); - test_search("Th/i-eVEn"); - test_search("whoops"); - filter :: print_cache_stats(); - print("\n"); -} - -void test_2() -{ - test_lowercase(""); - test_lowercase("Rincewind"); - test_lowercase("Rincewind Twoflower Luggage"); - test_lowercase("the"); - test_lowercase("the is"); - test_lowercase("THE IS"); - test_lowercase("th i"); - test_lowercase("th i even"); - test_lowercase("Th/i-eVen"); - test_lowercase("whoops"); - test_lowercase("WHOOPS"); - filter :: print_cache_stats(); -} - -int main(int argc, char **argv) -{ - test_0(); - test_1(); - test_2(); - - return 0; -} diff --git a/tests/filter/filter.good b/tests/filter/filter.good deleted file mode 100644 index a2681624..00000000 --- 
a/tests/filter/filter.good +++ /dev/null @@ -1,757 +0,0 @@ -Found keys: 1 2 a ab abo abou about abs abse absen absenc absence aby abys abyss al all alw alwa alway always am an and ap app appl appla applau applaud applauds ar are aro arou aroun around as ask at att atta attac attach attache attached b ba bac back bas basi basic basica basical basicall basically be bec beca becam became bee been bef befo befor before beh behe behea behead beheadi beheadin beheading bes best bo boa boar board br bri brin bring bu but by c ca can cas case cat cats ch cha chan chanc chance chances chang change che chee chees cheese co com come comes cor coro coron corona coronat coronati coronatio coronation cou coul could couldv couldve cour cours course cr cro crop crow crowb crowba crowbar crowd cru crue cruel cu cuc cucu cucum cucumb cucumbe cucumber cuz d da dar dark darkn darkne darknes darkness de dea dead deat death di div divi divid divide do don dont doz doze dozen dr dro drop dru drug e ed edi edib edibl edible ef eff effo effor effort el elf elfs em emp empl emplo employ er err erro error ev eve even f fa far farm farmi farmin farming fi fin fing finge finger fingers fo for fr fre fret fu fun fung fungi g gi giv give given go goo good gr gra grav gravi gravit gravity h ha hab habi habit har hard hav have he her here hero heroe heroes hi him his hu hug huge i if im in int inte inter intere interes interest interesti interestin interesting into is isn isnt it its j jo job k ki kil kill kills l la lay le lea lear learn learne learned leo leop leopa leopar leopard li lig ligh light lik like lim limi limit limits liv live livi livin living lo loo look lot m ma mak make man manu manur manure may me men mer mere merel merely mi mil mill milli millio million min minc mince mo mor more mov move moves mu muc much my myt myth myths n ni nic nice nin nine no not o of off on onc once one onl only op opp oppo oppos opposi opposit opposite ot oth othe other others ou out p pa pas pass 
passe passed paw pawn pawns pe peo peop peopl people per pers persu persua persuad persuadi persuadin persuading pl ple plea pleas please po pos poss possi possib possibl possible r re reb rebo reboo reboot rei rein reins reinst reinsta reinstal reinstall rem reme remem rememb remembe remember rep repu repub republ republi republic ro rol roll rolle rolled roo rook rooks ros rose rou roun round ru run s sa sam same san sane say sc sca scar scare scared scy scyt scyth scythe sh sha shak shake shal shall sho shor short shorts show si sim simp simpl simply six sl slo slow slowl slowly so sof soft softl softly som some sp spe spea speak st sta star start str stre stren streng strengt strength su sun sup supp suppo suppos suppose supposed sur sure t ta tae tal talk talke talked talki talkin talking tax taxe taxes te ten ter terr terri terrib terribl terribly th tha than that the them thems themse themsel themselv themselve themselves ther there they thi thin think thinki thinkin thinking thu thun thund thunde thunder ti tim time times to too tr tre tree trees tri trib triba tribal tru true trul truly trut truth tu tur turn u un unc unce uncer uncert uncerta uncertai uncertain uncertaint uncertainty uni unit unite united univ unive univer univers universe unt unto up us use v va val valu value ve ver very w wa was wasn wasnt wav wave we wea weak wer were wh wha what whe when whi whic which who whol whole whoo whoop whoops wi wil will wit with wo wor word words wort worth wu wuz y ye yer yes yo you your -index[w] = {0 1 2 4 12 14 15 17 21 22 23 26 31 32 33} -index[wh] = {0 2 4 12 21 22 26 31 32} -index[wha] = {0 12} -index[what] = {0 12} -index[h] = {0 7 11 17 22 23 24 26 31} -index[he] = {0 17 26 31} -index[her] = {0 17 26 31} -index[hero] = {0 31} -index[heroe] = {0 31} -index[heroes] = {0 31} -index[l] = {0 3 10 12 15 16 21 22 24 27 29 30 31 33} -index[li] = {0 3 10 12 16 22 27 30 33} -index[lik] = {0 10 12 30 33} -index[like] = {0 10 12 30 33} -index[b] = {0 2 4 13 14 
21 25 27 31 32 33} -index[be] = {0 13 31 32 33} -index[bes] = {0} -index[best] = {0} -index[i] = {0 1 2 3 4 7 8 10 11 12 15 16 17 18 21 22 29 30 32 33} -index[is] = {0 3 4 7 10 11 17 29 33} -index[t] = {0 1 2 3 4 5 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 24 26 28 30 31 32 33} -index[th] = {0 1 3 8 9 11 12 13 15 17 20 21 22 24 26 28 30 31 32 33} -index[the] = {0 1 3 9 12 13 15 20 21 22 24 26 30 31 32 33} -index[them] = {0 30} -index[thems] = {0} -index[themse] = {0} -index[themsel] = {0} -index[themselv] = {0} -index[themselve] = {0} -index[themselves] = {0} -index[s] = {1 2 3 8 9 11 12 15 17 20 21 23 24 28 31 33} -index[su] = {1 17 21} -index[sun] = {1} -index[r] = {1 8 25 31 32 33} -index[ro] = {1 8 32} -index[ros] = {1} -index[rose] = {1} -index[sl] = {1 9 12} -index[slo] = {1 9 12} -index[slow] = {1 9 12} -index[slowl] = {1 9 12} -index[slowly] = {1 9 12} -index[a] = {1 3 6 7 8 9 10 11 12 14 15 17 18 19 21 23 25 28 29 30 31 32 33} -index[as] = {1 12 30} -index[if] = {1 32} -index[wa] = {1 21} -index[was] = {1} -index[wasn] = {1} -index[wasnt] = {1} -index[sur] = {1 17} -index[sure] = {1 17} -index[it] = {1 3 4 8 10 21} -index[wo] = {1 14} -index[wor] = {1 14} -index[wort] = {1} -index[worth] = {1} -index[al] = {1 19 28 33} -index[all] = {1 19 28} -index[e] = {1 15 17 23 25 28} -index[ef] = {1} -index[eff] = {1} -index[effo] = {1} -index[effor] = {1} -index[effort] = {1} -index[o] = {2 3 5 10 11 13 15 18 19 22 27 28 29 30 31 32} -index[of] = {2 3 5 10 11 19 22 29 30} -index[c] = {2 5 6 7 12 23 24 25 26 31 32 33} -index[co] = {2 26 32 33} -index[cou] = {2 32} -index[cour] = {2} -index[cours] = {2} -index[course] = {2} -index[im] = {2 17} -index[sa] = {2 12 33} -index[san] = {2} -index[sane] = {2} -index[whe] = {2 21} -index[when] = {2 21} -index[tr] = {2 19 20 24} -index[tre] = {2} -index[tree] = {2} -index[trees] = {2} -index[st] = {2 15} -index[sta] = {2} -index[star] = {2} -index[start] = {2} -index[ta] = {2 18 31 32} -index[tal] = {2 32} -index[talk] = {2 
32} -index[talki] = {2} -index[talkin] = {2} -index[talking] = {2} -index[to] = {2 5 10 11 14 21 30 31} -index[m] = {2 5 7 10 14 16 19 20 23 28 29 30 31 32} -index[me] = {2 7 30 31} -index[d] = {2 3 7 9 10 12 13 18 25 27 32} -index[do] = {2 12 13 32} -index[don] = {2} -index[dont] = {2} -index[ba] = {2 4 21} -index[bac] = {2 21} -index[back] = {2 21} -index[da] = {3} -index[dar] = {3} -index[dark] = {3} -index[darkn] = {3} -index[darkne] = {3} -index[darknes] = {3} -index[darkness] = {3} -index[isn] = {3 7} -index[isnt] = {3 7} -index[op] = {3} -index[opp] = {3} -index[oppo] = {3} -index[oppos] = {3} -index[opposi] = {3} -index[opposit] = {3} -index[opposite] = {3} -index[lig] = {3} -index[ligh] = {3} -index[light] = {3} -index[its] = {3 4 21} -index[si] = {3 8} -index[sim] = {3} -index[simp] = {3} -index[simpl] = {3} -index[simply] = {3} -index[ab] = {3 12 17 21 29} -index[abs] = {3} -index[abse] = {3} -index[absen] = {3} -index[absenc] = {3} -index[absence] = {3} -index[ti] = {4 5 10 16} -index[tim] = {4 5 10 16} -index[time] = {4 5 10 16} -index[p] = {4 12 14 15 22 25 32 33} -index[pa] = {4 32} -index[pas] = {4} -index[pass] = {4} -index[passe] = {4} -index[passed] = {4} -index[whi] = {4} -index[whic] = {4} -index[which] = {4} -index[bas] = {4} -index[basi] = {4} -index[basic] = {4} -index[basica] = {4} -index[basical] = {4} -index[basicall] = {4} -index[basically] = {4} -index[j] = {4 7} -index[jo] = {4 7} -index[job] = {4 7} -index[mi] = {5 14} -index[mil] = {5} -index[mill] = {5} -index[milli] = {5} -index[millio] = {5} -index[million] = {5} -index[on] = {5 18 28 32} -index[one] = {5} -index[ch] = {5 24 26} -index[cha] = {5 24} -index[chan] = {5 24} -index[chanc] = {5} -index[chance] = {5} -index[chances] = {5} -index[cr] = {5 7 23 33} -index[cro] = {5 23 33} -index[crop] = {5} -index[u] = {5 13 17 18 22 25 32} -index[up] = {5 18} -index[n] = {5 6 12 14 17 18 21 22 28} -index[ni] = {5 6} -index[nin] = {5} -index[nine] = {5} -index[times] = {5 16} -index[ou] = 
{5 27} -index[out] = {5 27} -index[te] = {5 7} -index[ten] = {5} -index[ca] = {6 12 24} -index[cat] = {6} -index[cats] = {6} -index[ar] = {6 9 12 19 28} -index[are] = {6 12 19 28} -index[nic] = {6} -index[nice] = {6} -index[de] = {7 18 27} -index[dea] = {7 18 27} -index[deat] = {7 18} -index[death] = {7 18} -index[cru] = {7} -index[crue] = {7} -index[cruel] = {7} -index[mer] = {7} -index[mere] = {7} -index[merel] = {7} -index[merely] = {7} -index[ter] = {7} -index[terr] = {7} -index[terri] = {7} -index[terrib] = {7} -index[terribl] = {7} -index[terribly] = {7} -index[g] = {7 11 19} -index[go] = {7} -index[goo] = {7} -index[good] = {7} -index[at] = {7 30} -index[hi] = {7 17 24} -index[his] = {7 24} -index[thu] = {8} -index[thun] = {8} -index[thund] = {8} -index[thunde] = {8} -index[thunder] = {8} -index[rol] = {8} -index[roll] = {8} -index[rolle] = {8} -index[rolled] = {8} -index[six] = {8} -index[dr] = {9 10} -index[dro] = {9} -index[drop] = {9} -index[sc] = {9 31} -index[scy] = {9} -index[scyt] = {9} -index[scyth] = {9} -index[scythe] = {9} -index[an] = {9 12 15 17 23 25 30} -index[and] = {9 12 17 23 25 30} -index[tu] = {9 12 18} -index[tur] = {9 12 18} -index[turn] = {9 12 18} -index[aro] = {9 12} -index[arou] = {9 12} -index[aroun] = {9 12} -index[around] = {9 12} -index[dru] = {10} -index[drug] = {10} -index[too] = {10 31} -index[mu] = {10} -index[muc] = {10} -index[much] = {10} -index[k] = {10} -index[ki] = {10} -index[kil] = {10} -index[kill] = {10} -index[kills] = {10} -index[y] = {10 12 13 14 16 20 21 27 33} -index[yo] = {10 12 13 16 21 33} -index[you] = {10 12 13 16 21 33} -index[gr] = {11} -index[gra] = {11} -index[grav] = {11} -index[gravi] = {11} -index[gravit] = {11} -index[gravity] = {11} -index[ha] = {11 22} -index[hab] = {11} -index[habi] = {11} -index[habit] = {11} -index[tha] = {11 12 17 28 33} -index[that] = {11 12 17 33} -index[har] = {11} -index[hard] = {11} -index[sh] = {11 20 24 33} -index[sha] = {11 20} -index[shak] = {11} -index[shake] = 
{11} -index[off] = {11} -index[no] = {12 14 17 18 21 22 28} -index[not] = {12 14 17 18 21 28} -index[ask] = {12} -index[pe] = {12 14 15 22 33} -index[peo] = {12 14 22 33} -index[peop] = {12 14 22 33} -index[peopl] = {12 14 22 33} -index[people] = {12 14 22 33} -index[they] = {12 13 15 31} -index[thi] = {12 30} -index[thin] = {12 30} -index[think] = {12 30} -index[thinki] = {12} -index[thinkin] = {12} -index[thinking] = {12} -index[abo] = {12 17 29} -index[abou] = {12 17 29} -index[about] = {12 17 29} -index[in] = {12 15 16 21 32} -index[cas] = {12} -index[case] = {12} -index[v] = {12 19 30} -index[ve] = {12 30} -index[ver] = {12 30} -index[very] = {12 30} -index[say] = {12} -index[un] = {13 17 25 32} -index[unt] = {13} -index[unto] = {13} -index[ot] = {13 15} -index[oth] = {13 15} -index[othe] = {13 15} -index[other] = {13 15} -index[others] = {13 15} -index[bef] = {13} -index[befo] = {13} -index[befor] = {13} -index[before] = {13} -index[ma] = {14 16 20 23 29 32} -index[man] = {14 23 29} -index[min] = {14} -index[minc] = {14} -index[mince] = {14} -index[word] = {14} -index[words] = {14} -index[ye] = {14 20 27} -index[yes] = {14} -index[bu] = {14} -index[but] = {14} -index[el] = {15} -index[elf] = {15} -index[elfs] = {15} -index[str] = {15} -index[stre] = {15} -index[stren] = {15} -index[streng] = {15} -index[strengt] = {15} -index[strength] = {15} -index[la] = {15} -index[lay] = {15} -index[per] = {15} -index[pers] = {15} -index[persu] = {15} -index[persua] = {15} -index[persuad] = {15} -index[persuadi] = {15} -index[persuadin] = {15} -index[persuading] = {15} -index[we] = {15} -index[wer] = {15} -index[were] = {15} -index[wea] = {15} -index[weak] = {15} -index[may] = {16} -index[liv] = {16 27} -index[live] = {16} -index[int] = {16 21} -index[inte] = {16} -index[inter] = {16} -index[intere] = {16} -index[interes] = {16} -index[interest] = {16} -index[interesti] = {16} -index[interestin] = {16} -index[interesting] = {16} -index[wi] = {17 23 33} -index[wit] = {17 
23} -index[with] = {17 23} -index[him] = {17} -index[here] = {17 26} -index[ev] = {17} -index[eve] = {17} -index[even] = {17} -index[unc] = {17} -index[unce] = {17} -index[uncer] = {17} -index[uncert] = {17} -index[uncerta] = {17} -index[uncertai] = {17} -index[uncertain] = {17} -index[uncertaint] = {17} -index[uncertainty] = {17} -index[am] = {18 30} -index[tax] = {18} -index[taxe] = {18} -index[taxes] = {18} -index[onl] = {18 32} -index[only] = {18 32} -index[onc] = {18 28} -index[once] = {18 28} -index[tri] = {19} -index[trib] = {19} -index[triba] = {19} -index[tribal] = {19} -index[my] = {19 30} -index[myt] = {19} -index[myth] = {19} -index[myths] = {19} -index[tru] = {19 20 24} -index[true] = {19} -index[f] = {19 20 22 28 29 30} -index[fo] = {19 22} -index[for] = {19 22} -index[gi] = {19} -index[giv] = {19} -index[give] = {19} -index[given] = {19} -index[va] = {19} -index[val] = {19} -index[valu] = {19} -index[value] = {19} -index[trut] = {20} -index[truth] = {20} -index[shal] = {20} -index[shall] = {20} -index[mak] = {20 32} -index[make] = {20 32} -index[fr] = {20} -index[fre] = {20} -index[fret] = {20} -index[lo] = {21 29 31} -index[loo] = {21} -index[look] = {21} -index[into] = {21} -index[aby] = {21} -index[abys] = {21} -index[abyss] = {21} -index[sup] = {21} -index[supp] = {21} -index[suppo] = {21} -index[suppos] = {21} -index[suppose] = {21} -index[supposed] = {21} -index[wav] = {21} -index[wave] = {21} -index[hav] = {22} -index[have] = {22} -index[us] = {22} -index[use] = {22} -index[who] = {22 26 31 32} -index[le] = {22 24} -index[lea] = {22} -index[lear] = {22} -index[learn] = {22} -index[learne] = {22} -index[learned] = {22} -index[lim] = {22} -index[limi] = {22} -index[limit] = {22} -index[limits] = {22} -index[po] = {22} -index[pos] = {22} -index[poss] = {22} -index[possi] = {22} -index[possib] = {22} -index[possibl] = {22} -index[possible] = {22} -index[sp] = {23} -index[spe] = {23} -index[spea] = {23} -index[speak] = {23} -index[so] = {23 28} 
-index[sof] = {23} -index[soft] = {23} -index[softl] = {23} -index[softly] = {23} -index[em] = {23} -index[emp] = {23} -index[empl] = {23} -index[emplo] = {23} -index[employ] = {23} -index[hu] = {23} -index[hug] = {23} -index[huge] = {23} -index[crow] = {23 33} -index[crowb] = {23} -index[crowba] = {23} -index[crowbar] = {23} -index[trul] = {24} -index[truly] = {24} -index[leo] = {24} -index[leop] = {24} -index[leopa] = {24} -index[leopar] = {24} -index[leopard] = {24} -index[can] = {24} -index[chang] = {24} -index[change] = {24} -index[sho] = {24 33} -index[shor] = {24} -index[short] = {24} -index[shorts] = {24} -index[di] = {25} -index[div] = {25} -index[divi] = {25} -index[divid] = {25} -index[divide] = {25} -index[by] = {25} -index[cu] = {25 31} -index[cuc] = {25} -index[cucu] = {25} -index[cucum] = {25} -index[cucumb] = {25} -index[cucumbe] = {25} -index[cucumber] = {25} -index[er] = {25} -index[err] = {25} -index[erro] = {25} -index[error] = {25} -index[pl] = {25} -index[ple] = {25} -index[plea] = {25} -index[pleas] = {25} -index[please] = {25} -index[re] = {25 32 33} -index[rei] = {25} -index[rein] = {25} -index[reins] = {25} -index[reinst] = {25} -index[reinsta] = {25} -index[reinstal] = {25} -index[reinstall] = {25} -index[uni] = {25 32} -index[univ] = {25} -index[unive] = {25} -index[univer] = {25} -index[univers] = {25} -index[universe] = {25} -index[reb] = {25} -index[rebo] = {25} -index[reboo] = {25} -index[reboot] = {25} -index[whoo] = {26} -index[whoop] = {26} -index[whoops] = {26} -index[com] = {26} -index[come] = {26} -index[comes] = {26} -index[che] = {26} -index[chee] = {26} -index[chees] = {26} -index[cheese] = {26} -index[br] = {27} -index[bri] = {27} -index[brin] = {27} -index[bring] = {27} -index[yer] = {27} -index[dead] = {27} -index[livi] = {27} -index[livin] = {27} -index[living] = {27} -index[1] = {28} -index[fu] = {28} -index[fun] = {28} -index[fung] = {28} -index[fungi] = {28} -index[ed] = {28} -index[edi] = {28} -index[edib] = {28} 
-index[edibl] = {28} -index[edible] = {28} -index[2] = {28} -index[som] = {28} -index[some] = {28} -index[mo] = {28 32} -index[mor] = {28} -index[more] = {28} -index[than] = {28} -index[lot] = {29 31} -index[fa] = {29} -index[far] = {29} -index[farm] = {29} -index[farmi] = {29} -index[farmin] = {29} -index[farming] = {29} -index[manu] = {29} -index[manur] = {29} -index[manure] = {29} -index[att] = {30} -index[atta] = {30} -index[attac] = {30} -index[attach] = {30} -index[attache] = {30} -index[attached] = {30} -index[fi] = {30} -index[fin] = {30} -index[fing] = {30} -index[finge] = {30} -index[finger] = {30} -index[fingers] = {30} -index[ther] = {31} -index[there] = {31} -index[men] = {31} -index[bec] = {31} -index[beca] = {31} -index[becam] = {31} -index[became] = {31} -index[cuz] = {31} -index[wu] = {31} -index[wuz] = {31} -index[sca] = {31} -index[scar] = {31} -index[scare] = {31} -index[scared] = {31} -index[tae] = {31} -index[ru] = {31} -index[run] = {31} -index[paw] = {32} -index[pawn] = {32} -index[pawns] = {32} -index[unit] = {32} -index[unite] = {32} -index[united] = {32} -index[talke] = {32} -index[talked] = {32} -index[roo] = {32} -index[rook] = {32} -index[rooks] = {32} -index[rou] = {32} -index[roun] = {32} -index[round] = {32} -index[whol] = {32} -index[whole] = {32} -index[bo] = {32} -index[boa] = {32} -index[boar] = {32} -index[board] = {32} -index[coul] = {32} -index[could] = {32} -index[couldv] = {32} -index[couldve] = {32} -index[bee] = {32} -index[been] = {32} -index[rep] = {32} -index[repu] = {32} -index[repub] = {32} -index[republ] = {32} -index[republi] = {32} -index[republic] = {32} -index[doz] = {32} -index[doze] = {32} -index[dozen] = {32} -index[mov] = {32} -index[move] = {32} -index[moves] = {32} -index[alw] = {33} -index[alwa] = {33} -index[alway] = {33} -index[always] = {33} -index[rem] = {33} -index[reme] = {33} -index[remem] = {33} -index[rememb] = {33} -index[remembe] = {33} -index[remember] = {33} -index[crowd] = {33} -index[ap] = 
{33} -index[app] = {33} -index[appl] = {33} -index[appla] = {33} -index[applau] = {33} -index[applaud] = {33} -index[applauds] = {33} -index[your] = {33} -index[cor] = {33} -index[coro] = {33} -index[coron] = {33} -index[corona] = {33} -index[coronat] = {33} -index[coronati] = {33} -index[coronatio] = {33} -index[coronation] = {33} -index[sam] = {33} -index[same] = {33} -index[wil] = {33} -index[will] = {33} -index[beh] = {33} -index[behe] = {33} -index[behea] = {33} -index[behead] = {33} -index[beheadi] = {33} -index[beheadin] = {33} -index[beheading] = {33} -index[show] = {33} - -Lowercase cache size: 260 -Lowercase cache hits: 112 - -Search for: "" returned 0 matches: - -Search for: "Rincewind" returned 0 matches: - -Search for: "Rincewind Twoflower Luggage" returned 0 matches: - -Search for: "the" returned 16 matches: - What heroes like best is themselves. - The sun rose slowly, as if wasn't sure it was worth all the effort - Darkness isn't the opposite of light, it's simply its absence - DROP THE SCYTHE, AND TURN AROUND SLOWLY - You do not ask people like that what they are thinking about in case they turn around very slowly and say 'You' - Do unto others before they do unto you - An elf's strength lay in persuading others they were weak - The Truth Shall Make Ye Fret - When you look into the abyss, it's not supposed to wave back - I have no use for people who have learned the limits of the possible - Truly, the leopard can change his shorts - +++Whoops! Here comes the cheese! +++ - I am very attached to my fingers, and I like to think of them as attached to me - There be a lot o' men who became heroes cuz they wuz too scared tae run - If only the pawns united, make talked the rooks round, the whole board could've been a republic in a dozen moves - Always remember that the crowd that applauds your coronation is the same crowd that will applaud your beheading. People like a show. - -Search for: "the is" returned 3 matches: - What heroes like best is themselves. 
- Darkness isn't the opposite of light, it's simply its absence - Always remember that the crowd that applauds your coronation is the same crowd that will applaud your beheading. People like a show. - -Search for: "THE IS" returned 3 matches: - What heroes like best is themselves. - Darkness isn't the opposite of light, it's simply its absence - Always remember that the crowd that applauds your coronation is the same crowd that will applaud your beheading. People like a show. - -Search for: "th i" returned 13 matches: - What heroes like best is themselves. - The sun rose slowly, as if wasn't sure it was worth all the effort - Darkness isn't the opposite of light, it's simply its absence - Thunder rolled ... it rolled a six - Gravity is a habit that is hard to shake off - You do not ask people like that what they are thinking about in case they turn around very slowly and say 'You' - An elf's strength lay in persuading others they were weak - WITH HIM HERE, EVEN UNCERTAINTY IS UNCERTAIN. AND I'M NOT SURE EVEN ABOUT THAT - When you look into the abyss, it's not supposed to wave back - I have no use for people who have learned the limits of the possible - I am very attached to my fingers, and I like to think of them as attached to me - If only the pawns united, make talked the rooks round, the whole board could've been a republic in a dozen moves - Always remember that the crowd that applauds your coronation is the same crowd that will applaud your beheading. People like a show. - -Search for: "th i even" returned 1 matches: - WITH HIM HERE, EVEN UNCERTAINTY IS UNCERTAIN. AND I'M NOT SURE EVEN ABOUT THAT - -Search for: "Th/i-eVEn" returned 1 matches: - WITH HIM HERE, EVEN UNCERTAINTY IS UNCERTAIN. AND I'M NOT SURE EVEN ABOUT THAT - -Search for: "whoops" returned 1 matches: - +++Whoops! Here comes the cheese! 
+++ - -Lowercase cache size: 269 -Lowercase cache hits: 121 - -Lowercasing: "" returned: "" -Lowercasing: "Rincewind" returned: "rincewind" -Lowercasing: "Rincewind Twoflower Luggage" returned: "rincewind twoflower luggage" -Lowercasing: "the" returned: "the" -Lowercasing: "the is" returned: "the is" -Lowercasing: "THE IS" returned: "the is" -Lowercasing: "th i" returned: "th i" -Lowercasing: "th i even" returned: "th i even" -Lowercasing: "Th/i-eVen" returned: "th i even" -Lowercasing: "whoops" returned: "whoops" -Lowercasing: "WHOOPS" returned: "whoops" -Lowercase cache size: 271 -Lowercase cache hits: 138 diff --git a/tests/src/filter.cpp b/tests/src/filter.cpp new file mode 100644 index 00000000..a2dfb97d --- /dev/null +++ b/tests/src/filter.cpp @@ -0,0 +1,98 @@ +/* + * Copyright 2014 (c) Anna Schumaker. + * Test the filtering code + */ + +#include +#include +#include + +#include +#include +#include + +enum action_t { ADD, LOWERCASE, SEARCH }; +/* Pass text to filter::add() with track id 0 and print the string it returns. */ +void add_text(const std::string &text) +{ + std::string lc = filter :: add(text, 0); + print("%s\n", lc.c_str()); +} +/* Pass text to filter::lowercase() and print the string it returns. */ +void to_lowercase(const std::string &text) +{ + std::string lc = filter :: lowercase(text); + print("%s\n", lc.c_str()); +} +/* Open filter.txt for reading and hand its first n lines to filter::add(), using the line number (from 0) as the track id; does nothing if the open fails. */ +void read_file(unsigned int n) +{ + File f("filter.txt", FILE_TYPE_DATA); + if (f.open(OPEN_READ)) { + for (unsigned int i = 0; i < n; i++) { + std::string text = f.getline(); + filter :: add(text, i); + } + f.close(); + } +} +/* Run filter::search() on text and print the resulting ids on one line, separated by spaces; prints nothing when the result set is empty. */ +void do_search(const std::string &text) +{ + std::set res; + std::set::iterator it; + + filter :: search(text, res); + + it = res.begin(); + if (it == res.end()) + return; + + print("%u", *it); + for (it++; it != res.end(); it++) + print(" %u", *it); + print("\n"); +} +/* Options: -a add the text (default), -l lowercase the text, -s N add the first N lines of filter.txt and then search for the text. Remaining arguments are joined into the text, each preceded by a space. */ +int main(int argc, char **argv) +{ + int c; /* getopt() returns int: a plain char may be unsigned and would then never compare equal to -1 */ + unsigned int n = 0; /* only assigned by -s; start from a defined value */ + action_t action = ADD; + + while ((c = getopt(argc, argv, "als:")) != -1) { + switch (c) { + case 'a': + action = ADD; + break; + case 'l': + action = LOWERCASE; + break; + case 's': + action = SEARCH; + n = atoi(optarg); 
+ break; + } + } + + std::string text; + for (int i = optind; i < argc; i++) { + text += " "; + text += argv[i]; + } + + switch (action) { + case ADD: + add_text(text); + break; + case LOWERCASE: + to_lowercase(text); + break; + case SEARCH: + read_file(n); + do_search(text); + break; + } + + return 0; +}