diff --git a/design.txt b/design.txt index 855b7ff3..e4a13c3d 100644 --- a/design.txt +++ b/design.txt @@ -415,7 +415,7 @@ Filter: (lib/filter.cpp) substrings, so take the intersection of all sets returned by the filter_index for a given substring. - const std::string & filter :: to_lowercase(const std::string &string); + std::string filter :: to_lowercase(const std::string &string); Split the string into words following step 1 of "Parsing" (above). Assemble and return a result string using the lower case cache to convert each term to lowercase. diff --git a/design/filter.txt b/design/filter.txt index 4f7100b9..45243582 100644 --- a/design/filter.txt +++ b/design/filter.txt @@ -44,7 +44,7 @@ Filter: (lib/filter.cpp) substrings, so take the intersection of all sets returned by the filter_index for a given substring. - const std::string & filter :: to_lowercase(const std::string &string); + std::string filter :: to_lowercase(const std::string &string); Split the string into words following step 1 of "Parsing" (above). Assemble and return a result string using the lower case cache to convert each term to lowercase. diff --git a/include/filter.h b/include/filter.h index 674fdaf2..a88e4dd1 100644 --- a/include/filter.h +++ b/include/filter.h @@ -11,11 +11,12 @@ namespace filter { void add(const std::string &, unsigned int); void search(const std::string &, std::set &); + std::string to_lowercase(const std::string &); void print_cache_stats(); #ifdef CONFIG_TEST - Database &get_index(); + Database &get_index(); #endif /* CONFIG_TEST */ }; diff --git a/lib/filter.cpp b/lib/filter.cpp index 3f8b49f7..2f4a57ea 100644 --- a/lib/filter.cpp +++ b/lib/filter.cpp @@ -10,7 +10,7 @@ #include #include -static Database filter_index(""); +static Database filter_index(""); static std::map lowercase_cache; static unsigned int lowercase_cache_hits = 0; @@ -136,6 +136,22 @@ void filter :: search(const std::string &text, std::set &res) find_intersection(*it, res); } +std::string filter :: to_lowercase(const std::string &text) +{ + std::string res = ""; + std::list parsed; + std::list::iterator it; + + parse_text(text, parsed); + for (it = parsed.begin(); it != parsed.end(); it++) { + if (it != parsed.begin()) + res += " "; + res += *it; + } + + return res; +} + void filter :: print_cache_stats() { print("Lowercase cache size: %u\n", lowercase_cache.size()); @@ -143,7 +159,7 @@ void filter :: print_cache_stats() } #ifdef CONFIG_TEST -Database &filter :: get_index() +Database &filter :: get_index() { return filter_index; } diff --git a/tests/filter/filter.cpp b/tests/filter/filter.cpp index 0a9f30ff..015e1670 100644 --- a/tests/filter/filter.cpp +++ b/tests/filter/filter.cpp @@ -48,11 +48,11 @@ std::string quotes [] = { static const unsigned int num_quotes = sizeof(quotes) / sizeof(std::string); -void print_index(Database &db) +void print_index(Database &db) { db.print_keys(); for (unsigned int i = db.first(); i <= db.last(); i = db.next(i)) { - print("index[%s] = ", db[i].primary_key().c_str()); + print("index[%s] = ", db[i].primary_key.c_str()); db[i].print(); print("\n"); } @@ -74,6 +74,12 @@ void test_search(const std::string &text) print("\n"); } +void test_lowercase(const std::string &text) +{ + std::string res = filter :: to_lowercase(text); + print("Lowercasing: \"%s\" returned: \"%s\"\n", text.c_str(), res.c_str()); +} + void test_0() { for (unsigned int i = 0; i < num_quotes; i++) @@ -96,12 +102,30 @@ void test_1() test_search("Th/i-eVEn"); test_search("whoops"); filter :: print_cache_stats(); + print("\n"); +} + +void test_2() +{ + test_lowercase(""); + test_lowercase("Rincewind"); + test_lowercase("Rincewind Twoflower Luggage"); + test_lowercase("the"); + test_lowercase("the is"); + test_lowercase("THE IS"); + test_lowercase("th i"); + test_lowercase("th i even"); + test_lowercase("Th/i-eVen"); + test_lowercase("whoops"); + test_lowercase("WHOOPS"); + filter :: print_cache_stats(); } int main(int argc, char **argv) { test_0(); test_1(); + test_2(); return 0; } diff --git a/tests/filter/filter.good b/tests/filter/filter.good index 3c4f9a7e..a2681624 100644 --- a/tests/filter/filter.good +++ b/tests/filter/filter.good @@ -741,3 +741,17 @@ Search for: "whoops" returned 1 matches: Lowercase cache size: 269 Lowercase cache hits: 121 + +Lowercasing: "" returned: "" +Lowercasing: "Rincewind" returned: "rincewind" +Lowercasing: "Rincewind Twoflower Luggage" returned: "rincewind twoflower luggage" +Lowercasing: "the" returned: "the" +Lowercasing: "the is" returned: "the is" +Lowercasing: "THE IS" returned: "the is" +Lowercasing: "th i" returned: "th i" +Lowercasing: "th i even" returned: "th i even" +Lowercasing: "Th/i-eVen" returned: "th i even" +Lowercasing: "whoops" returned: "whoops" +Lowercasing: "WHOOPS" returned: "whoops" +Lowercase cache size: 271 +Lowercase cache hits: 138