filter: Implement a "to lowercase" function

This function will convert the provided text to lowercase.  This really
isn't a filter function, but the filter already implements a lowercase
cache that should be useful.

Signed-off-by: Anna Schumaker <schumaker.anna@gmail.com>
This commit is contained in:
Anna Schumaker 2013-12-30 20:54:19 -05:00 committed by Anna Schumaker
parent acd8bd7b23
commit ee100c0238
6 changed files with 62 additions and 7 deletions

View File

@ -415,7 +415,7 @@ Filter: (lib/filter.cpp)
substrings, so take the intersection of all sets returned by
the filter_index for a given substring.
const std::string & filter :: to_lowercase(const std::string &string);
std::string filter :: to_lowercase(const std::string &string);
Split the string into words following step 1 of "Parsing"
(above). Use the lowercase cache to convert each term to
lowercase, then join the terms with single spaces and return
the resulting string.

View File

@ -44,7 +44,7 @@ Filter: (lib/filter.cpp)
substrings, so take the intersection of all sets returned by
the filter_index for a given substring.
const std::string & filter :: to_lowercase(const std::string &string);
std::string filter :: to_lowercase(const std::string &string);
Split the string into words following step 1 of "Parsing"
(above). Use the lowercase cache to convert each term to
lowercase, then join the terms with single spaces and return
the resulting string.

View File

@ -11,11 +11,12 @@ namespace filter {
void add(const std::string &, unsigned int);
void search(const std::string &, std::set<unsigned int> &);
std::string to_lowercase(const std::string &);
void print_cache_stats();
#ifdef CONFIG_TEST
Database<IndexEntry, const std::string> &get_index();
Database<IndexEntry> &get_index();
#endif /* CONFIG_TEST */
};

View File

@ -10,7 +10,7 @@
#include <map>
#include <set>
static Database<IndexEntry, const std::string> filter_index("");
static Database<IndexEntry> filter_index("");
static std::map<std::string, std::string> lowercase_cache;
static unsigned int lowercase_cache_hits = 0;
@ -136,6 +136,22 @@ void filter :: search(const std::string &text, std::set<unsigned int> &res)
find_intersection(*it, res);
}
/*
 * Rebuild text as a single-space separated list of the terms that
 * parse_text() extracts from it.  No case conversion happens in this
 * function itself: the terms are used exactly as parse_text() stored
 * them (presumably already lowercased through the lowercase cache -
 * confirm against parse_text()).
 *
 * Returns an empty string when parse_text() produces no terms.
 */
std::string filter :: to_lowercase(const std::string &text)
{
	std::list<std::string> terms;
	parse_text(text, terms);

	std::string joined;
	std::list<std::string>::iterator term = terms.begin();
	if (term == terms.end())
		return joined;

	/* The first term goes in bare; every later one is preceded by a space. */
	joined = *term;
	for (++term; term != terms.end(); ++term) {
		joined += " ";
		joined += *term;
	}
	return joined;
}
void filter :: print_cache_stats()
{
print("Lowercase cache size: %u\n", lowercase_cache.size());
@ -143,7 +159,7 @@ void filter :: print_cache_stats()
}
#ifdef CONFIG_TEST
/*
 * Test-only accessor (this sits inside an #ifdef CONFIG_TEST section):
 * returns a reference to the file-static filter_index database.
 */
Database<IndexEntry, const std::string> &filter :: get_index()
Database<IndexEntry> &filter :: get_index()
{
return filter_index;
}

View File

@ -48,11 +48,11 @@ std::string quotes [] = {
static const unsigned int num_quotes = sizeof(quotes) / sizeof(std::string);
/*
 * Dump the index database: call db.print_keys(), then for every entry
 * from db.first() through db.last() (stepping with db.next()) print the
 * entry's primary key, the entry's own print() output, and a newline.
 */
void print_index(Database<IndexEntry, const std::string> &db)
void print_index(Database<IndexEntry> &db)
{
db.print_keys();
for (unsigned int i = db.first(); i <= db.last(); i = db.next(i)) {
print("index[%s] = ", db[i].primary_key().c_str());
print("index[%s] = ", db[i].primary_key.c_str());
db[i].print();
print("\n");
}
@ -74,6 +74,12 @@ void test_search(const std::string &text)
print("\n");
}
/*
 * Run one input through filter :: to_lowercase() and print both the
 * original text and the returned string so they can be compared in the
 * test output.
 */
void test_lowercase(const std::string &text)
{
	const std::string lowered = filter :: to_lowercase(text);

	print("Lowercasing: \"%s\" returned: \"%s\"\n",
	      text.c_str(), lowered.c_str());
}
void test_0()
{
for (unsigned int i = 0; i < num_quotes; i++)
@ -96,12 +102,30 @@ void test_1()
test_search("Th/i-eVEn");
test_search("whoops");
filter :: print_cache_stats();
print("\n");
}
/*
 * Lowercase test: pass a fixed list of inputs (empty, single word,
 * several words, mixed case, punctuation-separated) to
 * test_lowercase() in order, then print the lowercase cache statistics.
 */
void test_2()
{
	static const char * const inputs[] = {
		"",
		"Rincewind",
		"Rincewind Twoflower Luggage",
		"the",
		"the is",
		"THE IS",
		"th i",
		"th i even",
		"Th/i-eVen",
		"whoops",
		"WHOOPS",
	};
	static const unsigned int num_inputs = sizeof(inputs) / sizeof(inputs[0]);

	for (unsigned int i = 0; i < num_inputs; i++)
		test_lowercase(inputs[i]);

	filter :: print_cache_stats();
}
int main(int argc, char **argv)
{
test_0();
test_1();
test_2();
return 0;
}

View File

@ -741,3 +741,17 @@ Search for: "whoops" returned 1 matches:
Lowercase cache size: 269
Lowercase cache hits: 121
Lowercasing: "" returned: ""
Lowercasing: "Rincewind" returned: "rincewind"
Lowercasing: "Rincewind Twoflower Luggage" returned: "rincewind twoflower luggage"
Lowercasing: "the" returned: "the"
Lowercasing: "the is" returned: "the is"
Lowercasing: "THE IS" returned: "the is"
Lowercasing: "th i" returned: "th i"
Lowercasing: "th i even" returned: "th i even"
Lowercasing: "Th/i-eVen" returned: "th i even"
Lowercasing: "whoops" returned: "whoops"
Lowercasing: "WHOOPS" returned: "whoops"
Lowercase cache size: 271
Lowercase cache hits: 138