filter: Implement a "to lowercase" function
This function converts the provided text to lowercase. It isn't really a filter function, but the filter already implements a lowercase cache that this function can reuse.

Signed-off-by: Anna Schumaker <schumaker.anna@gmail.com>
This commit is contained in:
parent
acd8bd7b23
commit
ee100c0238
|
@ -415,7 +415,7 @@ Filter: (lib/filter.cpp)
|
|||
substrings, so take the intersection of all sets returned by
|
||||
the filter_index for a given substring.
|
||||
|
||||
const std::string & filter :: to_lowercase(const std::string &string);
|
||||
std::string filter :: to_lowercase(const std::string &string);
|
||||
Split the string into words following step 1 of "Parsing"
|
||||
(above). Assemble and return a result string using the lower
|
||||
case cache to convert each term to lowercase.
|
||||
|
|
|
@ -44,7 +44,7 @@ Filter: (lib/filter.cpp)
|
|||
substrings, so take the intersection of all sets returned by
|
||||
the filter_index for a given substring.
|
||||
|
||||
const std::string & filter :: to_lowercase(const std::string &string);
|
||||
std::string filter :: to_lowercase(const std::string &string);
|
||||
Split the string into words following step 1 of "Parsing"
|
||||
(above). Assemble and return a result string using the lower
|
||||
case cache to convert each term to lowercase.
|
||||
|
|
|
@ -11,11 +11,12 @@ namespace filter {
|
|||
|
||||
void add(const std::string &, unsigned int);
|
||||
void search(const std::string &, std::set<unsigned int> &);
|
||||
std::string to_lowercase(const std::string &);
|
||||
|
||||
void print_cache_stats();
|
||||
|
||||
#ifdef CONFIG_TEST
|
||||
Database<IndexEntry, const std::string> &get_index();
|
||||
Database<IndexEntry> &get_index();
|
||||
#endif /* CONFIG_TEST */
|
||||
|
||||
};
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
#include <map>
|
||||
#include <set>
|
||||
|
||||
static Database<IndexEntry, const std::string> filter_index("");
|
||||
static Database<IndexEntry> filter_index("");
|
||||
static std::map<std::string, std::string> lowercase_cache;
|
||||
static unsigned int lowercase_cache_hits = 0;
|
||||
|
||||
|
@ -136,6 +136,22 @@ void filter :: search(const std::string &text, std::set<unsigned int> &res)
|
|||
find_intersection(*it, res);
|
||||
}
|
||||
|
||||
std::string filter :: to_lowercase(const std::string &text)
|
||||
{
|
||||
std::string res = "";
|
||||
std::list<std::string> parsed;
|
||||
std::list<std::string>::iterator it;
|
||||
|
||||
parse_text(text, parsed);
|
||||
for (it = parsed.begin(); it != parsed.end(); it++) {
|
||||
if (it != parsed.begin())
|
||||
res += " ";
|
||||
res += *it;
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
void filter :: print_cache_stats()
|
||||
{
|
||||
print("Lowercase cache size: %u\n", lowercase_cache.size());
|
||||
|
@ -143,7 +159,7 @@ void filter :: print_cache_stats()
|
|||
}
|
||||
|
||||
#ifdef CONFIG_TEST
|
||||
Database<IndexEntry, const std::string> &filter :: get_index()
|
||||
Database<IndexEntry> &filter :: get_index()
|
||||
{
|
||||
return filter_index;
|
||||
}
|
||||
|
|
|
@ -48,11 +48,11 @@ std::string quotes [] = {
|
|||
|
||||
static const unsigned int num_quotes = sizeof(quotes) / sizeof(std::string);
|
||||
|
||||
void print_index(Database<IndexEntry, const std::string> &db)
|
||||
void print_index(Database<IndexEntry> &db)
|
||||
{
|
||||
db.print_keys();
|
||||
for (unsigned int i = db.first(); i <= db.last(); i = db.next(i)) {
|
||||
print("index[%s] = ", db[i].primary_key().c_str());
|
||||
print("index[%s] = ", db[i].primary_key.c_str());
|
||||
db[i].print();
|
||||
print("\n");
|
||||
}
|
||||
|
@ -74,6 +74,12 @@ void test_search(const std::string &text)
|
|||
print("\n");
|
||||
}
|
||||
|
||||
void test_lowercase(const std::string &text)
|
||||
{
|
||||
std::string res = filter :: to_lowercase(text);
|
||||
print("Lowercasing: \"%s\" returned: \"%s\"\n", text.c_str(), res.c_str());
|
||||
}
|
||||
|
||||
void test_0()
|
||||
{
|
||||
for (unsigned int i = 0; i < num_quotes; i++)
|
||||
|
@ -96,12 +102,30 @@ void test_1()
|
|||
test_search("Th/i-eVEn");
|
||||
test_search("whoops");
|
||||
filter :: print_cache_stats();
|
||||
print("\n");
|
||||
}
|
||||
|
||||
void test_2()
|
||||
{
|
||||
test_lowercase("");
|
||||
test_lowercase("Rincewind");
|
||||
test_lowercase("Rincewind Twoflower Luggage");
|
||||
test_lowercase("the");
|
||||
test_lowercase("the is");
|
||||
test_lowercase("THE IS");
|
||||
test_lowercase("th i");
|
||||
test_lowercase("th i even");
|
||||
test_lowercase("Th/i-eVen");
|
||||
test_lowercase("whoops");
|
||||
test_lowercase("WHOOPS");
|
||||
filter :: print_cache_stats();
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
test_0();
|
||||
test_1();
|
||||
test_2();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -741,3 +741,17 @@ Search for: "whoops" returned 1 matches:
|
|||
|
||||
Lowercase cache size: 269
|
||||
Lowercase cache hits: 121
|
||||
|
||||
Lowercasing: "" returned: ""
|
||||
Lowercasing: "Rincewind" returned: "rincewind"
|
||||
Lowercasing: "Rincewind Twoflower Luggage" returned: "rincewind twoflower luggage"
|
||||
Lowercasing: "the" returned: "the"
|
||||
Lowercasing: "the is" returned: "the is"
|
||||
Lowercasing: "THE IS" returned: "the is"
|
||||
Lowercasing: "th i" returned: "th i"
|
||||
Lowercasing: "th i even" returned: "th i even"
|
||||
Lowercasing: "Th/i-eVen" returned: "th i even"
|
||||
Lowercasing: "whoops" returned: "whoops"
|
||||
Lowercasing: "WHOOPS" returned: "whoops"
|
||||
Lowercase cache size: 271
|
||||
Lowercase cache hits: 138
|
||||
|
|
Loading…
Reference in New Issue