ocarina/lib/filter.cpp

160 lines
3.2 KiB
C++

/*
* Copyright 2013 (c) Anna Schumaker.
*/
#include <filter.h>
#include <index.h>
#include <print.h>
#include <algorithm>
#include <list>
#include <map>
/*
 * Index that add_substrings() inserts (word prefix, track id) pairs into
 * and that filter::search() queries.
 * NOTE(review): the meaning of the ("", false) constructor arguments is
 * defined by Index, which is declared outside this file -- confirm there.
 */
static Index filter_index("", false);
/*
 * Cache used by lower_text(): maps a word exactly as it was passed in to
 * its normalized form.
 */
static std::map<std::string, std::string> lowercase_cache;
/* Number of lower_text() calls answered from lowercase_cache. */
static unsigned int lowercase_cache_hits = 0;
/*
 * Split text into words at separator characters and append each non-empty
 * word to ret, in order of appearance.
 *
 * Separators are: \ / , ; ( ) _ - ~ + " space and tab.  Runs of separators
 * never produce empty words.  Every other character is copied unchanged.
 *
 * @param text  String to split.
 * @param ret   List that receives the words; existing contents are kept.
 */
static void split_text(const std::string &text, std::list<std::string> &ret)
{
	std::string word;

	for (std::string::size_type i = 0; i < text.size(); i++) {
		const char c = text[i];

		switch (c) {
		case '\\':
		case '/':
		case ',':
		case ';':
		case '(':
		case ')':
		case '_':
		case '-':
		case '~':
		case '+':
		case '"':
		case ' ':
		case '\t':
			/* Separator: flush the word collected so far, if any. */
			if (!word.empty()) {
				ret.push_back(word);
				word.clear();
			}
			break;
		default:
			word += c;
		}
	}

	/* The text may end without a trailing separator. */
	if (!word.empty())
		ret.push_back(word);
}
/*
 * Normalize a single word and append the result to ret.
 *
 * Normalization keeps ASCII digits and lowercase letters, converts ASCII
 * uppercase letters to lowercase and drops every other character.  The
 * result for each distinct input is remembered in lowercase_cache, and
 * lowercase_cache_hits is bumped whenever the cache answers a call.
 *
 * A word that normalizes to the empty string (for example "&") is cached
 * but NOT appended to ret, so callers never see empty search terms.
 *
 * @param text  Word to normalize.
 * @param ret   List that receives the normalized word.
 */
static void lower_text(const std::string &text, std::list<std::string> &ret)
{
	std::map<std::string, std::string>::iterator it = lowercase_cache.find(text);
	if (it != lowercase_cache.end()) {
		lowercase_cache_hits++;
		if (!it->second.empty())
			ret.push_back(it->second);
		return;
	}

	std::string word;
	for (std::string::size_type i = 0; i < text.size(); i++) {
		const char c = text[i];

		if ( (c >= 'a') && (c <= 'z') )
			word += c;
		else if ( (c >= 'A') && (c <= 'Z') )
			word += static_cast<char>(c + ('a' - 'A'));
		else if ( (c >= '0') && (c <= '9') )
			word += c;
		/* Anything else is dropped. */
	}

	/* Cache empty results too so the scan is not repeated. */
	lowercase_cache[text] = word;
	if (!word.empty())
		ret.push_back(word);
}
/*
 * Break text into words with split_text() and pass every word through
 * lower_text(), which appends its result to ret.
 *
 * @param text  Raw text to parse.
 * @param ret   List that receives the normalized words.
 */
static void parse_text(const std::string &text, std::list<std::string> &ret)
{
	std::list<std::string> words;
	split_text(text, words);

	std::list<std::string>::iterator word = words.begin();
	while (word != words.end()) {
		lower_text(*word, ret);
		++word;
	}
}
/*
 * Insert every non-empty prefix of text into filter_index under track_id,
 * shortest prefix first.  An empty text inserts nothing.
 *
 * @param text      Word whose prefixes are inserted.
 * @param track_id  Id stored with each prefix.
 */
static void add_substrings(const std::string &text, unsigned int track_id)
{
	unsigned int len = 1;

	while (len <= text.size()) {
		std::string prefix = text.substr(0, len);
		filter_index.insert(prefix, track_id);
		len++;
	}
}
/*
 * Parse text into normalized words and hand each one to add_substrings()
 * together with track_id.
 *
 * @param text      Text to parse.
 * @param track_id  Id passed along with every word.
 */
void filter :: add(const std::string &text, unsigned int track_id)
{
	std::list<std::string> words;
	parse_text(text, words);

	std::list<std::string>::iterator word = words.begin();
	while (word != words.end()) {
		add_substrings(*word, track_id);
		++word;
	}
}
static void find_intersection(std::string &text, std::set<unsigned int> &res)
{
Index::iterator it = filter_index.find(text);
std::set<unsigned int> tmp;
set_intersection(it->values.begin(), it->values.end(),
res.begin(), res.end(),
std::inserter<std::set<unsigned int> >(tmp, tmp.begin()));
res.swap(tmp);
}
/*
 * Search filter_index for the words in text.
 *
 * res is first assigned the values stored for the first word and is then
 * narrowed by find_intersection() once for every following word.
 *
 * res is not narrowed any further when text contains no words, or when
 * looking up the first word throws (any exception is swallowed there).
 * Exceptions raised while processing later words are not caught here.
 *
 * @param text  Search string; parsed with parse_text().
 * @param res   Set that receives the result.
 */
void filter :: search(const std::string &text, std::set<unsigned int> &res)
{
	std::list<std::string> terms;
	parse_text(text, terms);

	if (terms.empty())
		return;

	std::list<std::string>::iterator term = terms.begin();
	try {
		res = filter_index.find(*term)->values;
	} catch (...) {
		return;
	}

	/* Every remaining term narrows the current result. */
	++term;
	while (term != terms.end()) {
		find_intersection(*term, res);
		++term;
	}
}
/*
 * Parse text with parse_text() and join the resulting words with a single
 * space between neighbours.
 *
 * @param text  Text to convert.
 * @return The joined words; empty when parsing yields no words.
 */
std::string filter :: to_lowercase(const std::string &text)
{
	std::list<std::string> words;
	parse_text(text, words);

	std::string joined;
	std::list<std::string>::iterator word = words.begin();
	if (word == words.end())
		return joined;

	/* First word goes in bare, the rest are prefixed by a space. */
	joined += *word;
	for (++word; word != words.end(); ++word) {
		joined += " ";
		joined += *word;
	}
	return joined;
}
/*
 * Print the number of entries in lowercase_cache and the number of cache
 * hits recorded in lowercase_cache_hits, using print().
 */
void filter :: print_cache_stats()
{
	/*
	 * size() returns a size_t, which does not match a %u conversion on
	 * platforms where size_t is wider than unsigned int, so convert
	 * explicitly.  (Presumes print() takes printf-style arguments.)
	 */
	print("Lowercase cache size: %u\n",
			static_cast<unsigned int>(lowercase_cache.size()));
	print("Lowercase cache hits: %u\n", lowercase_cache_hits);
}