2014-09-16 08:09:27 -04:00
|
|
|
/**
 * @file
 * Copyright 2013 (c) Anna Schumaker.
 */
|
|
|
|
|
2014-06-05 10:19:22 -04:00
|
|
|
#include <core/filter.h>
|
|
|
|
#include <core/index.h>
|
2013-08-25 10:33:48 -04:00
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
#include <list>
|
|
|
|
|
2014-03-12 20:23:34 -04:00
|
|
|
/*
 * File-local index used by the functions in this file: add_substrings()
 * inserts (word prefix, track id) pairs into it, and find_intersection() and
 * filter::search() look words up in it.
 * NOTE(review): the meaning of the ("", false) constructor arguments is
 * defined by Index in core/index.h -- confirm there.
 */
static Index filter_index("", false);
|
2013-08-25 10:33:48 -04:00
|
|
|
|
2014-03-15 20:44:07 -04:00
|
|
|
/**
 * Split text into lowercase alphanumeric words.
 *
 * ASCII letters are folded to lowercase and digits are kept unchanged.  The
 * separator characters listed in the switch below (punctuation, space and
 * tab) end the current word.  Every other character is dropped without
 * ending the word, so "don't" is parsed as the single word "dont".
 *
 * @param text The string to split.
 * @param ret  List that each non-empty word is appended to, in input order.
 *             Existing contents of the list are left in place.
 */
static void parse_text(const std::string &text, std::list<std::string> &ret)
{
	std::string word;
	char c;

	for (std::string::size_type i = 0; i < text.size(); i++) {
		c = text[i];

		if ( (c >= 'a') && (c <= 'z') ) {
			word += c;
			continue;
		} else if ( (c >= 'A') && (c <= 'Z') ) {
			/* Fold ASCII uppercase to lowercase without involving the locale. */
			word += static_cast<char>(c + ('a' - 'A'));
			continue;
		} else if ( (c >= '0') && (c <= '9') ) {
			word += c;
			continue;
		}

		switch (c) {
		case '\\':
		case '/':
		case ',':
		case ';':
		case '(':
		case ')':
		case '_':
		case '-':
		case '~':
		case '+':
		case '"':
		case ' ':
		case '\t': /* Must differ from ' ' above: duplicate case values do not compile. */
			/* A separator finishes the word collected so far, if any. */
			if (!word.empty()) {
				ret.push_back(word);
				word.clear();
			}
			break;
		default:
			/* Unlisted characters are skipped and do not split the word. */
			break;
		}
	}

	/* Flush the trailing word when the text does not end in a separator. */
	if (!word.empty())
		ret.push_back(word);
}
|
|
|
|
|
|
|
|
static void add_substrings(const std::string &text, unsigned int track_id)
|
|
|
|
{
|
2013-12-01 22:31:35 -05:00
|
|
|
std::string substr;
|
|
|
|
for (unsigned int i = 1; i <= text.size(); i++) {
|
|
|
|
substr = text.substr(0, i);
|
2014-03-12 20:23:34 -04:00
|
|
|
filter_index.insert(substr, track_id);
|
2013-12-01 22:31:35 -05:00
|
|
|
}
|
2013-08-25 10:33:48 -04:00
|
|
|
}
|
|
|
|
|
2014-03-15 20:44:07 -04:00
|
|
|
/**
 * Join a list of words into one string, separated by single spaces.
 *
 * The list is taken by const reference so that calling this function does
 * not copy every word.
 *
 * @param text The words to join, in order.
 * @return The joined string, or an empty string when text is empty.
 */
static std::string reassemble_text(const std::list<std::string> &text)
{
	std::string res;
	std::list<std::string>::const_iterator it = text.begin();

	if (it == text.end())
		return res;

	/* The first word is added without a leading separator. */
	res += *it;
	for (++it; it != text.end(); ++it) {
		res += ' ';
		res += *it;
	}
	return res;
}
|
|
|
|
|
|
|
|
std::string filter :: add(const std::string &text, unsigned int track_id)
|
2013-08-25 10:33:48 -04:00
|
|
|
{
|
|
|
|
std::list<std::string> parsed;
|
|
|
|
std::list<std::string>::iterator it;
|
|
|
|
|
|
|
|
parse_text(text, parsed);
|
|
|
|
for (it = parsed.begin(); it != parsed.end(); it++)
|
|
|
|
add_substrings(*it, track_id);
|
2014-03-15 20:44:07 -04:00
|
|
|
return reassemble_text(parsed);
|
2013-08-25 10:33:48 -04:00
|
|
|
}
|
|
|
|
|
2014-08-12 09:59:00 -04:00
|
|
|
/**
 * Insert the elements common to a and b into res.
 *
 * The inputs are only read, so they are taken by const reference.  res is
 * not cleared first: the common elements are added to whatever it already
 * holds.
 *
 * @param a   First set.
 * @param b   Second set.
 * @param res Set that receives every element present in both a and b.
 */
static void do_set_intersection(const std::set<unsigned int> &a,
				const std::set<unsigned int> &b,
				std::set<unsigned int> &res)
{
	std::set_intersection(a.begin(), a.end(), b.begin(), b.end(),
			      std::inserter(res, res.begin()));
}
|
|
|
|
|
2013-08-25 10:33:48 -04:00
|
|
|
static void find_intersection(std::string &text, std::set<unsigned int> &res)
|
|
|
|
{
|
|
|
|
std::set<unsigned int> tmp;
|
2014-08-12 09:59:00 -04:00
|
|
|
IndexEntry *it = filter_index.find(text);
|
2013-08-25 10:33:48 -04:00
|
|
|
|
2014-08-12 09:59:00 -04:00
|
|
|
if (it)
|
2014-11-04 08:21:14 -05:00
|
|
|
do_set_intersection(res, it->_values, tmp);
|
2013-08-25 10:33:48 -04:00
|
|
|
res.swap(tmp);
|
|
|
|
}
|
|
|
|
|
|
|
|
void filter :: search(const std::string &text, std::set<unsigned int> &res)
|
|
|
|
{
|
|
|
|
std::list<std::string> parsed;
|
|
|
|
std::list<std::string>::iterator it;
|
2014-03-26 17:19:40 -04:00
|
|
|
IndexEntry *found;
|
2013-08-25 10:33:48 -04:00
|
|
|
|
2014-08-12 10:19:56 -04:00
|
|
|
res.clear();
|
2013-08-25 10:33:48 -04:00
|
|
|
parse_text(text, parsed);
|
|
|
|
if (parsed.size() == 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
it = parsed.begin();
|
2014-03-26 17:19:40 -04:00
|
|
|
found = filter_index.find(*it);
|
|
|
|
if (!found)
|
2013-12-08 16:43:18 -05:00
|
|
|
return;
|
2014-11-04 08:21:14 -05:00
|
|
|
res = found->_values;
|
2013-08-25 10:33:48 -04:00
|
|
|
|
|
|
|
for (it++; it != parsed.end(); it++)
|
|
|
|
find_intersection(*it, res);
|
|
|
|
}
|
|
|
|
|
2014-03-15 20:44:07 -04:00
|
|
|
std::string filter :: lowercase(const std::string &text)
|
2013-12-30 20:54:19 -05:00
|
|
|
{
|
|
|
|
std::list<std::string> parsed;
|
|
|
|
parse_text(text, parsed);
|
2014-03-15 20:44:07 -04:00
|
|
|
return reassemble_text(parsed);
|
2013-08-25 10:33:48 -04:00
|
|
|
}
|