2014-09-16 08:09:27 -04:00
|
|
|
/**
|
|
|
|
* @file
|
2013-08-25 10:33:48 -04:00
|
|
|
* Copyright 2013 (c) Anna Schumaker.
|
|
|
|
*/
|
|
|
|
|
2014-06-05 10:19:22 -04:00
|
|
|
#include <core/filter.h>
|
|
|
|
#include <core/index.h>
|
2015-01-30 10:03:18 -05:00
|
|
|
#include <core/string.h>
|
2013-08-25 10:33:48 -04:00
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
#include <list>
|
|
|
|
|
2014-03-12 20:23:34 -04:00
|
|
|
static Index filter_index("", false);
|
2013-08-25 10:33:48 -04:00
|
|
|
|
2014-03-15 20:44:07 -04:00
|
|
|
/**
 * Split text into lowercase alphanumeric words.
 *
 * ASCII letters are folded to lowercase and, together with ASCII digits,
 * accumulated into the current word. The delimiter characters listed in
 * the switch below finish the current word. Every other character is
 * dropped without ending the word, so "a.b" yields the single word "ab".
 *
 * @param text	The text to split.
 * @param ret	List that receives the words, appended in order of appearance.
 */
static void parse_text(const std::string &text, std::list<std::string> &ret)
{
	std::string word;

	for (std::string::size_type i = 0; i < text.size(); i++) {
		const char c = text[i];

		if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) {
			word += c;
			continue;
		}
		if (c >= 'A' && c <= 'Z') {
			/* Fold ASCII uppercase onto the matching lowercase letter. */
			word += static_cast<char>(c + ('a' - 'A'));
			continue;
		}

		switch (c) {
		case '\\':
		case '/':
		case ',':
		case ';':
		case '(':
		case ')':
		case '_':
		case '-':
		case '~':
		case '+':
		case '"':
		case ' ':
		case '\t':
			/* A delimiter ends the word; runs of delimiters add nothing. */
			if (!word.empty()) {
				ret.push_back(word);
				word.clear();
			}
			break;
		default:
			/* Unlisted characters are skipped and do not split the word. */
			break;
		}
	}

	/* Flush a word that runs up to the end of the text. */
	if (!word.empty())
		ret.push_back(word);
}
|
|
|
|
|
2015-01-30 10:03:18 -05:00
|
|
|
const std::string filter :: add(const std::string &text, unsigned int index)
|
2013-08-25 10:33:48 -04:00
|
|
|
{
|
2015-01-30 10:03:18 -05:00
|
|
|
const std::string lc = string :: lowercase(text);
|
|
|
|
size_t begin = 0, end;
|
2014-03-15 20:44:07 -04:00
|
|
|
|
2015-01-30 10:03:18 -05:00
|
|
|
for (end = 1; end <= lc.size(); end++) {
|
|
|
|
filter_index.insert(lc.substr(begin, end - begin), index);
|
|
|
|
if (lc[end] == ' ')
|
|
|
|
begin = ++end;
|
|
|
|
}
|
|
|
|
return lc;
|
2013-08-25 10:33:48 -04:00
|
|
|
}
|
|
|
|
|
2014-11-06 08:24:13 -05:00
|
|
|
static void do_set_intersection(IndexEntry *entry,
|
2014-08-12 09:59:00 -04:00
|
|
|
std::set<unsigned int> &res)
|
|
|
|
{
|
2014-11-06 08:24:13 -05:00
|
|
|
std::set<unsigned int> tmp;
|
|
|
|
|
|
|
|
set_intersection(entry->begin(), entry->end(), res.begin(), res.end(),
|
|
|
|
std::inserter<std::set<unsigned int> >(tmp, tmp.begin()));
|
|
|
|
res.swap(tmp);
|
2014-08-12 09:59:00 -04:00
|
|
|
}
|
|
|
|
|
2013-08-25 10:33:48 -04:00
|
|
|
static void find_intersection(std::string &text, std::set<unsigned int> &res)
|
|
|
|
{
|
2014-08-12 09:59:00 -04:00
|
|
|
IndexEntry *it = filter_index.find(text);
|
|
|
|
if (it)
|
2014-11-06 08:24:13 -05:00
|
|
|
do_set_intersection(it, res);
|
|
|
|
else
|
|
|
|
res.clear();
|
2013-08-25 10:33:48 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
void filter :: search(const std::string &text, std::set<unsigned int> &res)
|
|
|
|
{
|
|
|
|
std::list<std::string> parsed;
|
|
|
|
std::list<std::string>::iterator it;
|
2014-03-26 17:19:40 -04:00
|
|
|
IndexEntry *found;
|
2013-08-25 10:33:48 -04:00
|
|
|
|
2014-08-12 10:19:56 -04:00
|
|
|
res.clear();
|
2013-08-25 10:33:48 -04:00
|
|
|
parse_text(text, parsed);
|
2014-11-06 08:24:13 -05:00
|
|
|
|
2013-08-25 10:33:48 -04:00
|
|
|
if (parsed.size() == 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
it = parsed.begin();
|
2014-03-26 17:19:40 -04:00
|
|
|
found = filter_index.find(*it);
|
|
|
|
if (!found)
|
2013-12-08 16:43:18 -05:00
|
|
|
return;
|
2014-11-06 08:24:13 -05:00
|
|
|
std::copy(found->begin(), found->end(), std::inserter(res, res.begin()));
|
2013-08-25 10:33:48 -04:00
|
|
|
|
|
|
|
for (it++; it != parsed.end(); it++)
|
|
|
|
find_intersection(*it, res);
|
|
|
|
}
|