libsaria: Cache words generated from strings
I now look up a list of words from a map (if it exists), rather than having to iterate over the same string potentially many times.
This commit is contained in:
parent
6eaa8f944c
commit
664ad0dad2
|
@ -8,7 +8,7 @@ using namespace std;
|
|||
// Declarations of libsaria::format_text().
// NOTE(review): two declarations are listed below and the surrounding bare
// `|` rows look like residue of a rendered diff; presumably only one of the
// two declarations is meant to exist at a time -- confirm against the
// repository.
namespace libsaria
|
||||
{
|
||||
|
||||
// Form that fills a caller-supplied list with the words of the string.
void format_text(string &, list<string> &);
|
||||
// Form that returns a pointer to the list of words for the string.
list<string> *format_text(string &);
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -1,52 +1,75 @@
|
|||
|
||||
#include <libsaria/format.h>
|
||||
|
||||
#include <map>
|
||||
#include <list>
|
||||
using namespace std;
|
||||
|
||||
/*
 * Cache of tokenised strings: maps the exact input text to the list of
 * words generated from it.
 * NOTE(review): nothing in this function ever removes an entry, so the
 * cache grows with every distinct string it is given.
 */
static std::map<std::string, std::list<std::string> > format_cache;

/* Returns true if `c` is one of the characters that ends the current word. */
static bool is_word_separator(char c)
{
	switch (c) {
	case '-':
	case '\\':
	case '/':
	case ',':
	case ';':
	case '(':
	case ')':
	case '_':
	case '~':
	case '+':
	case '"':
	case ' ':
		return true;
	default:
		return false;
	}
}

/*
 * Split `text` into words and store the result in format_cache.
 *
 * ASCII letters are lowercased and digits are kept.  The characters
 * accepted by is_word_separator() end the current word.  Every other
 * character is dropped without splitting (so "Don't" becomes "dont").
 * Empty words are never stored.
 *
 * Returns a pointer to the list held by the cache entry for `text`.  If an
 * entry for `text` already exists it is left untouched and returned as is.
 * The pointer refers to storage owned by format_cache; callers must not
 * delete it.
 */
std::list<std::string> *do_format(std::string &text)
{
	typedef std::map<std::string, std::list<std::string> > cache_t;

	std::list<std::string> words;
	std::string word;
	const char diff = 'a' - 'A';

	for (std::string::size_type i = 0; i < text.size(); i++) {
		const char c = text[i];

		if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) {
			/* Lowercase letters and digits are kept as they are */
			word += c;
		} else if (c >= 'A' && c <= 'Z') {
			/* Convert uppercase to lowercase */
			word += static_cast<char>(c + diff);
		} else if (is_word_separator(c)) {
			if (!word.empty())
				words.push_back(word);
			word.clear();
		}
		/* Any other character is skipped and does not end the word */
	}

	if (!word.empty())
		words.push_back(word);

	/*
	 * Insert an empty list and swap the finished one in, so the word list
	 * is not deep-copied into the map.  The swap only happens for a newly
	 * created entry; an existing entry keeps its contents.
	 */
	std::pair<cache_t::iterator, bool> ret =
		format_cache.insert(cache_t::value_type(text, std::list<std::string>()));
	if (ret.second)
		ret.first->second.swap(words);

	return &(ret.first->second);
}
|
||||
|
||||
// NOTE(review): this region looks like a rendered diff of format_text():
// two signatures and statements from two different bodies appear
// interleaved, and the bare `|` rows are presumably residue of the diff
// view rather than source. Confirm against the repository before treating
// any of it as compilable code.
namespace libsaria
|
||||
{
|
||||
|
||||
// Signature that fills a caller-supplied word_list (apparently the form
// this commit replaces -- TODO confirm).
void format_text(string &text, list<string> &word_list)
|
||||
// Signature that returns a pointer to the list of words for `text`.
list<string> *format_text(string &text)
|
||||
{
|
||||
string word;
|
||||
char c, diff = 'a' - 'A';
|
||||
// Look for an existing entry for this exact text in format_cache.
map<string, list<string> >::iterator it;
|
||||
it = format_cache.find(text);
|
||||
|
||||
// Inline tokeniser that appends words to word_list; presumably part of the
// out-parameter version, since the pointer-returning signature has no
// word_list in scope.
for (unsigned int i = 0; i < text.size(); i++) {
|
||||
c = text[i];
|
||||
// Character already lower case
|
||||
if ( (c >= 'a') && (c <= 'z') )
|
||||
word += c;
|
||||
// Convert uppercase to lowercase
|
||||
else if ( (c >= 'A') && (c <= 'Z') )
|
||||
word += (c + diff);
|
||||
// Keep numbers
|
||||
else if ( (c >= '0') && (c <= '9') )
|
||||
word += c;
|
||||
else {
|
||||
// These characters indicate a new word
|
||||
switch (c) {
|
||||
case '-':
|
||||
case '\\':
|
||||
case '/':
|
||||
case ',':
|
||||
case ';':
|
||||
case '(':
|
||||
case ')':
|
||||
case '_':
|
||||
case '~':
|
||||
case '+':
|
||||
case '"':
|
||||
case ' ':
|
||||
// Store the word collected so far (if any) and start a new one.
if (word != "")
|
||||
word_list.push_back(word);
|
||||
word = "";
|
||||
break;
|
||||
// Any other character is ignored and does not end the word.
default:
|
||||
break;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Store the final word when the text does not end with a separator.
if (word != "")
|
||||
word_list.push_back(word);
|
||||
/* Not found in cache... */
|
||||
// ...so build the word list with do_format(); otherwise return a pointer
// to the list stored in the cache entry that was found.
if (it == format_cache.end())
|
||||
return do_format(text);
|
||||
else
|
||||
return &(it->second);
|
||||
}
|
||||
|
||||
} /* Namespace: libsaria */
|
||||
|
|
|
@ -46,10 +46,10 @@ static void index_word(sid_t &inode, string &word)
|
|||
|
||||
// Splits `tag` into words with libsaria::format_text() and passes each word,
// together with `inode`, to index_word().
// NOTE(review): statements from two versions appear interleaved here (a
// list held by value and a list held by pointer); presumably only the
// pointer form is current -- confirm against the repository.
static void index_tag(sid_t &inode, string tag)
|
||||
{
|
||||
// By-value list, filled through format_text()'s out-parameter form.
list<string> word_list;
|
||||
// Pointer to the list returned by format_text().
list<string> *word_list;
|
||||
list<string>::iterator it;
|
||||
libsaria::format_text(tag, word_list);
|
||||
for (it = word_list.begin(); it != word_list.end(); it++)
|
||||
word_list = libsaria::format_text(tag);
|
||||
for (it = word_list->begin(); it != word_list->end(); it++)
|
||||
// Index every word generated from the tag.
index_word(inode, *it);
|
||||
}
|
||||
|
||||
|
@ -120,12 +120,12 @@ static void inplace_intersect(set<sid_t> *inodes)
|
|||
|
||||
}
|
||||
|
||||
static void do_filter(list<string> &terms)
|
||||
static void do_filter(list<string> *terms)
|
||||
{
|
||||
list<string>::iterator it;
|
||||
map<string, set<sid_t> >::iterator index_iter;
|
||||
|
||||
for (it = terms.begin(); it != terms.end(); it++) {
|
||||
for (it = terms->begin(); it != terms->end(); it++) {
|
||||
index_iter = substr_index.find(*it);
|
||||
/*
|
||||
* Key not found means we don't need to filter anymore
|
||||
|
@ -139,7 +139,7 @@ static void do_filter(list<string> &terms)
|
|||
* This is the first result, so the result set is empty.
|
||||
* Taking an intersection will always give us an empty set
|
||||
*/
|
||||
if (it == terms.begin())
|
||||
if (it == terms->begin())
|
||||
results = index_iter->second;
|
||||
else
|
||||
inplace_intersect(&index_iter->second);
|
||||
|
@ -162,12 +162,12 @@ namespace libsaria
|
|||
|
||||
void library::filter(string &text)
|
||||
{
|
||||
list<string> key_list;
|
||||
list<string> *key_list;
|
||||
|
||||
format_text(text, key_list);
|
||||
key_list = format_text(text);
|
||||
results.clear();
|
||||
|
||||
if (key_list.size() == 0)
|
||||
if (key_list->size() == 0)
|
||||
filtered = false;
|
||||
else {
|
||||
do_filter(key_list);
|
||||
|
|
Loading…
Reference in New Issue