libsaria: Cache words generated from strings

I now look up a list of words from a map (if it exists), rather than
having to iterate over the same string potentially many times.
This commit is contained in:
Bryan Schumaker 2011-12-27 18:48:57 -05:00
parent 6eaa8f944c
commit 664ad0dad2
3 changed files with 74 additions and 51 deletions

View File

@ -8,7 +8,7 @@ using namespace std;
namespace libsaria
{
// NOTE(review): both declarations below appear in the same diff hunk with no
// +/- markers; presumably the reference-taking form is the one being replaced
// by the pointer-returning form -- confirm against the actual header.

// Takes the text plus a caller-supplied list<string>; returns nothing.
void format_text(string &, list<string> &);
// Takes only the text and returns a pointer to a list<string>.
list<string> *format_text(string &);
}

View File

@ -1,52 +1,75 @@
#include <libsaria/format.h>
#include <map>
#include <list>
using namespace std;

/* Maps an input string to the list of words previously generated from it. */
static map<string, list<string> > format_cache;

/* Returns true when `c` is one of the characters that terminates a word. */
static bool is_word_break(char c)
{
	static const char breaks[] = "-\\/,;()_~+\" ";

	for (const char *p = breaks; *p != '\0'; ++p) {
		if (*p == c)
			return true;
	}
	return false;
}

/*
 * Split `text` into lowercase words and store the result in format_cache.
 *
 * ASCII letters are folded to lowercase and digits are kept.  The characters
 * - \ / , ; ( ) _ ~ + " and space end the current word; any other character
 * is dropped without ending the word.  Empty words are never stored.
 *
 * Returns a pointer to the list held by format_cache under `text`.  If the
 * cache already has an entry for `text`, that existing list is returned and
 * the freshly built one is discarded.
 */
list<string> *do_format(string &text)
{
	const char to_lower = 'a' - 'A';
	list<string> words;
	string current;

	for (string::const_iterator pos = text.begin(); pos != text.end(); ++pos) {
		const char ch = *pos;
		const bool is_lower = (ch >= 'a') && (ch <= 'z');
		const bool is_upper = (ch >= 'A') && (ch <= 'Z');
		const bool is_digit = (ch >= '0') && (ch <= '9');

		if (is_lower || is_digit) {
			current += ch;
		} else if (is_upper) {
			current += static_cast<char>(ch + to_lower);
		} else if (is_word_break(ch)) {
			/* Close the word in progress, skipping empty ones */
			if (!current.empty())
				words.push_back(current);
			current.clear();
		}
	}

	/* Flush the trailing word, if the text did not end on a separator */
	if (!current.empty())
		words.push_back(current);

	/* insert() returns the iterator of the new or already-present entry */
	map<string, list<string> >::value_type entry(text, words);
	return &(format_cache.insert(entry).first->second);
}
namespace libsaria
{
/*
 * NOTE(review): this span is a diff hunk shown without +/- markers, so two
 * signatures for format_text and two bodies are interleaved below.
 * Presumably the reference-taking signature and the tokenizing loop are the
 * removed side and the cache lookup is the added side -- confirm against the
 * repository before relying on either reading.
 */
void format_text(string &text, list<string> &word_list)
list<string> *format_text(string &text)
{
string word;
char c, diff = 'a' - 'A';
/* Look the text up in format_cache before doing any work */
map<string, list<string> >::iterator it;
it = format_cache.find(text);
// NOTE(review): the loop below appends to word_list, which only the
// reference-taking signature declares; it matches the loop in do_format.
for (unsigned int i = 0; i < text.size(); i++) {
c = text[i];
// Character already lower case
if ( (c >= 'a') && (c <= 'z') )
word += c;
// Convert uppercase to lowercase
else if ( (c >= 'A') && (c <= 'Z') )
word += (c + diff);
// Keep numbers
else if ( (c >= '0') && (c <= '9') )
word += c;
else {
// These characters indicate a new word
switch (c) {
case '-':
case '\\':
case '/':
case ',':
case ';':
case '(':
case ')':
case '_':
case '~':
case '+':
case '"':
case ' ':
if (word != "")
word_list.push_back(word);
word = "";
break;
default:
// Any other character is ignored and does not end the word
break;
};
}
}
// Store the final word when the text does not end on a separator
if (word != "")
word_list.push_back(word);
/* Not found in cache... */
/* ...so let do_format() build the word list and insert it into format_cache */
if (it == format_cache.end())
return do_format(text);
else
/* Cache hit: return a pointer to the list stored in the map */
return &(it->second);
}
} /* Namespace: libsaria */

View File

@ -46,10 +46,10 @@ static void index_word(sid_t &inode, string &word)
/*
 * Split `tag` into words with libsaria::format_text() and pass each word,
 * together with `inode`, to index_word().
 *
 * NOTE(review): this is a diff hunk shown without +/- markers, so word_list
 * is declared twice and both the by-reference and by-pointer call/loop forms
 * appear.  Presumably the pointer forms are the added side -- confirm.
 */
static void index_tag(sid_t &inode, string tag)
{
list<string> word_list;
list<string> *word_list;
list<string>::iterator it;
// Caller-owned list filled in by format_text
libsaria::format_text(tag, word_list);
for (it = word_list.begin(); it != word_list.end(); it++)
// List returned by format_text through a pointer
word_list = libsaria::format_text(tag);
for (it = word_list->begin(); it != word_list->end(); it++)
index_word(inode, *it);
}
@ -120,12 +120,12 @@ static void inplace_intersect(set<sid_t> *inodes)
}
static void do_filter(list<string> &terms)
static void do_filter(list<string> *terms)
{
list<string>::iterator it;
map<string, set<sid_t> >::iterator index_iter;
for (it = terms.begin(); it != terms.end(); it++) {
for (it = terms->begin(); it != terms->end(); it++) {
index_iter = substr_index.find(*it);
/*
* Key not found means we don't need to filter anymore
@ -139,7 +139,7 @@ static void do_filter(list<string> &terms)
* This is the first result, so the result set is empty.
* Taking an intersection will always give us an empty set
*/
if (it == terms.begin())
if (it == terms->begin())
results = index_iter->second;
else
inplace_intersect(&index_iter->second);
@ -162,12 +162,12 @@ namespace libsaria
void library::filter(string &text)
{
list<string> key_list;
list<string> *key_list;
format_text(text, key_list);
key_list = format_text(text);
results.clear();
if (key_list.size() == 0)
if (key_list->size() == 0)
filtered = false;
else {
do_filter(key_list);