libsaria: Cache words generated from strings

Formatted word lists are now cached in a map keyed by the input string. If a string has already been formatted, its cached word list is returned instead of iterating over the same string again, potentially many times.
2011-12-27 18:48:57 -05:00 · 2011-12-27 18:48:57 -05:00 · 664ad0dad2
parent 6eaa8f944c
commit 664ad0dad2
3 changed files with 74 additions and 51 deletions
--- a/include/libsaria/format.h
+++ b/include/libsaria/format.h
@ -8,7 +8,7 @@ using namespace std;
 namespace libsaria
 {

-	void format_text(string &, list<string> &);
+	list<string> *format_text(string &);

 }

--- a/libsaria/format.cpp
+++ b/libsaria/format.cpp
@ -1,52 +1,75 @@

 #include <libsaria/format.h>

+#include <map>
+#include <list>
+using namespace std;
+
+static map<string, list<string> > format_cache;	/* input text -> list of words do_format() produced for it */
+
+/*
+ * Split text into lowercase words and cache the result in format_cache.
+ * ASCII letters are lowercased, digits kept, the separators below end the
+ * current word and every other character is dropped.  Returns the cache
+ * entry for text (the existing one, unchanged, if text was already cached).
+ */
+list<string> *do_format(string &text)
+{
+	string word;
+	list<string> word_list;
+	pair< map<string, list<string> >::iterator, bool > ret;
+	const char diff = 'a' - 'A';
+
+	// Index with size_type: unsigned int may be narrower than text.size()
+	for (string::size_type i = 0; i < text.size(); i++) {
+		const char c = text[i];
+		// Character already lower case
+		if ( (c >= 'a') && (c <= 'z') )
+			word += c;
+		// Convert uppercase to lowercase
+		else if ( (c >= 'A') && (c <= 'Z') )
+			word += static_cast<char>(c + diff);
+		// Keep numbers
+		else if ( (c >= '0') && (c <= '9') )
+			word += c;
+		else {
+			// These characters indicate a new word
+			switch (c) {
+			case '-': case '\\': case '/': case ',':
+			case ';': case '(':  case ')': case '_':
+			case '~': case '+':  case '"': case ' ':
+				if (!word.empty())
+					word_list.push_back(word);
+				word.clear();
+				break;
+			}
+		}
+	}
+
+	if (!word.empty())
+		word_list.push_back(word);
+
+	// Insert an empty list and swap the words in, which avoids deep
+	// copying word_list; an existing entry is left untouched.
+	ret = format_cache.insert( pair<string, list<string> >(text, list<string>()) );
+	if (ret.second)
+		ret.first->second.swap(word_list);
+	return &(ret.first->second);
+}
+
 namespace libsaria
 {

-	void format_text(string &text, list<string> &word_list)
+	/* Look up the word list for text, formatting it on a cache miss */
+	list<string> *format_text(string &text)
 	{
-		string word;
-		char c, diff = 'a' - 'A';
+		map<string, list<string> >::iterator hit = format_cache.find(text);

-		for (unsigned int i = 0; i < text.size(); i++) {
-			c = text[i];
-			// Character already lower case
-			if ( (c >= 'a') && (c <= 'z') )
-				word += c;
-			// Convert uppercase to lowercase
-			else if ( (c >= 'A') && (c <= 'Z') )
-				word += (c + diff);
-			// Keep numbers
-			else if ( (c >= '0') && (c <= '9') )
-				word += c;
-			else {
-				// These characters indicate a new word
-				switch (c) {
-				case '-':
-				case '\\':
-				case '/':
-				case ',':
-				case ';':
-				case '(':
-				case ')':
-				case '_':
-				case '~':
-				case '+':
-				case '"':
-				case ' ':
-					if (word != "")
-						word_list.push_back(word);
-					word = "";
-					break;
-				default:
-					break;
-				};
-			}
-		}
-
-		if (word != "")
-			word_list.push_back(word);
+		/* Seen before: reuse the words already in the cache */
+		if (hit != format_cache.end())
+			return &(hit->second);
+		/* Not found in cache... */
+		return do_format(text);
 	}

 } /* Namespace: libsaria */
--- a/libsaria/library/index.cpp
+++ b/libsaria/library/index.cpp
@ -46,10 +46,10 @@ static void index_word(sid_t &inode, string &word)

 static void index_tag(sid_t &inode, string tag)
 {
-	list<string> word_list;
 	list<string>::iterator it;
-	libsaria::format_text(tag, word_list);
-	for (it = word_list.begin(); it != word_list.end(); it++)
+	/* Borrow the cached word list for this tag and index every word */
+	list<string> *words = libsaria::format_text(tag);
+	for (it = words->begin(); it != words->end(); ++it)
 		index_word(inode, *it);
 }

@ -120,12 +120,12 @@ static void inplace_intersect(set<sid_t> *inodes)

 }

-static void do_filter(list<string> &terms)
+static void do_filter(list<string> *terms)
 {
 	list<string>::iterator it;
 	map<string, set<sid_t> >::iterator index_iter;

-	for (it = terms.begin(); it != terms.end(); it++) {
+	for (it = terms->begin(); it != terms->end(); it++) {
 		index_iter = substr_index.find(*it);
 		/*
 		 * Key not found means we don't need to filter anymore
@ -139,7 +139,7 @@ static void do_filter(list<string> &terms)
 		 * This is the first result, so the result set is empty.
 		 * Taking an intersection will always give us an empty set
 		 */
-		if (it == terms.begin())
+		if (it == terms->begin())
 			results = index_iter->second;
 		else
 			inplace_intersect(&index_iter->second);
@ -162,12 +162,12 @@ namespace libsaria

 	void library::filter(string &text)
 	{
-		list<string> key_list;
+		list<string> *key_list;

-		format_text(text, key_list);
+		key_list = format_text(text);
 		results.clear();

-		if (key_list.size() == 0)
+		if (key_list->size() == 0)
 			filtered = false;
 		else {
 			do_filter(key_list);