libsaria: Format to a set of words

I don't care about the order of words for indexing, and I don't need
duplicates either.  Formatting to a unique set of words should make
things a little faster.
This commit is contained in:
Bryan Schumaker 2011-12-28 10:15:08 -05:00
parent abfd67cc84
commit 147a212809
4 changed files with 24 additions and 24 deletions

View File

@ -1,14 +1,14 @@
#ifndef LIBSARIA_FORMAT_H
#define LIBSARIA_FORMAT_H
#include <list>
#include <set>
#include <string>
using namespace std;
/* Text-formatting entry points exported by libsaria. */
namespace libsaria
{
/*
 * NOTE(review): the two format_text() declarations below are the before and
 * after versions from this change (list<string> replaced by set<string>).
 * They differ only in return type, so only one of them can be present in the
 * real header.
 */
list<string> *format_text(const string &);
set<string> *format_text(const string &);
/* NOTE(review): presumably reports the format cache hit/miss counters - confirm. */
void print_format_stats();
}

View File

@ -3,18 +3,18 @@
#include <libsaria/print.h>
#include <map>
#include <list>
#include <set>
using namespace std;
static map<string, list<string> > format_cache;
static map<string, set<string> > format_cache;
static unsigned int hits;
static unsigned int misses;
list<string> *do_format(const string &text)
set<string> *do_format(const string &text)
{
string word;
list<string> word_list;
pair< map<string, list<string> >::iterator, bool > ret;
set<string> word_set;
pair< map<string, set<string> >::iterator, bool > ret;
char c, diff = 'a' - 'A';
for (unsigned int i = 0; i < text.size(); i++) {
@ -44,7 +44,7 @@ list<string> *do_format(const string &text)
case '"':
case ' ':
if (word != "")
word_list.push_back(word);
word_set.insert(word);
word = "";
break;
default:
@ -54,18 +54,18 @@ list<string> *do_format(const string &text)
}
if (word != "")
word_list.push_back(word);
word_set.insert(word);
ret = format_cache.insert( pair<string, list<string> >(text, word_list) );
ret = format_cache.insert( pair<string, set<string> >(text, word_set) );
return &(ret.first->second);
}
namespace libsaria
{
list<string> *format_text(const string &text)
set<string> *format_text(const string &text)
{
map<string, list<string> >::iterator it;
map<string, set<string> >::iterator it;
it = format_cache.find(text);
/* Not found in cache... */

View File

@ -5,7 +5,7 @@
#include "index.h"
set<sid_t> results;
list<string> *cur_terms;
set<string> *cur_terms;
static bool filtered = false;
/*
@ -33,7 +33,7 @@ static void inplace_intersect(set<sid_t> *ids)
}
static inline void search_index(string &term,
static inline void search_index(const string &term,
map<string, set<sid_t> > *index,
set<ino_t> *found)
{
@ -43,17 +43,17 @@ static inline void search_index(string &term,
found->insert(it->second.begin(), it->second.end());
}
/*
 * Look up term in the artist, album and title indexes.
 *
 * term:  the search term, forwarded unchanged to search_index().
 * found: the set handed to each of the three search_index() calls.
 *
 * NOTE(review): the two signature lines below are the before and after
 * versions from this change (term became a const reference).
 */
static inline void search_indexes(string &term, set<ino_t> *found)
static inline void search_indexes(const string &term, set<ino_t> *found)
{
search_index(term, &artist_index, found);
search_index(term, &album_index, found);
search_index(term, &title_index, found);
}
static void do_filter(list<string> *terms)
static void do_filter(set<string> *terms)
{
set<ino_t> found;
list<string>::iterator it;
set<string>::iterator it;
for (it = terms->begin(); it != terms->end(); it++) {
found.clear();

View File

@ -17,7 +17,7 @@ static map<string, set<string> > substr_cache;
unsigned int hits;
unsigned int misses;
static set<string> *gen_substrs(string &word)
static set<string> *gen_substrs(const string &word)
{
string substr, key;
set<string> substrs;
@ -35,7 +35,7 @@ static set<string> *gen_substrs(string &word)
return &(ret.first->second);
}
static set<string> *find_substrs(string &word)
static set<string> *find_substrs(const string &word)
{
map<string, set<string> >::iterator it;
it = substr_cache.find(word);
@ -63,7 +63,7 @@ static void add_to_index(sid_t &songid, string key,
}
}
static void index_word(sid_t &songid, string &word,
static void index_word(sid_t &songid, const string &word,
map<string, set<sid_t> > *index)
{
set<string> *substrs = find_substrs(word);
@ -75,11 +75,11 @@ static void index_word(sid_t &songid, string &word,
/*
 * Index every word of one tag for a song.
 *
 * songid: the song id passed through to index_word().
 * tag:    the tag text, split into words by libsaria::format_text().
 * index:  the index passed through to index_word().
 *
 * NOTE(review): each list<string> / word_list line below is the pre-change
 * version of the set<string> / word_set line that follows it.
 */
static void index_tag(sid_t &songid, const string &tag,
map<string, set<sid_t> >*index)
{
list<string> *word_list;
list<string>::iterator it;
set<string> *word_set;
set<string>::iterator it;
word_list = libsaria::format_text(tag);
for (it = word_list->begin(); it != word_list->end(); it++)
word_set = libsaria::format_text(tag);
for (it = word_set->begin(); it != word_set->end(); it++)
/* Elements reached through a set iterator are const, so index_word() takes a const string &. */
index_word(songid, *it, index);
}