libsaria: Format to a set of words
Indexing doesn't depend on the order of words, and duplicate words add nothing, so format_text() now returns a set<string> instead of a list<string>. Formatting to a unique set of words should make indexing a little faster.
This commit is contained in:
parent
abfd67cc84
commit
147a212809
|
@ -1,14 +1,14 @@
|
|||
#ifndef LIBSARIA_FORMAT_H
|
||||
#define LIBSARIA_FORMAT_H
|
||||
|
||||
#include <list>
|
||||
#include <set>
|
||||
#include <string>
|
||||
using namespace std;
|
||||
|
||||
namespace libsaria
|
||||
{
|
||||
|
||||
list<string> *format_text(const string &);
|
||||
set<string> *format_text(const string &);
|
||||
void print_format_stats();
|
||||
|
||||
}
|
||||
|
|
|
@ -3,18 +3,18 @@
|
|||
#include <libsaria/print.h>
|
||||
|
||||
#include <map>
|
||||
#include <list>
|
||||
#include <set>
|
||||
using namespace std;
|
||||
|
||||
static map<string, list<string> > format_cache;
|
||||
static map<string, set<string> > format_cache;
|
||||
static unsigned int hits;
|
||||
static unsigned int misses;
|
||||
|
||||
list<string> *do_format(const string &text)
|
||||
set<string> *do_format(const string &text)
|
||||
{
|
||||
string word;
|
||||
list<string> word_list;
|
||||
pair< map<string, list<string> >::iterator, bool > ret;
|
||||
set<string> word_set;
|
||||
pair< map<string, set<string> >::iterator, bool > ret;
|
||||
char c, diff = 'a' - 'A';
|
||||
|
||||
for (unsigned int i = 0; i < text.size(); i++) {
|
||||
|
@ -44,7 +44,7 @@ list<string> *do_format(const string &text)
|
|||
case '"':
|
||||
case ' ':
|
||||
if (word != "")
|
||||
word_list.push_back(word);
|
||||
word_set.insert(word);
|
||||
word = "";
|
||||
break;
|
||||
default:
|
||||
|
@ -54,18 +54,18 @@ list<string> *do_format(const string &text)
|
|||
}
|
||||
|
||||
if (word != "")
|
||||
word_list.push_back(word);
|
||||
word_set.insert(word);
|
||||
|
||||
ret = format_cache.insert( pair<string, list<string> >(text, word_list) );
|
||||
ret = format_cache.insert( pair<string, set<string> >(text, word_set) );
|
||||
return &(ret.first->second);
|
||||
}
|
||||
|
||||
namespace libsaria
|
||||
{
|
||||
|
||||
list<string> *format_text(const string &text)
|
||||
set<string> *format_text(const string &text)
|
||||
{
|
||||
map<string, list<string> >::iterator it;
|
||||
map<string, set<string> >::iterator it;
|
||||
it = format_cache.find(text);
|
||||
|
||||
/* Not found in cache... */
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
#include "index.h"
|
||||
|
||||
set<sid_t> results;
|
||||
list<string> *cur_terms;
|
||||
set<string> *cur_terms;
|
||||
static bool filtered = false;
|
||||
|
||||
/*
|
||||
|
@ -33,7 +33,7 @@ static void inplace_intersect(set<sid_t> *ids)
|
|||
|
||||
}
|
||||
|
||||
static inline void search_index(string &term,
|
||||
static inline void search_index(const string &term,
|
||||
map<string, set<sid_t> > *index,
|
||||
set<ino_t> *found)
|
||||
{
|
||||
|
@ -43,17 +43,17 @@ static inline void search_index(string &term,
|
|||
found->insert(it->second.begin(), it->second.end());
|
||||
}
|
||||
|
||||
static inline void search_indexes(string &term, set<ino_t> *found)
|
||||
static inline void search_indexes(const string &term, set<ino_t> *found)
|
||||
{
|
||||
search_index(term, &artist_index, found);
|
||||
search_index(term, &album_index, found);
|
||||
search_index(term, &title_index, found);
|
||||
}
|
||||
|
||||
static void do_filter(list<string> *terms)
|
||||
static void do_filter(set<string> *terms)
|
||||
{
|
||||
set<ino_t> found;
|
||||
list<string>::iterator it;
|
||||
set<string>::iterator it;
|
||||
|
||||
for (it = terms->begin(); it != terms->end(); it++) {
|
||||
found.clear();
|
||||
|
|
|
@ -17,7 +17,7 @@ static map<string, set<string> > substr_cache;
|
|||
unsigned int hits;
|
||||
unsigned int misses;
|
||||
|
||||
static set<string> *gen_substrs(string &word)
|
||||
static set<string> *gen_substrs(const string &word)
|
||||
{
|
||||
string substr, key;
|
||||
set<string> substrs;
|
||||
|
@ -35,7 +35,7 @@ static set<string> *gen_substrs(string &word)
|
|||
return &(ret.first->second);
|
||||
}
|
||||
|
||||
static set<string> *find_substrs(string &word)
|
||||
static set<string> *find_substrs(const string &word)
|
||||
{
|
||||
map<string, set<string> >::iterator it;
|
||||
it = substr_cache.find(word);
|
||||
|
@ -63,7 +63,7 @@ static void add_to_index(sid_t &songid, string key,
|
|||
}
|
||||
}
|
||||
|
||||
static void index_word(sid_t &songid, string &word,
|
||||
static void index_word(sid_t &songid, const string &word,
|
||||
map<string, set<sid_t> > *index)
|
||||
{
|
||||
set<string> *substrs = find_substrs(word);
|
||||
|
@ -75,11 +75,11 @@ static void index_word(sid_t &songid, string &word,
|
|||
static void index_tag(sid_t &songid, const string &tag,
|
||||
map<string, set<sid_t> >*index)
|
||||
{
|
||||
list<string> *word_list;
|
||||
list<string>::iterator it;
|
||||
set<string> *word_set;
|
||||
set<string>::iterator it;
|
||||
|
||||
word_list = libsaria::format_text(tag);
|
||||
for (it = word_list->begin(); it != word_list->end(); it++)
|
||||
word_set = libsaria::format_text(tag);
|
||||
for (it = word_set->begin(); it != word_set->end(); it++)
|
||||
index_word(songid, *it, index);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue