ocarina/libsaria/format.cpp
Bryan Schumaker 1933689d4f Added copyright lines to everything
I probably should have done this earlier... oh well

Signed-off-by: Bryan Schumaker <bjschuma@gmail.com>
2012-02-19 17:17:24 -05:00

130 lines
2.7 KiB
C++

// Copyright (c) 2011 Bryan Schumaker.
#include <libsaria/format.h>
#include <libsaria/print.h>
#include <map>
#include <set>
using namespace std;
/* Maps an input text to the set of unique normalized words found in it
 * (filled by do_format(), looked up by libsaria::format_text()). */
static map<string, set<string> > format_cache;
/* Maps an input text to its normalized words joined by single spaces
 * (filled by do_format(), looked up by libsaria::lowercase()). */
static map<string, string> lc_cache;
/* Number of libsaria::format_text() calls answered from format_cache. */
static unsigned int format_hits;
/* Number of libsaria::lowercase() calls answered from lc_cache. */
static unsigned int lc_hits;
/*
 * Tell whether ch terminates the word currently being collected.
 */
static bool is_word_break(char ch)
{
	switch (ch) {
	case ' ':
	case '"':
	case '+':
	case '~':
	case '_':
	case ')':
	case '(':
	case ';':
	case ',':
	case '/':
	case '\\':
	case '-':
		return true;
	default:
		return false;
	}
}

/*
 * Record a finished word: add it to the unique-word set and append it to
 * the space-separated string.  Empty words are skipped.  The word buffer
 * is always reset so the caller can start collecting the next word.
 */
static void commit_word(string &word, set<string> &unique, string &joined)
{
	if (!word.empty()) {
		unique.insert(word);
		if (!joined.empty())
			joined += " ";
		joined += word;
	}
	word.clear();
}

/*
 * Normalize text and store the result in both caches.
 *
 * ASCII letters are folded to lower case and digits are kept as-is.  The
 * characters accepted by is_word_break() end the current word.  Every other
 * character is dropped without ending the word.
 *
 * text:  the string to normalize; also used as the key in both caches.
 * words: set to the format_cache entry for text.
 * lc:    set to the lc_cache entry for text.
 *
 * map::insert() leaves an existing entry untouched, so if text is already
 * cached the returned iterators refer to the previously stored values.
 */
void do_format(const string &text,
	       map<string, set<string> >::iterator &words,
	       map<string, string>::iterator &lc)
{
	string current;
	string joined;
	set<string> unique;

	for (string::const_iterator pos = text.begin(); pos != text.end(); ++pos) {
		const char ch = *pos;

		if ((ch >= 'A') && (ch <= 'Z'))
			current += static_cast<char>(ch - 'A' + 'a');
		else if (((ch >= 'a') && (ch <= 'z')) || ((ch >= '0') && (ch <= '9')))
			current += ch;
		else if (is_word_break(ch))
			commit_word(current, unique, joined);
		/* Anything else is silently discarded. */
	}

	/* The text may end in the middle of a word. */
	commit_word(current, unique, joined);

	words = format_cache.insert(
			map<string, set<string> >::value_type(text, unique)).first;
	lc = lc_cache.insert(
			map<string, string>::value_type(text, joined)).first;
}
/*
 * Run do_format() on text and hand back only the unique-word cache entry.
 * The lowercase-cache iterator that do_format() also produces is discarded.
 *
 * text:  the string to format.
 * words: set by do_format() to the format_cache entry for text.
 */
void find_unique_words(const string &text,
		       map<string, set<string> >::iterator &words)
{
	map<string, string>::iterator ignored_lc;

	do_format(text, words, ignored_lc);
}
/*
 * Run do_format() on text and hand back only the lowercase cache entry.
 * The unique-word iterator that do_format() also produces is discarded.
 *
 * text: the string to format.
 * lc:   set by do_format() to the lc_cache entry for text.
 */
void find_lowercase(const string &text,
		    map<string, string>::iterator &lc)
{
	map<string, set<string> >::iterator ignored_words;

	do_format(text, ignored_words, lc);
}
namespace libsaria
{

	/*
	 * Return the set of unique normalized words for text.
	 *
	 * The result is looked up in format_cache first; on a miss the text
	 * is formatted and cached, on a hit format_hits is incremented.
	 * The returned pointer refers to the value stored inside the cache
	 * (no code in this file erases cache entries).
	 */
	set<string> *format_text(const string &text)
	{
		map<string, set<string> >::iterator it = format_cache.find(text);

		/* Not found in cache... */
		if (it == format_cache.end())
			find_unique_words(text, it);
		else
			format_hits++;
		return &(it->second);
	}

	/*
	 * Return the normalized, space-separated form of text.
	 *
	 * The result is looked up in lc_cache first; on a miss the text is
	 * formatted and cached, on a hit lc_hits is incremented.  The
	 * returned pointer refers to the value stored inside the cache
	 * (no code in this file erases cache entries).
	 */
	string *lowercase(const string &text)
	{
		map<string, string>::iterator it = lc_cache.find(text);

		/* Not found in cache */
		if (it == lc_cache.end())
			find_lowercase(text, it);
		else
			lc_hits++;
		return &(it->second);
	}

	/*
	 * Print the hit count and current size of both caches.
	 *
	 * NOTE(review): println() is declared in libsaria/print.h and is
	 * presumably printf-style -- confirm.  map::size() returns size_t,
	 * which does not match the %u conversion on targets where size_t is
	 * wider than unsigned int, so the sizes are narrowed explicitly.
	 */
	void print_format_stats()
	{
		println("Format cache hits: %u size: %u", format_hits,
			static_cast<unsigned int>(format_cache.size()));
		println("Lowercase cache hits: %u size: %u", lc_hits,
			static_cast<unsigned int>(lc_cache.size()));
	}

} /* Namespace: libsaria */