ocarina/libsaria/format.cpp

210 lines
4.6 KiB
C++

// Copyright (c) 2011 Bryan Schumaker.
#include <format.h>
#include <print.h>
#include <map>
#include <set>
#include <sstream>
using namespace std;
/* Cache: original text -> set of unique normalized words found in it. */
static map<string, set<string> > format_cache;
/* Cache: word -> set of that word's non-empty prefixes. */
static map<string, set<string> > substr_cache;
/* Cache: original text -> its normalized words joined by single spaces. */
static map<string, string> lc_cache;
/* Cache hit counters, one per cache; reported by print_format_stats(). */
static unsigned int format_hits = 0;
static unsigned int substr_hits = 0;
static unsigned int lc_hits = 0;
/* Size of each time unit in seconds; length_string() divides by these. */
static const unsigned int TM_SECOND = 1;
static const unsigned int TM_MINUTE = 60;
static const unsigned int TM_HOUR = TM_MINUTE * 60;
static const unsigned int TM_DAY = TM_HOUR * 24;
/* Returns true for the characters that terminate the word being built. */
static bool is_word_break(char ch)
{
	switch (ch) {
	case '-': case '\\': case '/': case ',':
	case ';': case '(':  case ')': case '_':
	case '~': case '+':  case '"': case ' ':
		return true;
	default:
		return false;
	}
}

/*
 * Record a finished word: add it to the unique-word set and append it to the
 * space-separated lowercase string. An empty word is ignored.
 */
static void commit_word(const string &word, set<string> &unique,
		string &lowered)
{
	if (word.empty())
		return;
	unique.insert(word);
	if (!lowered.empty())
		lowered += " ";
	lowered += word;
}

/*
 * Normalize text and store the result in both format_cache and lc_cache.
 *
 * Letters are lowercased (ASCII only), digits are kept, the characters
 * accepted by is_word_break() split words, and every other character is
 * dropped without ending the current word.
 *
 * text:  string to normalize; also used as the key in both caches.
 * words: set to the format_cache entry for text.
 * lc:    set to the lc_cache entry for text.
 *
 * If either cache already has an entry for text, that entry is left as it is
 * and its iterator is returned.
 */
void do_format(const string &text,
		map<string, set<string> >::iterator &words,
		map<string, string>::iterator &lc)
{
	const char to_lower = 'a' - 'A';
	string current;
	string lowered;
	set<string> unique;

	for (string::size_type pos = 0; pos < text.size(); ++pos) {
		const char ch = text[pos];

		if ((ch >= 'A') && (ch <= 'Z')) {
			current += static_cast<char>(ch + to_lower);
		} else if (((ch >= 'a') && (ch <= 'z')) ||
			   ((ch >= '0') && (ch <= '9'))) {
			current += ch;
		} else if (is_word_break(ch)) {
			commit_word(current, unique, lowered);
			current = "";
		}
		/* Any other character is skipped. */
	}
	/* The text may end in the middle of a word. */
	commit_word(current, unique, lowered);

	words = format_cache.insert(
			map<string, set<string> >::value_type(text, unique)).first;
	lc = lc_cache.insert(
			map<string, string>::value_type(text, lowered)).first;
}
/*
 * Run do_format() on text and hand back only the iterator to its unique-word
 * entry; the lowercase-string iterator that do_format() also produces is
 * discarded.
 */
void find_unique_words(const string &text,
		map<string, set<string> >::iterator &words)
{
	map<string, string>::iterator unused_lc;

	do_format(text, words, unused_lc);
}
/*
 * Build the set of every non-empty prefix of word (including word itself),
 * store it in substr_cache under word, and return a pointer to the cached set.
 * If word already has an entry, the existing set is returned unchanged.
 */
static set<string> *gen_substrs(const string &word)
{
	set<string> prefixes;
	map<string, set<string> >::iterator entry;

	for (string::size_type len = word.size(); len > 0; --len)
		prefixes.insert(word.substr(0, len));

	entry = substr_cache.insert(
			map<string, set<string> >::value_type(word, prefixes)).first;
	return &(entry->second);
}
/*
 * Return the cached prefix set for word, generating and caching it through
 * gen_substrs() on a miss. A cache hit increments substr_hits.
 */
static set<string> *find_substrs(const string &word)
{
	const map<string, set<string> >::iterator cached = substr_cache.find(word);

	if (cached != substr_cache.end()) {
		substr_hits++;
		return &(cached->second);
	}
	return gen_substrs(word);
}
/*
 * Run do_format() on text and hand back only the iterator to its lowercase
 * string entry; the unique-word iterator that do_format() also produces is
 * discarded.
 */
void find_lowercase(const string &text,
		map<string, string>::iterator &lc)
{
	map<string, set<string> >::iterator unused_words;

	do_format(text, unused_words, lc);
}
/*
 * Append "<count> <field>" to stream, where count = length / factor, adding
 * an "s" when count is greater than one.
 *
 * stream: output being assembled.
 * factor: size of one unit of field, in the same units as length.
 * field:  singular unit name.
 * length: amount still to be described.
 * total:  non-zero when something was already written to stream, in which
 *         case ", " is emitted first.
 *
 * Returns the amount of length consumed (count * factor); this is 0, with
 * nothing written, when length is smaller than factor.
 */
unsigned int add_to_stream(stringstream &stream, const unsigned int factor,
		string field, unsigned int length, unsigned int total)
{
	const unsigned int count = length / factor;

	if (count != 0) {
		if (total != 0)
			stream << ", ";
		stream << count << " " << field;
		if (count > 1)
			stream << "s";
	}
	return count * factor;
}
namespace libsaria
{
set<string> *format_text(const string &text)
{
map<string, set<string> >::iterator it;
it = format_cache.find(text);
/* Not found in cache... */
if (it == format_cache.end())
find_unique_words(text, it);
else
format_hits++;
return &(it->second);
}
void format_substrs(const string &text, set<string> &res)
{
set<string> *words, *substrs;
set<string>::iterator it;
words = format_text(text);
for (it = words->begin(); it != words->end(); it++) {
substrs = find_substrs(*it);
res.insert(substrs->begin(), substrs->end());
}
}
string *lowercase(const string &text)
{
map<string, string>::iterator it;
it = lc_cache.find(text);
/* Not found in cache */
if (it == lc_cache.end())
find_lowercase(text, it);
else
lc_hits++;
return &(it->second);
}
string length_string(unsigned int len)
{
unsigned int tot;
stringstream stream;
if (len == 0)
return "";
tot = add_to_stream(stream, TM_DAY, "day", len, 0);
tot += add_to_stream(stream, TM_HOUR, "hour", len - tot, tot);
tot += add_to_stream(stream, TM_MINUTE, "minute", len - tot, tot);
add_to_stream(stream, TM_SECOND, "second", len - tot, tot);
return stream.str();
}
void print_format_stats()
{
println("Format cache hits: %u size: %u", format_hits, format_cache.size());
println("Substring cache hits: %u size: %u", substr_hits, substr_cache.size());
println("Lowercase cache hits: %u size: %u", lc_hits, lc_cache.size());
}
} /* Namespace: libsaria */