ocarina/libsaria/collection/index.py

50 lines
1.0 KiB
Python

# Bryan Schumaker (8/10/2010)
# Unbound unicode methods bound to module-level names; nothing in the
# visible part of this file calls them.
translate = unicode.translate
split = unicode.split
# Code point of the space character: the replacement for every splitc char.
space_ord = ord(" ")
# Characters deleted from text outright (mapped to None in the table).
stripc = u"\"#$%&'*+<=>@[]^`{|}~.?!"
# Characters treated as word separators (mapped to a space).  "~" and "+"
# also appear in stripc; the separator mapping is applied last, so it wins.
splitc = u"-\/,:;()_~+"
# Table handed to unicode.translate(); stays None until format_once()
# builds it.
ttable = None
def format_once(text):
    """Build the translation table, then format ``text``.

    This is the first-call implementation of the module-level ``format``
    entry point.  It fills in the global ``ttable`` and rebinds ``format``
    to ``format_rest`` so that later calls skip the table construction.

    The table maps, by code point:
      * every character of ``stripc`` to None (the character is deleted),
      * every character of ``splitc`` to a space (it separates words),
      * every uppercase letter to its lowercase counterpart.

    Returns whatever ``format_rest(text)`` returns: the list of words left
    after translating ``text`` and splitting it on whitespace.
    """
    import string
    # ``format`` has to be declared global as well; without the declaration
    # the assignment below only creates a local name and the module keeps
    # calling format_once (and rebuilding the table) forever.
    global ttable, format

    table = dict((ord(c), None) for c in stripc)
    # Applied after the strip entries so that characters listed in both
    # stripc and splitc end up as separators.
    table.update((ord(c), space_ord) for c in splitc)
    # string.uppercase / string.lowercase are the Python 2 letter constants.
    table.update(
        (ord(up), ord(low))
        for up, low in zip(string.uppercase, string.lowercase)
    )

    ttable = table
    format = format_rest
    return format_rest(text)
def format_rest(text):
    """Translate ``text`` through the module table and split it into words.

    Uses the module-level ``ttable``, which ``format_once`` fills in; the
    translated text is split on whitespace and the resulting list returned.
    """
    cleaned = text.translate(ttable)
    return cleaned.split()
# Formatting entry point called by Index.  It starts out bound to
# format_once so the translation table is built the first time any text is
# formatted.  Note that this name shadows the builtin format() inside this
# module.
format = format_once
class Index(dict):
    """Word index: maps each formatted word to the set of ids tagged with it.

    Words are produced by the module-level ``format`` function, so keys are
    whatever that function yields for the inserted tags.
    """

    def __init__(self):
        dict.__init__(self)

    def insert(self, tags, id):
        """Record ``id`` under every word found in ``tags``.

        ``tags`` is an iterable of text values; each one is passed to
        ``format`` and ``id`` is added to the set stored for every
        resulting word.
        """
        for tag in tags:
            for word in format(tag):
                ids = self.get(word)
                if ids is None:
                    # Each word needs its own set object.  Storing one
                    # shared set under several words would make a later
                    # insert for any one of them leak its id into all the
                    # others.
                    self[word] = set([id])
                else:
                    ids.add(id)

    def filter(self, text):
        """Format ``text`` into search words and print them.

        ``text`` is converted to unicode before formatting.  Nothing is
        returned.
        """
        # NOTE(review): no lookup against the index happens here; this
        # looks unfinished -- confirm the intended behaviour.
        text = unicode(text)
        search = format(text)
        # Debug output.  Parenthesised so the line works both as a Python 2
        # print statement and as a print() call.
        print(search)