# ocarina/libsaria/collection/index.py
#
# 50 lines
# 1005 B
# Python

# Bryan Schumaker (8/10/2010)
# Character translation table shared by every Index instance.  It stays
# None until Index.setup() builds it (triggered lazily by Index.insert()).
# Keys are code points; values are None (drop the character), the code
# point of a space (split here) or the code point of a lowercase letter.
ttable = None
# Retained so the module keeps exporting this name.  Tags are normalized
# with the string's own translate() method, because string.translate()
# does not accept a dict table and is not available on Python 3.
translate = None


class Index:
    """Inverted index mapping tokens to the set of ids tagged with them.

    A token is either a normalized word taken from a tag or a single
    character of such a word.
    """

    def __init__(self):
        # token (word or single character) -> set of ids
        self.tokens = dict()

    def setup(self):
        """Build the module-level translation table used by insert()."""
        # Without this declaration the table would be bound to a local
        # name and insert() would keep seeing None.
        global ttable
        import string
        space = ord(" ")
        strip = u"\"#$%&'*+<=>@[]^`{|}~.?!"
        split = u"-\\/,:;()_~+"
        # Characters to strip map to None so translate() removes them.
        table = dict((ord(c), None) for c in strip)
        # Separators become spaces.  Applied after the strip set so that
        # characters listed in both ("~" and "+") end up as separators.
        table.update((ord(c), space) for c in split)
        # Fold ASCII upper case to lower case.
        table.update(
            (ord(upper), ord(lower))
            for upper, lower in zip(string.ascii_uppercase,
                                    string.ascii_lowercase))
        ttable = table

    def insert(self, id, tags):
        """Record ``id`` under every word, and every character, of ``tags``.

        Each tag is normalized with the shared translation table
        (punctuation dropped, separators turned into spaces, ASCII letters
        lowercased) and split on whitespace.

        id   -- hashable identifier to add to each matching token's set
        tags -- iterable of text (unicode) strings; the table is keyed by
                code point, so byte strings are not supported
        """
        if ttable is None:
            self.setup()
        tokens = self.tokens
        for tag in tags:
            for word in tag.translate(ttable).split():
                # Every token owns its own set; sharing one set object
                # between tokens would leak later ids across tokens.
                tokens.setdefault(word, set()).add(id)
                # NOTE(review): the original looked up the character but
                # stored under the whole word; indexing the character
                # itself is assumed to be the intent.
                for letter in word:
                    tokens.setdefault(letter, set()).add(id)