65 lines
1.2 KiB
Python
65 lines
1.2 KiB
Python
# Bryan Schumaker (8/10/2010)
|
|
|
|
# Module-level helper slots. All of them start out as None and are filled in
# by Index.setup(): the tag translation table plus cached callables.
ttable = get = update = translate = split = None
|
|
|
|
class Index(dict):
    """Inverted index from normalized words and characters to sets of ids.

    Keys are the lowercase words obtained by normalizing each tag passed to
    insert(), plus every individual character of those words. Each value is
    the set of ids that were inserted with a tag containing that key.
    """

    # Translation table shared by all instances; built lazily by setup().
    _table = None

    def __init__(self):
        dict.__init__(self)

    def setup(self):
        """Build the translation table used to normalize tags.

        The table maps code points as follows, later groups overriding
        earlier ones:

        1. characters in ``stripc`` are deleted,
        2. characters in ``splitc`` become a space (so they split words);
           ``~`` and ``+`` appear in both lists and therefore end up as
           spaces,
        3. ASCII uppercase letters become their lowercase counterparts.

        The module-level names ``ttable``, ``get``, ``update``,
        ``translate`` and ``split`` are still assigned here so that code
        reading them keeps working, but insert() no longer depends on them.
        """
        import string
        global ttable, get, update, translate, split

        # Python 2 spells the text type `unicode`; Python 3 only has `str`.
        try:
            text_type = unicode
        except NameError:
            text_type = str

        stripc = u"\"#$%&'*+<=>@[]^`{|}~.?!"
        splitc = u"-\\/,:;()_~+"
        space = ord(" ")

        table = dict((ord(c), None) for c in stripc)
        table.update((ord(c), space) for c in splitc)
        # The ASCII constants exist on Python 2 and 3 and, unlike
        # string.uppercase/lowercase, do not vary with the locale.
        table.update(
            (ord(up), ord(low))
            for up, low in zip(string.ascii_uppercase, string.ascii_lowercase)
        )

        Index._table = table

        # Published for backward compatibility only.
        ttable = table
        get = self.get
        update = set.update
        translate = text_type.translate
        split = text_type.split

    def _add(self, key, id):
        """Record *id* under *key*, creating a fresh set for a new key."""
        ids = self.get(key)
        if ids is None:
            # A new set per key: sharing one set between keys would make a
            # later insert for one key leak its id into the others.
            self[key] = set([id])
        else:
            ids.add(id)

    def insert(self, id, tags):
        """Index *id* under every word and character found in *tags*.

        Each tag is normalized with the translation table (punctuation
        stripped or turned into spaces, ASCII letters lowercased) and split
        on whitespace. *id* is then added to the set stored under each
        resulting word and under each single character of that word.

        id   -- hashable identifier to record.
        tags -- iterable of text (unicode) strings.
        """
        if self._table is None:
            self.setup()
        table = self._table

        for tag in tags:
            for word in tag.translate(table).split():
                self._add(word, id)
                for char in word:
                    self._add(char, id)
|
|
|
|
|