# Bryan Schumaker (8/10/2010)

# Module-level caches, filled in lazily by Index.setup() on the first insert.
# ttable    -- translation table (code point -> replacement code point, or
#              None to delete the character) used to normalise tags
# get       -- bound dict.get of the Index instance that ran setup(); kept
#              only for backward compatibility, insert() does not rely on it
# update    -- set.update
# translate -- the text type's translate method
# split     -- the text type's split method
ttable = None
get = None
update = None
translate = None
split = None


class Index(dict):
    """Inverted index mapping normalised words and letters to sets of ids.

    Keys are the lower-cased words found in the inserted tags, plus every
    individual character of those words; values are the sets of ids whose
    tags contained that key.
    """

    def __init__(self):
        dict.__init__(self)

    def setup(self):
        """Build the shared translation table and the cached helpers.

        The table deletes the characters in ``stripc``, turns the characters
        in ``splitc`` into spaces (so they act as word separators) and
        lower-cases upper-case letters.
        """
        import string

        global ttable
        global get
        global update
        global translate
        global split

        # Python 2 has a separate ``unicode`` text type; on Python 3 the
        # text type is ``str``.
        try:
            text = unicode
        except NameError:
            text = str

        get = self.get
        update = set.update
        translate = text.translate
        split = text.split

        space = ord(" ")
        stripc = u"\"#$%&'*+<=>@[]^`{|}~.?!"
        splitc = u"-\\/,:;()_~+"
        # string.uppercase / string.lowercase only exist on Python 2; fall
        # back to the ASCII constants where they are missing.
        upper = getattr(string, "uppercase", string.ascii_uppercase)
        lower = getattr(string, "lowercase", string.ascii_lowercase)

        table = dict((ord(c), None) for c in stripc)
        # Applied after the strip entries so that characters present in both
        # stripc and splitc ("~" and "+") end up as separators.
        table.update((ord(c), space) for c in splitc)
        table.update((ord(u), ord(l)) for u, l in zip(upper, lower))
        ttable = table

    def _add(self, key, id):
        """Record ``id`` under ``key``, creating the id set when needed."""
        ids = self.get(key)
        if ids is None:
            # Every key owns its own set; sharing one set object between
            # keys would leak later ids into unrelated entries.
            self[key] = set([id])
        else:
            ids.add(id)

    def insert(self, id, tags):
        """Index ``id`` under every word, and every letter, of ``tags``.

        id   -- hashable identifier to store in the index
        tags -- iterable of text strings (``unicode`` on Python 2); each one
                is normalised with the translation table and split on
                whitespace into words
        """
        if ttable is None:
            self.setup()

        for tag in tags:
            for word in split(translate(tag, ttable)):
                self._add(word, id)
                for letter in word:
                    self._add(letter, id)