# Bryan Schumaker (8/10/2010)

# Translation table shared by every Index; built lazily by Index.setup().
# Maps character ordinals to None (delete) or to a replacement ordinal.
ttable = None

# Callable ``translate(text, table)`` used to normalize tags; bound by
# Index.setup().
translate = None


def _apply_table(text, table):
    """Return *text* translated through the ordinal-keyed dict *table*."""
    return text.translate(table)


class Index:
    """Inverted index mapping normalized tokens to sets of ids.

    ``tokens`` maps every word found in an inserted tag, and every single
    character of each such word, to the set of ids inserted with it.
    """

    def __init__(self):
        self.tokens = dict()

    def setup(self):
        """Build the module-level translation table and translate callable.

        The table deletes punctuation listed in ``strip``, turns the
        characters listed in ``split`` into spaces (so they separate words),
        and lowercases ASCII letters.
        """
        global ttable, translate
        import string

        space = ord(u" ")
        strip = u"\"#$%&'*+<=>@[]^`{|}~.?!"
        split = u"-\\/,:;()_~+"

        table = dict((ord(c), None) for c in strip)
        # Applied after the strip entries on purpose: "~" and "+" appear in
        # both lists and must end up as word separators, not deletions.
        table.update((ord(c), space) for c in split)
        table.update(
            (ord(upper), ord(lower))
            for upper, lower in zip(string.ascii_uppercase,
                                    string.ascii_lowercase)
        )

        ttable = table
        translate = _apply_table

    def insert(self, id, tags):
        """Record *id* under every token derived from *tags*.

        Each tag is normalized with the shared translation table and split
        on whitespace. *id* is added to the set stored for each resulting
        word and for each individual character of that word.

        id   -- hashable identifier to store.
        tags -- iterable of text strings (they must support ``translate``
                with a dict table; assumed to be unicode text).
        """
        if ttable is None or translate is None:
            self.setup()

        tokens = self.tokens
        for tag in tags:
            for word in translate(tag, ttable).split():
                # Every token owns its own set; sharing one set object
                # between tokens would leak later ids across tokens.
                tokens.setdefault(word, set()).add(id)
                for letter in word:
                    tokens.setdefault(letter, set()).add(id)