71 lines
1.4 KiB
Python
71 lines
1.4 KiB
Python
# Bryan Schumaker (8/10/2010)
|
|
|
|
import re
|
|
|
|
# Module-level shortcuts to the unbound ``unicode`` string methods, so they
# can be called as translate(text, table) / split(text).
translate, split = unicode.translate, unicode.split
|
|
# Code point of a plain space; every character in ``splitc`` is translated
# to it so that it acts as a word break.
space_ord = ord(" ")

# Characters that format_once maps to None, i.e. deletes from the text.
stripc = u"\"#$%&'*+<=>@[]^`{|}~.?!"

# Characters that format_once maps to a space.  "+" and "~" also appear in
# ``stripc``; the split mapping is applied later and therefore wins.
# The backslash is written as an explicit "\\" escape: the value is the same
# as the former "\/" spelling, which only worked because unknown escapes are
# passed through (and which newer Python versions warn about).
splitc = u"-\\/,:;()_~+"

# Translation table (code point -> replacement code point or None) shared by
# the format functions; stays None until format_once builds it.
ttable = None
|
|
|
|
def format_once(text):
    """Build the shared translation table, then tokenize ``text``.

    This is the first-call variant of ``format``.  It fills the module-level
    ``ttable`` with three kinds of entries:

    * every character of ``stripc`` maps to None (deleted),
    * every character of ``splitc`` maps to a space (overriding any strip
      entry for the same character),
    * every ASCII uppercase letter maps to its lowercase counterpart.

    It then rebinds the module-level name ``format`` to ``format_rest`` so
    later calls skip the table construction, and returns
    ``format_rest(text)``.

    text: a unicode string to normalize.
    Returns the list of words left after translation and whitespace
    splitting.
    """
    # ``format`` must be declared global as well: without it the assignment
    # below only created a local variable, so the switch to format_rest never
    # happened and the table was rebuilt on every single call.
    global ttable, format
    import string

    table = dict((ord(c), None) for c in stripc)
    table.update((ord(c), space_ord) for c in splitc)
    # Use the ASCII constants rather than the locale-dependent
    # string.uppercase / string.lowercase: those may differ in length or
    # order, which would break pairing letters up by position.
    table.update(
        (ord(big), ord(small))
        for big, small in zip(string.ascii_uppercase, string.ascii_lowercase)
    )

    ttable = table
    format = format_rest
    return format_rest(text)
|
|
def format_rest(text):
    """Normalize ``text`` with the prebuilt table and return its words.

    The module-level ``ttable`` is applied with ``text.translate`` and the
    result is split on whitespace.  ``ttable`` must already have been built
    (format_once does that); ``text`` has to be a string type whose
    ``translate`` accepts a mapping keyed by code point.
    """
    cleaned = text.translate(ttable)
    words = cleaned.split()
    return words
|
|
# Tokenizer entry point used by Index below (this module-level name shadows
# the builtin ``format``).  It starts out bound to format_once so that the
# translation table gets built when text is first formatted.
format = format_once
|
|
|
|
|
|
class Index(dict):
    """Inverted index: maps each normalized word to the set of ids tagged with it.

    Words are produced by the module-level ``format`` function, so both the
    indexed tags and the search text go through the same normalization.
    """

    def __init__(self):
        dict.__init__(self)

    def insert(self, tags, id):
        """Register ``id`` under every word found in ``tags``.

        tags: iterable of strings accepted by ``format``.
        id:   hashable identifier stored in the per-word id sets.
        """
        for tag in tags:
            for word in format(tag):
                self.setdefault(word, set()).add(id)

    def filter(self, text):
        """Return the ids that match every search term in ``text``.

        ``text`` is converted to unicode and split into terms with
        ``format``.  A term matches an indexed word when it occurs anywhere
        inside that word; an id is returned only if each term matched at
        least one of its words.  Text without any term yields an empty set.

        Returns a new set of ids.
        """
        terms = format(unicode(text))

        visible = None
        for term in terms:
            # Terms come out of format(), which deletes or splits on every
            # regex metacharacter, so a plain substring test gives the same
            # result as the former re.search(term, key) without going through
            # the regex engine for every (term, word) pair.
            matched = set()
            for word in self:
                if term in word:
                    matched.update(self[word])

            if visible is None:
                visible = matched
            else:
                visible.intersection_update(matched)

            if not visible:
                # The intersection can only shrink; nothing left to find.
                return set()

        if visible is None:
            return set()
        return visible
|
|
|
|
|