2010-03-31 00:13:37 -04:00
|
|
|
#! /usr/bin/python
|
|
|
|
|
|
|
|
# To change this template, choose Tools | Templates
|
|
|
|
# and open the template in the editor.
|
|
|
|
|
|
|
|
__author__="bjschuma"
|
|
|
|
__date__ ="$Mar 23, 2010 9:46:43 PM$"
|
|
|
|
|
|
|
|
import string
|
|
|
|
import re
|
|
|
|
|
|
|
|
global index
|
|
|
|
import ocarina
|
|
|
|
from ct import path
|
|
|
|
|
|
|
|
from et import sql
|
|
|
|
import cPickle as pickle
|
|
|
|
|
|
|
|
# In-memory search tables; replaced below by the pickled copy when one exists.
# index maps a word to the set of track ids tagged with that word.
# alpha maps a single character to the set of track ids whose words contain it.
index = dict()
alpha = dict()

# Location of the persisted (index, alpha) tuple.
filePath = path.join( ocarina.vars.OCARINA, "index.pickle" )

if path.exists(filePath):
    # Binary mode: the file is written with pickle.HIGHEST_PROTOCOL, which is
    # a binary format and can be corrupted by text-mode newline translation.
    # NOTE(security): unpickling runs whatever the file encodes, so this file
    # must only ever be one produced by this module.
    with open( filePath, 'rb' ) as handle:
        (index,alpha) = pickle.Unpickler( handle ).load()
|
|
|
|
|
|
|
|
|
|
|
|
def format(word):
    """Normalize a piece of text into a list of lowercase search words.

    The text is lowercased, the punctuation characters listed in ``strip``
    are removed, and the remainder is split on spaces, hyphens, backslashes
    and forward slashes.  Each piece has surrounding whitespace stripped and
    empty pieces are dropped, so an empty or punctuation-only input yields
    an empty list.

    word -- the text to normalize
    Returns a list of non-empty words, in their original order.
    """
    strip = "!\"#$%&'()*+,.:;<=>?@[]^_`{|}~"
    # A character class: each of space, '-', '\' and '/' is a separator.
    # (Written as alternatives, "\\|/" would mean the literal text "|/".)
    split = r"[ \-\\/]"

    word = word.lower()
    for char in strip:
        word = word.replace(char, '')

    pieces = [piece.strip() for piece in re.split(split, word)]
    # Consecutive separators produce empty pieces; they are not words.
    return [piece for piece in pieces if piece]
|
|
|
|
|
|
|
|
|
|
|
|
def add(tag,trid):
    """Record a track id under every word, and every character, of a tag.

    The tag is broken into words with format().  Each word becomes a key in
    ``index`` and each character of each word becomes a key in ``alpha``;
    the track id is added to the set stored under each key.

    tag  -- text describing the track
    trid -- the track id; anything int() accepts
    Raises whatever int() raises when trid is not convertible.
    """
    # Store the same integer id in both tables so that sets taken from
    # index and from alpha can be intersected with each other.
    track_id = int(trid)

    for word in format(tag):
        index.setdefault(word, set()).add(track_id)
        for letter in word:
            alpha.setdefault(letter, set()).add(track_id)
|
|
|
|
|
|
|
|
|
|
|
|
def reindex():
    """Rebuild the in-memory search tables from the track database.

    Selects the id, track name, artist name and album name of every track,
    empties both ``index`` and ``alpha``, and feeds each row to add() with
    the three names joined by single spaces.  Nothing is written to disk.
    """
    global index
    global alpha

    select = "track.id,track.name,artist.name,album.name"
    frm = "track,artist,album"
    where = "track.artist=artist.id AND track.album=album.id"
    # Run the query before clearing anything, so a failing query leaves the
    # current tables untouched.
    rows = sql.Select(select,frm,where).execute().fetchall()

    # Clear both tables: add() fills both, and leaving alpha in place would
    # keep ids of tracks that are no longer in the database.
    index = dict()
    alpha = dict()
    for entry in rows:
        # Columns follow the select list: id, track, artist, album.
        words = " ".join( (entry[1], entry[2], entry[3]) )
        add(words,entry[0])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def save():
    """Pickle the (index, alpha) tuple to filePath, replacing the file.

    Raises whatever open() or the pickler raise; the file handle is closed
    in either case.
    """
    # Binary mode: HIGHEST_PROTOCOL is a binary pickle format and must not
    # pass through text-mode newline translation.
    with open( filePath, 'wb' ) as handle:
        pickle.Pickler(handle,pickle.HIGHEST_PROTOCOL).dump( (index,alpha) )
|
|
|
|
|
|
|
|
|
|
|
|
def psearch(phrase,options):
    """Return the ids that match every word of a phrase.

    phrase  -- non-empty list of words
    options -- dict mapping each word to the set of ids matching that word
    Returns a new set: the intersection of options[word] over all words in
    the phrase.  Neither ``options`` nor the sets it holds are modified.
    Raises IndexError for an empty phrase and KeyError for a word that is
    missing from options.
    """
    # Work on a copy: intersecting in place would shrink the caller's set
    # and change the answer for any later phrase that uses the same word.
    results = set(options[phrase[0]])
    for word in phrase[1:]:
        results.intersection_update(options[word])
    return results
|
|
|
|
|
|
|
|
|
|
|
|
def search(text):
    """Return the set of track ids matching a comma-separated search string.

    The text is split on commas into phrases and each phrase is broken into
    words with format().  A track matches a phrase when it matches every
    word of that phrase; the result is the union over all phrases.

    A one-character word matches the ids stored under that character in
    ``alpha``.  A longer word matches the ids of every ``index`` key that
    contains it as a substring.  Phrases with no words are ignored, so
    empty text returns an empty set.

    text -- the search string
    """
    phrases = []
    words = set()
    options = dict()

    # Break the search text into phrases and
    # find the set of key terms for each search phrase
    for phrase in text.split(","):
        # Ignore empty terms: an empty term is a substring of every key
        # and would match the whole library.
        terms = [term for term in format(phrase) if term]
        # Weed out searches of length 0
        if not terms:
            continue
        phrases.append(terms)
        for term in terms:
            if len(term) == 1:
                # Copy, so nothing downstream can alter the stored table;
                # a character that was never indexed matches nothing.
                options[term] = set(alpha.get(term, ()))
            else:
                words.add(term)

    # Only scan the index if we are searching for words
    if words:
        for word in words:
            options[word] = set()

        for key in index:
            for word in words:
                # Plain substring test: the terms are literal text, not
                # patterns, so a stray backslash cannot break the search.
                if word in key:
                    options[word].update(index[key])

    results = set()
    for phrase in phrases:
        results.update( psearch(phrase,options) )
    return results
|
|
|
|
|
|
|
|
|
|
|
|
def show():
    """Print the number of indexed words, a separator line, then each word."""
    print(len(index))
    print("=====")
    for key in index:
        try:
            print(key)
        except UnicodeError:
            # The output encoding cannot represent this key; show its
            # escaped form rather than silently dropping it.
            print(repr(key))
|