ocarina/src/extra/index.py

158 lines
3.0 KiB
Python

#! /usr/bin/python
# To change this template, choose Tools | Templates
# and open the template in the editor.
__author__="bjschuma"
__date__ ="$Mar 23, 2010 9:46:43 PM$"
import string
import re
global index
import ocarina
from ct import path
from et import sql
import cPickle as pickle
# Module-level search state shared by the functions in this file.
#   index: word -> set of track ids whose tags contain that word
#   alpha: single character -> set of track ids whose tags contain it
# (The former module-level ``global`` statements were no-ops and are dropped.)
index = dict()
alpha = dict()

# Location of the pickled (index, alpha) pair.
filePath = path.join( ocarina.vars.OCARINA, "index.pickle" )

# Restore a previously saved index, if one exists on disk.
if path.exists(filePath):
    # Binary mode: the file is written with pickle.HIGHEST_PROTOCOL, which is
    # a binary format.  The ``with`` block closes the handle even when
    # loading fails.
    # NOTE(review): unpickling can execute arbitrary code; this is only safe
    # while the file is produced exclusively by this program.
    with open(filePath, 'rb') as stored:
        (index, alpha) = pickle.Unpickler(stored).load()
#filePath = "/home/bjschuma/.ocarina3/index.pickle"
def format(word):
    """Normalize a tag or search phrase into a list of lowercase tokens.

    The text is lowercased, the punctuation characters listed in ``strip``
    are removed, and the remainder is split on spaces, hyphens, forward
    slashes and backslashes.  Each token has surrounding whitespace trimmed.

    Empty tokens (from consecutive separators or empty input) are kept, so
    the result always contains at least one element; ``format("")`` returns
    ``[""]``.

    word -- the string to tokenize.
    Returns a list of strings.
    """
    strip = "!\"#$%&'()*+,.:;<=>?@[]^_`{|}~"
    # The previous pattern " |-|\\|/" compiled to the regex ` |-|\|/`, whose
    # third alternative is the literal text "|/".  Because "|" is removed
    # above, it could never match, so "/" and backslash never split words.
    # A character class states the four intended separators unambiguously.
    separators = r"[ \-\\/]"
    cleaned = word.lower()
    for char in strip:
        cleaned = cleaned.replace(char, '')
    return [token.strip() for token in re.split(separators, cleaned)]
def add(tag,trid):
    """Record every word of *tag* in the search indexes under track *trid*.

    tag  -- free text describing the track; tokenized with format().
    trid -- the track id; anything accepted by int().

    Each token is added to ``index`` (word -> set of ids) and each character
    of each token is added to ``alpha`` (character -> set of ids).

    Raises whatever int() raises when *trid* cannot be converted.
    """
    track_id = int(trid)
    for word in format(tag):
        index.setdefault(word, set()).add(track_id)
        for letter in word:
            # Store the same integer id as ``index`` does.  Previously the
            # raw ``trid`` was stored here, so a non-int id (e.g. "7") made
            # ``alpha`` and ``index`` disagree and their intersections empty.
            alpha.setdefault(letter, set()).add(track_id)
def reindex():
    """Rebuild the search indexes from scratch from the track database.

    Selects the id and name of every track together with its artist and
    album names, discards the current ``index`` and ``alpha``, and feeds
    each row's combined "track artist album" text to add().

    NOTE(review): ``sql.Select`` is defined elsewhere; this assumes
    ``execute().fetchall()`` yields indexable rows in column order.
    """
    global index
    global alpha
    select = "track.id,track.name,artist.name,album.name"
    frm = "track,artist,album"
    where = "track.artist=artist.id AND track.album=album.id"
    rows = sql.Select(select,frm,where).execute().fetchall()
    index = dict()
    # ``alpha`` must be reset together with ``index``; otherwise ids of
    # tracks that are no longer in the database linger in it and keep
    # turning up in single-letter searches.
    alpha = dict()
    for entry in rows:
        # entry = (track id, track name, artist name, album name)
        add(entry[1] + " " + entry[2] + " " + entry[3], entry[0])
def save():
    """Write the current ``(index, alpha)`` pair to ``filePath`` as a pickle.

    Any existing file at ``filePath`` is overwritten.
    """
    # 'wb', not 'w': HIGHEST_PROTOCOL is a binary pickle format and must not
    # go through text-mode newline translation.  The ``with`` block closes
    # the file even if dumping raises.
    with open( filePath, 'wb' ) as out:
        pickle.Pickler(out,pickle.HIGHEST_PROTOCOL).dump( (index,alpha) )
def psearch(phrase,options):
    """Return the ids that match every word of one search phrase.

    phrase  -- non-empty sequence of words.
    options -- mapping of word -> set of matching ids; must contain every
               word in *phrase*.

    Returns a new set holding the intersection of the per-word sets.
    Raises IndexError for an empty *phrase* and KeyError for a word that is
    missing from *options*.
    """
    # Work on a copy.  The old code ran intersection_update() directly on
    # options[phrase[0]], shrinking the caller's set in place; that corrupted
    # results for later phrases sharing the word, and any set the caller had
    # aliased into ``options``.
    results = set(options[phrase[0]])
    for word in phrase[1:]:
        results.intersection_update(options[word])
    return results
def search(text):
    """Return the set of track ids matching a comma-separated search string.

    *text* is split on commas into phrases.  A track matches a phrase when
    it matches every word of that phrase, and matches the search when it
    matches at least one phrase.

    Word matching:
      * a one-character word matches tracks listed under that character in
        ``alpha``;
      * any other word matches tracks listed under every ``index`` key that
        contains the word as a substring.

    Returns a new set of ids (empty when nothing matches).
    """
    phrases = []
    words = set()
    options = dict()
    # Break the search text into phrases and
    # find a set of key terms for each search phrase
    for phrase in text.split(","):
        split = format(phrase)
        # Weed out searches of length 0
        if not split:
            continue
        phrases.append(split)
        for word in split:
            if len(word) == 1:
                # Copy rather than alias the global set so nothing done with
                # ``options`` can alter ``alpha``.  A letter that was never
                # indexed yields an empty set instead of a KeyError.
                options[word] = set(alpha.get(word, ()))
            else:
                words.add(word)
    # Only scan the index if we are searching for words
    if words:
        for word in words:
            options[word] = set()
        for key in index:
            for word in words:
                # Plain substring test.  The words were previously used as
                # regular expressions; a backslash in the input (e.g. a
                # trailing "\") raised re.error or was read as an escape.
                if word in key:
                    # In place update > replacement update!
                    options[word].update(index[key])
    results = set()
    for phrase in phrases:
        results.update( psearch(phrase,options) )
    return results
def show():
    """Print the number of indexed words, a separator line, then each word.

    Words that cannot be printed are skipped silently.
    """
    # Single-argument print(...) produces the same output whether ``print``
    # is a statement or a function.
    print(len(index))
    print("=====")
    for key in index:
        try:
            print(key)
        except Exception:
            # Best effort: skip keys that fail to print.
            # NOTE(review): presumably this guards against keys the output
            # encoding cannot represent -- confirm.  Narrowed from a bare
            # ``except:`` so KeyboardInterrupt/SystemExit still propagate.
            pass