Improved collection indexing
I can now build an index over an entire collection faster. Disk access remains the bottleneck, but I still think this was a good challenge.
This commit is contained in:
parent
52b1236b29
commit
71b3289f62
|
@ -1,42 +1,45 @@
|
||||||
# Bryan Schumaker (8/8/2010)
|
# Bryan Schumaker (8/8/2010)
|
||||||
|
|
||||||
import libsaria
|
import libsaria
|
||||||
import table
|
|
||||||
|
|
||||||
class Collection:
|
class Collection:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.table = table.Table()
|
pass
|
||||||
|
|
||||||
|
|
||||||
def scan(self, path):
|
def scan(self, path):
|
||||||
print "Scanning path:", path
|
print "Scanning path:", path
|
||||||
self.clear()
|
self.reset()
|
||||||
self.update(path)
|
self.update(path)
|
||||||
|
|
||||||
|
|
||||||
def clear(self):
|
def reset(self):
|
||||||
print "Erasing collection ... "
|
print "Erasing collection ... "
|
||||||
|
from table import Table
|
||||||
|
from index import Index
|
||||||
|
self.table = Table()
|
||||||
|
self.index = Index()
|
||||||
|
|
||||||
|
|
||||||
def insert(self, file):
|
def insert(self, file):
|
||||||
tags = file.tag()
|
tags = file.tag()
|
||||||
id = self.table.insert(tags)
|
tuple = (tags.artist, tags.album, tags.title)
|
||||||
print id, tags
|
id = self.table.insert(tuple)
|
||||||
|
self.index.insert(id, tuple)
|
||||||
|
|
||||||
|
|
||||||
def update(self, path):
|
def update(self, path):
|
||||||
FileRef = libsaria.collection.FileRef
|
FileRef = libsaria.collection.FileRef
|
||||||
join = libsaria.path.join
|
join = libsaria.path.join
|
||||||
|
insert = self.insert
|
||||||
|
|
||||||
for root,dirs,files in libsaria.path.walk(path):
|
for root,dirs,files in libsaria.path.walk(path):
|
||||||
for file in files:
|
for file in files:
|
||||||
file = join(root,file)
|
file = join(root,file)
|
||||||
try:
|
try:
|
||||||
tagfile = FileRef(file)
|
insert(FileRef(file))
|
||||||
self.insert(tagfile)
|
|
||||||
except Exception,e:
|
except Exception,e:
|
||||||
print e
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,49 @@
|
||||||
|
# Bryan Schumaker (8/10/2010)
|
||||||
|
|
||||||
|
# Shared character-translation state, built lazily by Index.setup().
# ttable maps character ordinals to a replacement ordinal (or None to
# delete the character); translate is the callable that applies it.
ttable = None
translate = None


def _translate(text, table):
    """Return text with the ordinal-keyed translation table applied."""
    # string.translate() in Python 2 cannot take a dict table, so the
    # string's own translate() method is used instead.
    return text.translate(table)


class Index:
    """Inverted index from normalized tokens to the set of ids using them.

    Every word of every tag is indexed, and so is every single character
    of those words, so a lookup by word or by letter both yield id sets.
    """

    def __init__(self):
        # token (word or single character) -> set of ids
        self.tokens = dict()

    def setup(self):
        """Build the module-level translation table and translate callable.

        The table deletes punctuation, turns separator characters into
        spaces (so they split words) and lowercases ASCII letters.
        """
        # Without this declaration the assignments below would only create
        # locals and the module-level names would stay None.
        global ttable, translate
        import string

        space = ord(" ")
        strip = u"\"#$%&'*+<=>@[]^`{|}~.?!"
        split = u"-\\/,:;()_~+"

        table = dict((ord(c), None) for c in strip)
        # Applied after the strip entries on purpose: characters listed in
        # both strings ("+" and "~") end up as separators, as before.
        table.update((ord(c), space) for c in split)
        table.update(
            (ord(up), ord(low))
            for up, low in zip(string.ascii_uppercase, string.ascii_lowercase)
        )

        ttable = table
        translate = _translate

    def insert(self, id, tags):
        """Associate id with every word, and every character, found in tags.

        id   -- hashable identifier stored in the token sets.
        tags -- iterable of text strings; empty or None entries are skipped.
        """
        if ttable is None:
            self.setup()

        tokens = self.tokens
        for tag in tags:
            if not tag:
                continue
            for word in translate(tag, ttable).split():
                # Each token owns its own set; sharing one set object
                # between tokens would leak later ids across all of them.
                tokens.setdefault(word, set()).add(id)
                for letter in word:
                    tokens.setdefault(letter, set()).add(id)
|
||||||
|
|
||||||
|
|
|
@ -6,9 +6,8 @@ class Table(dict):
|
||||||
self.next_id = 0
|
self.next_id = 0
|
||||||
|
|
||||||
|
|
||||||
def insert(self, tag):
|
def insert(self, tags):
|
||||||
id = self.next_id
|
id = self.next_id
|
||||||
self[id] = (tag.artist, tag.album, tag.title)
|
self[id] = tags
|
||||||
self.next_id += 1
|
self.next_id += 1
|
||||||
print id, self.next_id
|
|
||||||
return id
|
return id
|
||||||
|
|
|
@ -2,8 +2,8 @@
|
||||||
|
|
||||||
from libsaria import collection
|
from libsaria import collection
|
||||||
|
|
||||||
#src = "~/Music"
|
src = "~/Music"
|
||||||
#src = "~/Music/Foo Fighters"
|
#src = "~/Music/Foo Fighters"
|
||||||
src = "~/Music/Foo Fighters/Foo Fighters"
|
#src = "~/Music/Foo Fighters/Foo Fighters"
|
||||||
|
|
||||||
collection.new_source(src, bg=False)
|
collection.new_source(src, bg=False)
|
||||||
|
|
Loading…
Reference in New Issue