curds: Create a generic Tags class

I've decided that it would work best if Album and Track classes both
inherit from the same parent class. This will give them some
functionality overlap, and it'll also make it conceptually easier to
store them both in the same tag dictionary.

Signed-off-by: Anna Schumaker <Anna@NoWheyCreamery.com>
This commit is contained in:
Anna Schumaker 2019-03-05 14:19:39 -05:00
parent c4c0c09d46
commit 04ac4398e2
5 changed files with 93 additions and 99 deletions

View File

@ -1,32 +0,0 @@
# Copyright 2019 (c) Anna Schumaker.
import hashlib
import re
album_map = dict()
class Album:
    """Album-level metadata taken from one file's tag dictionary.

    *fileinfo* maps tag names to lists of values; only the first element of
    each list is used.  Missing tags fall back to "Unknown ..." strings or 0.
    """

    # Finds a disc marker such as "Disc 1", "Disk Two" or "CD3" inside a
    # lowercased album name.  Written as a raw string so "\s" reaches the
    # regex engine intact, and with "[ck]" so a literal "|" is not accepted.
    _DISC_RE = re.compile(r"(cd|dis[ck])(\s)*(([0-9]+)|one|two|three|four|five)")

    def __init__(self, fileinfo):
        """Read the album fields out of *fileinfo*.

        Raises ValueError if the "date" or "tracktotal" value cannot be
        converted by int().
        """
        self.album = fileinfo.get("album", [ "Unknown Album" ])[0]
        self.genre = fileinfo.get("genre", [ "Unknown" ])[0]
        self.date = int(fileinfo.get("date", [ 0 ])[0])
        self.tracktotal = int(fileinfo.get("tracktotal", [ 0 ])[0])
        # Preference order: "albumartist", then "album artist", then "artist".
        self.albumartist = fileinfo.get("albumartist",
                           fileinfo.get("album artist",
                           fileinfo.get("artist", [ "Unknown Artist" ])))[0]

        # Try to detect album names that have a discnumber embedded in them.
        # A marker at position 0 is left alone: it is the start of the title.
        match = self._DISC_RE.search(self.album.lower())
        if match and match.start() > 0:
            self.album = self.album[:match.start()].strip(" ;({[-")

    def hash(self):
        """Return the hex MD5 digest of album name, album artist and date."""
        md5 = hashlib.md5()
        for field in (self.album, self.albumartist, str(self.date)):
            md5.update(field.encode('utf-8'))
        return md5.hexdigest()
def lookup(fileinfo):
    """Return the shared Album for *fileinfo*, registering it on first use.

    A new Album is built from the tags and stored in album_map under its
    hash() digest.  If that digest is already present, the Album stored
    earlier is returned instead of the new one.
    """
    candidate = Album(fileinfo)
    digest = candidate.hash()
    return album_map.setdefault(digest, candidate)

34
curds/tags.py Normal file
View File

@ -0,0 +1,34 @@
# Copyright 2019 (c) Anna Schumaker.
import re
tag_map = dict()
class Tag:
    """Base class for tag objects that are shared through tag_map."""

    @staticmethod
    def extract(info, key, default):
        """Return the first value stored under *key* in *info*.

        *info* maps tag names to lists of values.  *default* is returned
        when the key is missing or when its value list is empty.
        """
        values = info.get(key)
        # A missing key and an empty list are treated alike, so an empty
        # value list yields the default instead of raising IndexError.
        return values[0] if values else default

    # Left undecorated on purpose: besides Tag.lookup(tag), this keeps a
    # bound call on an instance working the way it did before.
    def lookup(tag):
        """Return the object registered in tag_map under hash(tag).

        *tag* itself is registered and returned when no entry with that
        hash exists yet.
        """
        return tag_map.setdefault(hash(tag), tag)
class Album(Tag):
    """Album-level tag built from one file's tag dictionary.

    Missing tags fall back to "Unknown ..." strings or 0.  Instances hash
    on the (album, albumartist, date) triple.
    """

    # Finds a disc marker such as "Disc 1", "Disk Two" or "CD3" inside a
    # lowercased album name.  Written as a raw string so "\s" reaches the
    # regex engine intact, and with "[ck]" so a literal "|" is not accepted.
    _DISC_RE = re.compile(r"(cd|dis[ck])(\s)*(([0-9]+)|one|two|three|four|five)")

    def __init__(self, info):
        """Read the album fields out of *info*.

        Raises ValueError if the "date" or "tracktotal" value cannot be
        converted by int().
        """
        self.album = Tag.extract(info, "album", "Unknown Album")
        self.genre = Tag.extract(info, "genre", "Unknown")
        self.date = int(Tag.extract(info, "date", 0))
        self.tracktotal = int(Tag.extract(info, "tracktotal", 0))
        # Preference order: "albumartist", then "album artist", then "artist".
        self.albumartist = Tag.extract(info, "albumartist",
                           Tag.extract(info, "album artist",
                           Tag.extract(info, "artist", "Unknown Artist")))

        # Try to detect album names that have a discnumber embedded in them.
        # A marker at position 0 is left alone: it is the start of the title.
        match = self._DISC_RE.search(self.album.lower())
        if match and match.start() > 0:
            self.album = self.album[:match.start()].strip(" ;({[-")

    def __hash__(self):
        """Hash on the album name, album artist and date."""
        return hash((self.album, self.albumartist, self.date))

    @staticmethod
    def lookup(info):
        """Build an Album from *info* and return the result of Tag.lookup() on it."""
        return Tag.lookup(Album(info))

View File

@ -1,65 +0,0 @@
# Copyright 2019 (c) Anna Schumaker
import concurrent.futures
import hashlib
import unittest
import album
album_info = {"album" : [ "Test Album" ], "albumartist" : [ "Test Artist" ],
"date" : [ "2019" ], "genre" : [ "Test" ], "tracktotal" : [ "1" ]}
class TestAlbumClass(unittest.TestCase):
    """Tests for album.Album and the album.lookup() cache."""

    def test_init_basic(self):
        """Every field is read from a fully populated tag dictionary."""
        tagged = album.Album(album_info)
        digest = hashlib.md5("Test AlbumTest Artist2019".encode('utf-8')).hexdigest()
        self.assertEqual(tagged.album, "Test Album")
        self.assertEqual(tagged.genre, "Test")
        self.assertEqual(tagged.date, 2019)
        self.assertEqual(tagged.albumartist, "Test Artist")
        self.assertEqual(tagged.tracktotal, 1)
        self.assertEqual(tagged.hash(), digest)

    def test_init_empty(self):
        """An empty tag dictionary produces the documented defaults."""
        blank = album.Album({})
        digest = hashlib.md5("Unknown AlbumUnknown Artist0".encode('utf-8')).hexdigest()
        self.assertEqual(blank.album, "Unknown Album")
        self.assertEqual(blank.genre, "Unknown")
        self.assertEqual(blank.date, 0)
        self.assertEqual(blank.albumartist, "Unknown Artist")
        self.assertEqual(blank.tracktotal, 0)
        self.assertEqual(blank.hash(), digest)

    def test_init_artist_fallback(self):
        """The album artist falls back through the three artist tags."""
        remaining = {"albumartist" : [ "1" ], "album artist" : [ "2" ], "artist" : [ "3" ]}
        # Drop the preferred tag after each check so the next one is used.
        for key, expected in (("albumartist", "1"), ("album artist", "2"), ("artist", "3")):
            self.assertEqual(album.Album(remaining).albumartist, expected)
            remaining.pop(key)
        self.assertEqual(album.Album(remaining).albumartist, "Unknown Artist")

    def test_init_discno_detect(self):
        """Disc markers are stripped unless they start the album name."""
        cases = (
            ("Test Album {Disc 1}", "Test Album"),
            ("Test Album [Disk One]", "Test Album"),
            ("Test Album (Disk Two)", "Test Album"),
            ("Test Album - Disc Three)", "Test Album"),
            ("Test Album;CD Four", "Test Album"),
            ("Test Album;CdFive", "Test Album"),
            ("Test Album CD 9/10", "Test Album"),
            ("Disc One: Test Album", "Disc One: Test Album"),
        )
        for raw, expected in cases:
            self.assertEqual(album.Album({"album" : [ raw ]}).album, expected)

    def test_album_lookup(self):
        """Repeated lookups of the same tags return one cached album."""
        album.album_map.clear()
        first = album.lookup(album_info)
        self.assertIsNotNone(first)
        for _ in range(10):
            again = album.lookup(album_info)
            self.assertEqual(first, again)
        self.assertEqual(len(album.album_map), 1)

    def test_parallel_lookup(self):
        """Lookups from several threads still yield a single album."""
        album.album_map.clear()
        requests = [ album_info ] * 20
        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as pool:
            found = list(pool.map(album.lookup, requests))
        self.assertIsNotNone(found[0])
        self.assertEqual(found.count(found[0]), 20)
        self.assertEqual(len(album.album_map), 1)

57
curds/test_tags_album.py Normal file
View File

@ -0,0 +1,57 @@
# Copyright 2019 (c) Anna Schumaker.
import tags
import unittest
album_info = {"album" : [ "Test Album" ], "albumartist" : [ "Test Artist" ],
"date" : [ "2019" ], "genre" : [ "Test" ], "tracktotal" : [ "1" ]}
class TestAlbumTag(unittest.TestCase):
    """Tests for tags.Album and its registration in tags.tag_map."""

    def test_album_init_basic(self):
        """Every field is read from a fully populated tag dictionary."""
        tagged = tags.Album(album_info)
        self.assertIsInstance(tagged, tags.Tag)
        self.assertEqual(tagged.album, "Test Album")
        self.assertEqual(tagged.genre, "Test")
        self.assertEqual(tagged.date, 2019)
        self.assertEqual(tagged.tracktotal, 1)
        self.assertEqual(tagged.albumartist, "Test Artist")
        self.assertEqual(hash(tagged), hash(("Test Album", "Test Artist", 2019)))

    def test_album_init_empty(self):
        """An empty tag dictionary produces the documented defaults."""
        blank = tags.Album({})
        self.assertIsInstance(blank, tags.Tag)
        self.assertEqual(blank.album, "Unknown Album")
        self.assertEqual(blank.genre, "Unknown")
        self.assertEqual(blank.date, 0)
        self.assertEqual(blank.tracktotal, 0)
        self.assertEqual(blank.albumartist, "Unknown Artist")
        self.assertEqual(hash(blank), hash(("Unknown Album", "Unknown Artist", 0)))

    def test_album_init_artist(self):
        """The album artist falls back through the three artist tags."""
        remaining = {"albumartist" : [ "1" ], "album artist" : [ "2" ], "artist" : [ "3" ]}
        # Drop the preferred tag after each check so the next one is used.
        for key, expected in (("albumartist", "1"), ("album artist", "2"), ("artist", "3")):
            self.assertEqual(tags.Album(remaining).albumartist, expected)
            remaining.pop(key)
        self.assertEqual(tags.Album(remaining).albumartist, "Unknown Artist")

    def test_album_discno_detect(self):
        """Disc markers are stripped unless they start the album name."""
        cases = (
            ("Test Album {Disc 1}", "Test Album"),
            ("Test Album [Disk One]", "Test Album"),
            ("Test Album (Disk Two)", "Test Album"),
            ("Test Album - Disc Three)", "Test Album"),
            ("Test Album;CD Four", "Test Album"),
            ("Test Album;CdFive", "Test Album"),
            ("Test Album CD 9/10", "Test Album"),
            ("Disc One: Test Album", "Disc One: Test Album"),
        )
        for raw, expected in cases:
            self.assertEqual(tags.Album({"album" : [ raw ]}).album, expected)

    def test_album_lookup(self):
        """Repeated lookups of the same tags return one registered album."""
        tags.tag_map.clear()
        first = tags.Album.lookup(album_info)
        self.assertIsNotNone(first)
        self.assertIn(first, tags.tag_map.values())
        for _ in range(10):
            again = tags.Album.lookup(album_info)
            self.assertEqual(first, again)
        self.assertEqual(len(tags.tag_map), 1)

View File

@ -1,5 +1,5 @@
# Copyright 2019 (c) Anna Schumaker.
import album
import tags
import concurrent.futures
import mutagen
import os
@ -18,7 +18,7 @@ class Track:
self.artist = fileinfo.get("artist", ["Unknown Artist"])[0]
self.tracknumber = int(fileinfo.get("tracknumber", ["0"])[0])
self.length = fileinfo.info.length
self.album = album.lookup(fileinfo)
self.album = tags.Album.lookup(fileinfo)
self.discnumber = int(fileinfo.get("discnumber", ["1"])[0])
# Try to detect discnumbers that are embedded in the album name