From 04ac4398e26359cb9e249b5d9f17af8596d233db Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 5 Mar 2019 14:19:39 -0500 Subject: [PATCH] curds: Create a generic Tags class I've decided that it would work best if Album and Track classes both inherit from the same parent class. This will give them some functionality overlap, and it'll also make it conceptually easier to store them both in the same tag dictionary. Signed-off-by: Anna Schumaker --- curds/album.py | 32 -------------------- curds/tags.py | 34 +++++++++++++++++++++ curds/test_album.py | 65 ---------------------------------------- curds/test_tags_album.py | 57 +++++++++++++++++++++++++++++++++++ curds/track.py | 4 +-- 5 files changed, 93 insertions(+), 99 deletions(-) delete mode 100644 curds/album.py create mode 100644 curds/tags.py delete mode 100644 curds/test_album.py create mode 100644 curds/test_tags_album.py diff --git a/curds/album.py b/curds/album.py deleted file mode 100644 index 2913256..0000000 --- a/curds/album.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright 2019 (c) Anna Schumaker. -import hashlib -import re - -album_map = dict() - -class Album: - def __init__(self, fileinfo): - self.album = fileinfo.get("album", [ "Unknown Album" ])[0] - self.genre = fileinfo.get("genre", [ "Unknown" ])[0] - self.date = int(fileinfo.get("date", [ 0 ])[0]) - self.tracktotal = int(fileinfo.get("tracktotal", [ 0 ])[0]) - self.albumartist = fileinfo.get("albumartist", - fileinfo.get("album artist", - fileinfo.get("artist", [ "Unknown Artist" ])))[0] - - # Try to detect album names that have a discnumber embedded in them - match = re.search("(cd|dis[c|k])(\s)*(([0-9]+)|one|two|three|four|five)", self.album.lower()) - if match and match.start() > 0: - self.album = self.album[:match.start()].strip(" ;({[-") - - def hash(self): - md5 = hashlib.md5() - md5.update(self.album.encode('utf-8')) - md5.update(self.albumartist.encode('utf-8')) - md5.update(str(self.date).encode('utf-8')) - return md5.hexdigest() - - -def lookup(fileinfo): - album = Album(fileinfo) - return album_map.setdefault(album.hash(), album) diff --git a/curds/tags.py b/curds/tags.py new file mode 100644 index 0000000..2adc0fb --- /dev/null +++ b/curds/tags.py @@ -0,0 +1,34 @@ +# Copyright 2019 (c) Anna Schumaker. +import re + +tag_map = dict() + +class Tag: + def extract(info, key, default): + return info.get(key, [ default ])[0] + + def lookup(tag): + return tag_map.setdefault(hash(tag), tag) + + +class Album(Tag): + def __init__(self, info): + self.album = Tag.extract(info, "album", "Unknown Album") + self.genre = Tag.extract(info, "genre", "Unknown") + self.date = int(Tag.extract(info, "date", 0)) + self.tracktotal = int(Tag.extract(info, "tracktotal", 0)) + self.albumartist = Tag.extract(info, "albumartist", + Tag.extract(info, "album artist", + Tag.extract(info, "artist", "Unknown Artist"))) + + # Try to detect album names that have a discnumber embedded in them + match = re.search("(cd|dis[c|k])(\s)*(([0-9]+)|one|two|three|four|five)", self.album.lower()) + if match and match.start() > 0: + self.album = self.album[:match.start()].strip(" ;({[-") + + def __hash__(self): + return hash((self.album, self.albumartist, self.date)) + + def lookup(info): + return Tag.lookup(Album(info)) + diff --git a/curds/test_album.py b/curds/test_album.py deleted file mode 100644 index 47a27c3..0000000 --- a/curds/test_album.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright 2019 (c) Anna Schumaker -import concurrent.futures -import hashlib -import unittest -import album - -album_info = {"album" : [ "Test Album" ], "albumartist" : [ "Test Artist" ], - "date" : [ "2019" ], "genre" : [ "Test" ], "tracktotal" : [ "1" ]} - -class TestAlbumClass(unittest.TestCase): - def test_init_basic(self): - a = album.Album(album_info) - self.assertEqual(a.album, "Test Album") - self.assertEqual(a.genre, "Test") - self.assertEqual(a.date, 2019) - self.assertEqual(a.albumartist, "Test Artist") - self.assertEqual(a.tracktotal, 1) - self.assertEqual(a.hash(), hashlib.md5("Test AlbumTest Artist2019".encode('utf-8')).hexdigest()) - - def test_init_empty(self): - a = album.Album({}) - self.assertEqual(a.album, "Unknown Album") - self.assertEqual(a.genre, "Unknown") - self.assertEqual(a.date, 0) - self.assertEqual(a.albumartist, "Unknown Artist") - self.assertEqual(a.tracktotal, 0) - self.assertEqual(a.hash(), hashlib.md5("Unknown AlbumUnknown Artist0".encode('utf-8')).hexdigest()) - - def test_init_artist_fallback(self): - test_info = {"albumartist" : [ "1" ], "album artist" : [ "2" ], "artist" : [ "3" ]} - self.assertEqual(album.Album(test_info).albumartist, "1") - test_info.pop("albumartist") - self.assertEqual(album.Album(test_info).albumartist, "2") - test_info.pop("album artist") - self.assertEqual(album.Album(test_info).albumartist, "3") - test_info.pop("artist") - self.assertEqual(album.Album(test_info).albumartist, "Unknown Artist") - - def test_init_discno_detect(self): - self.assertEqual(album.Album({"album" : [ "Test Album {Disc 1}" ]}).album, "Test Album") - self.assertEqual(album.Album({"album" : [ "Test Album [Disk One]" ]}).album, "Test Album") - self.assertEqual(album.Album({"album" : [ "Test Album (Disk Two)" ]}).album, "Test Album") - self.assertEqual(album.Album({"album" : [ "Test Album - Disc Three)" ]}).album, "Test Album") - self.assertEqual(album.Album({"album" : [ "Test Album;CD Four" ]}).album, "Test Album") - self.assertEqual(album.Album({"album" : [ "Test Album;CdFive" ]}).album, "Test Album") - self.assertEqual(album.Album({"album" : [ "Test Album CD 9/10" ]}).album, "Test Album") - self.assertEqual(album.Album({"album" : [ "Disc One: Test Album" ]}).album, "Disc One: Test Album") - - def test_album_lookup(self): - album.album_map.clear() - a = album.lookup(album_info) - self.assertIsNotNone(a) - - for i in range(10): - b = album.lookup(album_info) - self.assertEqual(a, b) - self.assertEqual(len(album.album_map), 1) - - def test_parallel_lookup(self): - album.album_map.clear() - with concurrent.futures.ThreadPoolExecutor(max_workers=5) as pool: - res = list(pool.map(album.lookup, [ album_info ] * 20)) - self.assertIsNotNone(res[0]) - self.assertEqual(res.count(res[0]), 20) - self.assertEqual(len(album.album_map), 1) diff --git a/curds/test_tags_album.py b/curds/test_tags_album.py new file mode 100644 index 0000000..d1f1ce4 --- /dev/null +++ b/curds/test_tags_album.py @@ -0,0 +1,57 @@ +# Copyright 2019 (c) Anna Schumaker. +import tags +import unittest + +album_info = {"album" : [ "Test Album" ], "albumartist" : [ "Test Artist" ], + "date" : [ "2019" ], "genre" : [ "Test" ], "tracktotal" : [ "1" ]} + +class TestAlbumTag(unittest.TestCase): + def test_album_init_basic(self): + a = tags.Album(album_info) + self.assertIsInstance(a, tags.Tag) + self.assertEqual(a.album, "Test Album") + self.assertEqual(a.genre, "Test") + self.assertEqual(a.date, 2019) + self.assertEqual(a.tracktotal, 1) + self.assertEqual(a.albumartist, "Test Artist") + self.assertEqual(hash(a), hash(("Test Album", "Test Artist", 2019))) + + def test_album_init_empty(self): + a = tags.Album({}) + self.assertIsInstance(a, tags.Tag) + self.assertEqual(a.album, "Unknown Album") + self.assertEqual(a.genre, "Unknown") + self.assertEqual(a.date, 0) + self.assertEqual(a.tracktotal, 0) + self.assertEqual(a.albumartist, "Unknown Artist") + self.assertEqual(hash(a), hash(("Unknown Album", "Unknown Artist", 0))) + + def test_album_init_artist(self): + test_info = {"albumartist" : [ "1" ], "album artist" : [ "2" ], "artist" : [ "3" ]} + self.assertEqual(tags.Album(test_info).albumartist, "1") + test_info.pop("albumartist") + self.assertEqual(tags.Album(test_info).albumartist, "2") + test_info.pop("album artist") + self.assertEqual(tags.Album(test_info).albumartist, "3") + test_info.pop("artist") + self.assertEqual(tags.Album(test_info).albumartist, "Unknown Artist") + + def test_album_discno_detect(self): + self.assertEqual(tags.Album({"album" : [ "Test Album {Disc 1}" ]}).album, "Test Album") + self.assertEqual(tags.Album({"album" : [ "Test Album [Disk One]" ]}).album, "Test Album") + self.assertEqual(tags.Album({"album" : [ "Test Album (Disk Two)" ]}).album, "Test Album") + self.assertEqual(tags.Album({"album" : [ "Test Album - Disc Three)" ]}).album, "Test Album") + self.assertEqual(tags.Album({"album" : [ "Test Album;CD Four" ]}).album, "Test Album") + self.assertEqual(tags.Album({"album" : [ "Test Album;CdFive" ]}).album, "Test Album") + self.assertEqual(tags.Album({"album" : [ "Test Album CD 9/10" ]}).album, "Test Album") + self.assertEqual(tags.Album({"album" : [ "Disc One: Test Album" ]}).album, "Disc One: Test Album") + def test_album_lookup(self): + tags.tag_map.clear() + a = tags.Album.lookup(album_info) + self.assertIsNotNone(a) + self.assertIn(a, tags.tag_map.values()) + + for i in range(10): + b = tags.Album.lookup(album_info) + self.assertEqual(a, b) + self.assertEqual(len(tags.tag_map), 1) diff --git a/curds/track.py b/curds/track.py index a369273..6df790c 100644 --- a/curds/track.py +++ b/curds/track.py @@ -1,5 +1,5 @@ # Copyright 2019 (c) Anna Schumaker. -import album +import tags import concurrent.futures import mutagen import os @@ -18,7 +18,7 @@ class Track: self.artist = fileinfo.get("artist", ["Unknown Artist"])[0] self.tracknumber = int(fileinfo.get("tracknumber", ["0"])[0]) self.length = fileinfo.info.length - self.album = album.lookup(fileinfo) + self.album = tags.Album.lookup(fileinfo) self.discnumber = int(fileinfo.get("discnumber", ["1"])[0]) # Try to detect discnumbers that are embedded in the album name