curds: Try to detect album names that embed a disc number

We want to be able to pull out just the album name, both to make
MusicBrainz searches more accurate and so that multiple discs link
to the same album object.

Signed-off-by: Anna Schumaker <Anna@NoWheyCreamery.com>
This commit is contained in:
Anna Schumaker 2019-01-28 17:00:40 -05:00
parent 306fa0f40c
commit a26e8867fe
2 changed files with 16 additions and 0 deletions

View File

@ -1,5 +1,6 @@
# Copyright 2019 (c) Anna Schumaker.
import hashlib
import re
# Module-level album table; the tests reset it with album_map.clear().
# NOTE(review): presumably filled by lookup() -- confirm against its definition.
album_map = dict()
@ -13,6 +14,11 @@ class Album:
fileinfo.get("album artist",
fileinfo.get("artist", [ "Unknown Artist" ])))[0]
# Try to detect album names that have a disc number embedded in them,
# e.g. "Test Album (Disc 2)" or "Test Album;CD Four".
#
# The pattern is a raw string so "\s" reaches the regex engine untouched,
# and the character class is "[ck]": inside a class "|" is a literal
# character, not alternation, so "[c|k]" would also accept "dis|".
# Matching is done case-insensitively on the original string (rather than
# on a lowercased copy) so match.start() is always a valid offset into
# self.album, even when lowercasing would change the string's length.
match = re.search(r"(cd|dis[ck])\s*([0-9]+|one|two|three|four|five)",
                  self.album, re.IGNORECASE)
# A marker at position 0 (e.g. "Disc One: Test Album") is left alone,
# since cutting there would leave an empty album name.
if match and match.start() > 0:
    # Keep the text before the marker and trim separator characters.
    self.album = self.album[:match.start()].strip(" ;({[-")
def hash(self):
md5 = hashlib.md5()
md5.update(self.album.encode('utf-8'))

View File

@ -36,6 +36,16 @@ class TestAlbumClass(unittest.TestCase):
test_info.pop("artist")
self.assertEqual(album.Album(test_info).albumartist, "Unknown Artist")
def test_init_discno_detect(self):
    # Each pair is (album tag as given, album name expected on the object).
    # Trailing disc markers should be cut off; the final case starts with
    # the marker and is expected to come through unchanged.
    cases = (
        ("Test Album {Disc 1}", "Test Album"),
        ("Test Album [Disk One]", "Test Album"),
        ("Test Album (Disk Two)", "Test Album"),
        ("Test Album - Disc Three)", "Test Album"),
        ("Test Album;CD Four", "Test Album"),
        ("Test Album;CdFive", "Test Album"),
        ("Test Album CD 9/10", "Test Album"),
        ("Disc One: Test Album", "Disc One: Test Album"),
    )
    for tagged_name, expected_name in cases:
        detected = album.Album({"album" : [ tagged_name ]}).album
        self.assertEqual(detected, expected_name)
def test_album_lookup(self):
album.album_map.clear()
a = album.lookup(album_info)