audio: Implement a file tagger

This class reads the tags in an audio file and parses them into a format
we can use later to build our database playlist objects.

Signed-off-by: Anna Schumaker <Anna@NoWheyCreamery.com>
This commit is contained in:
Anna Schumaker 2023-03-05 17:18:02 -05:00
parent ad3d4840e8
commit 1832a56786
2 changed files with 347 additions and 0 deletions

146
emmental/audio/tagger.py Normal file
View File

@ -0,0 +1,146 @@
# Copyright 2023 (c) Anna Schumaker
"""Extract tags from an audio file."""
import dataclasses
import mutagen
import pathlib
import re
@dataclasses.dataclass
class _Artist:
"""Class for holding Artist-related tags."""
name: str
mbid: str
def __lt__(self, rhs) -> bool:
lhs = (self.name is not None, self.name, self.mbid)
return lhs < (rhs.name is not None, rhs.name, rhs.mbid)
@dataclasses.dataclass
class _Album:
    """Plain record of the album-level tags extracted for one file."""

    # Album title.
    name: str
    # MusicBrainz ID associated with the album.
    mbid: str
    # Display string for the album artist.
    artist: str
    # Release date exactly as written in the tags.
    release: str
    # Location of the album's cover image.
    cover: pathlib.Path
    # One _Artist record per album artist.
    artists: list[_Artist]
@dataclasses.dataclass
class _Medium:
    """Plain record of the medium (disc) tags extracted for one file."""

    # Position of the medium within its release.
    number: int
    # Subtitle of the medium.
    name: str
    # Kind of media, as written in the tags.
    type: str
class _Tags:
"""Extract tags found in the Mutagen tag dictionary."""
def __init__(self, file: pathlib.Path, tags: dict):
"""Initialize the Tagger."""
self.file = file
self.tags = tags
self.artists = sorted(self.list_artists())
self.album = _Album(tags.get("album", [""])[0],
tags.get("musicbrainz_releasegroupid", [""])[0],
self.get_album_artist(),
self.get_release(),
file.parent / "cover.jpg",
sorted(self.list_album_artists()))
self.medium = _Medium(int(tags.get("discnumber", [1])[0]),
tags.get("discsubtitle", [""])[0],
tags.get("media", [""])[0])
self.genres = sorted(self.list_genres())
self.year = self.get_year()
def get_album_artist(self) -> str:
"""Find the album artist of the file."""
if (res := self.tags.get("albumartist")) is None:
res = self.tags.get("artist", [""])
return res[0]
def list_album_artists(self) -> list[_Artist]:
"""Find the list of album artists for the track."""
artists = self.tags.get("albumartist", [])
mbids = self.tags.get("musicbrainz_albumartistid", len(artists) * [""])
if len(artists) != len(mbids):
artists = [None] * len(mbids)
map = {a.mbid: a for a in self.artists}
map.update({(a.name, a.mbid): a for a in self.artists})
return [map.get(m, map.get((a, m))) for (a, m) in zip(artists, mbids)]
def list_artists(self) -> list[_Artist]:
"""Find the list of artists for the track."""
artists = self.tags.get("artists", [])
mbids = self.tags.get("musicbrainz_artistid", len(artists) * [""])
found = set()
need = set()
if len(artists) == 0 and len(mbids) == 0:
res = {(a, "") for a in self.tags.get("artist", [])}
elif len(artists) == len(mbids):
res = {(a, m) for (a, m) in zip(artists, mbids)}
found.update({m for m in mbids if len(m)})
else:
res = {(None, mbid) for mbid in mbids}
need.update({mbid for mbid in mbids if len(mbid)})
albumartists = self.tags.get("albumartist", [])
mbids = self.tags.get("musicbrainz_albumartistid",
len(albumartists) * [""])
if len(albumartists) == len(mbids):
res.update({(a, m) for (a, m) in zip(albumartists, mbids)})
found.update({m for m in mbids if len(m)})
else:
res.update({(None, mbid) for mbid in mbids})
need.update({mbid for mbid in mbids if len(mbid)})
res.difference_update({(None, mbid) for mbid in found & need})
return [_Artist(a, m) for (a, m) in list(res)]
def list_genres(self) -> list[str]:
"""Find the genres of the file."""
res = []
for genre in self.tags.get("genre", []):
res.extend([g.strip() for g in re.split("[,;/]", genre)])
for reltype in self.tags.get("releasetype", []):
match reltype:
case "album" | "compilation": continue
case "ep": res.append("EP")
case _: res.append(reltype.title())
return res
def get_release(self) -> str:
"""Find the release date of the file."""
if (res := self.tags.get("originaldate")) is None:
if (res := self.tags.get("originalyear")) is None:
if (res := self.tags.get("date")) is None:
res = self.tags.get("year", [""])
return res[0]
def get_year(self) -> int | None:
"""Find the year in the release string."""
if len(self.album.release):
return int(re.match(r"\d+", self.album.release).group(0))
def tag_file(file: pathlib.Path) -> _Tags | None:
    """Build a _Tags object for the file at the given path.

    Returns None if the path is not a regular file, or if mutagen.File()
    returns None for it.
    """
    if not file.is_file():
        return None

    tags = mutagen.File(file)
    return None if tags is None else _Tags(file, tags)

201
tests/audio/test_tagger.py Normal file
View File

@ -0,0 +1,201 @@
# Copyright 2023 (c) Anna Schumaker
"""Tests our tag extractor class."""
import pathlib
import unittest
import unittest.mock
import emmental.audio.tagger
# Shorthand for the class under test.
_Tags = emmental.audio.tagger._Tags


class TestAudioTagger(unittest.TestCase):
    """Exercise the _Tags class with hand-built tag dictionaries."""

    @staticmethod
    def _pairs(artists) -> list:
        """Flatten artist objects into comparable (name, mbid) tuples."""
        return [(artist.name, artist.mbid) for artist in artists]

    def setUp(self):
        """Pick the fake track path shared by every test."""
        self.file = pathlib.Path("/a/b/c/track.ogg")

    def test_init(self):
        """Check that the constructor keeps the file and the tag dict."""
        tags = {}
        tagger = _Tags(self.file, tags)

        self.assertEqual(tagger.file, self.file)
        self.assertEqual(tagger.tags, tags)

    def test_get_album_artist(self):
        """Check the album artist lookup and its fallbacks."""
        tagger = _Tags(self.file, {})
        self.assertEqual(tagger.get_album_artist(), "")

        # Tags are added from lowest to highest priority.
        cases = (("artist", "Artist"),
                 ("albumartist", "Album Artist"))
        for tag, artist in cases:
            with self.subTest(tag=tag, artist=artist):
                tagger.tags[tag] = [artist]
                self.assertEqual(tagger.get_album_artist(), artist)

    def test_get_release(self):
        """Check the release date lookup and its fallbacks."""
        tagger = _Tags(self.file, {})
        self.assertEqual(tagger.get_release(), "")

        # Tags are added from lowest to highest priority.
        cases = (("year", "1987"),
                 ("date", "1988-06-17"),
                 ("originalyear", "1986"),
                 ("originaldate", "1985-08"))
        for tag, date in cases:
            with self.subTest(tag=tag, date=date):
                tagger.tags[tag] = [date]
                self.assertEqual(tagger.get_release(), date)

    def test_empty(self):
        """Check the defaults produced by an empty tag dict."""
        tagger = _Tags(self.file, {})
        album = tagger.album
        medium = tagger.medium

        self.assertEqual(album.name, "")
        self.assertEqual(album.mbid, "")
        self.assertEqual(album.artist, "")
        self.assertEqual(album.release, "")
        self.assertEqual(album.cover, pathlib.Path("/a/b/c/cover.jpg"))
        self.assertListEqual(album.artists, [])

        self.assertEqual(medium.name, "")
        self.assertEqual(medium.number, 1)
        self.assertEqual(medium.type, "")

        self.assertListEqual(tagger.artists, [])
        self.assertListEqual(tagger.genres, [])
        self.assertIsNone(tagger.year)

    def test_album(self):
        """Check the album record, with and without album artist ids."""
        common = {"album": ["Album Name"],
                  "musicbrainz_releasegroupid": ["ab-cd-ef"],
                  "albumartist": ["Album Artist"],
                  "date": ["1988-06"]}

        def check_common(album):
            self.assertEqual(album.name, "Album Name")
            self.assertEqual(album.mbid, "ab-cd-ef")
            self.assertEqual(album.artist, "Album Artist")
            self.assertEqual(album.release, "1988-06")
            self.assertEqual(album.cover, pathlib.Path("/a/b/c/cover.jpg"))

        tagger = _Tags(self.file, dict(common))
        check_common(tagger.album)
        self.assertEqual(len(tagger.album.artists), 1)
        # The album must reuse the artist object from the track list.
        self.assertIs(tagger.album.artists[0], tagger.artists[0])

        # More ids than names: artists are built from the ids alone.
        mismatched = common | {"musicbrainz_albumartistid": ["gh-ij-kl",
                                                             "mn-op-qr"]}
        tagger = _Tags(self.file, mismatched)
        check_common(tagger.album)
        self.assertListEqual(self._pairs(tagger.album.artists),
                             [(None, "gh-ij-kl"), (None, "mn-op-qr")])

    def test_artists(self):
        """Check how track and album artists are merged together."""
        # Only the single-value tags are set.
        tagger = _Tags(self.file, {"artist": ["Artist"],
                                   "albumartist": ["Album Artist"]})
        self.assertListEqual(self._pairs(tagger.artists),
                             [("Album Artist", ""), ("Artist", "")])

        # Names and ids line up, with one artist shared by both lists.
        tagger = _Tags(self.file,
                       {"artist": ["No Artist"],
                        "artists": ["Artist", "Other Artist"],
                        "musicbrainz_artistid": ["ab-cd-ef", "gh-ij-kl"],
                        "albumartist": ["Album Artist", "Other Artist"],
                        "musicbrainz_albumartistid": ["mn-op-qr",
                                                      "gh-ij-kl"]})
        self.assertListEqual(self._pairs(tagger.artists),
                             [("Album Artist", "mn-op-qr"),
                              ("Artist", "ab-cd-ef"),
                              ("Other Artist", "gh-ij-kl")])

        # Fewer names than ids in both lists.
        tagger = _Tags(self.file,
                       {"artist": ["No Artist"],
                        "artists": ["Artist"],
                        "musicbrainz_artistid": ["ab-cd-ef", "gh-ij-kl"],
                        "albumartist": ["Album Artist"],
                        "musicbrainz_albumartistid": ["mn-op-qr",
                                                      "gh-ij-kl"]})
        self.assertListEqual(self._pairs(tagger.artists),
                             [(None, "ab-cd-ef"), (None, "gh-ij-kl"),
                              (None, "mn-op-qr")])

        # The album list is mismatched, but its ids are named elsewhere.
        tagger = _Tags(self.file,
                       {"artists": ["Artist 1", "Artist 2"],
                        "musicbrainz_artistid": ["ab-cd-ef", "gh-ij-kl"],
                        "albumartist": ["Artist 1 & 2"],
                        "musicbrainz_albumartistid": ["ab-cd-ef",
                                                      "gh-ij-kl"]})
        self.assertListEqual(self._pairs(tagger.artists),
                             [("Artist 1", "ab-cd-ef"),
                              ("Artist 2", "gh-ij-kl")])

    def test_medium(self):
        """Check that the medium record is filled in from the tags."""
        medium = _Tags(self.file, {"discnumber": ["2"],
                                   "discsubtitle": ["Subtitle"],
                                   "media": ["CD"]}).medium

        self.assertEqual(medium.number, 2)
        self.assertEqual(medium.name, "Subtitle")
        self.assertEqual(medium.type, "CD")

    def test_genre(self):
        """Check genre splitting and the release type extras."""
        # A single value using every supported separator.
        tagger = _Tags(self.file,
                       {"genre": ["Genre 1, Genre 2 / Genre 3; Genre 4"]})
        self.assertListEqual(tagger.genres, ["Genre 1", "Genre 2",
                                             "Genre 3", "Genre 4"])

        # Several values, plus release types.
        tagger = _Tags(self.file,
                       {"genre": ["Genre 1 / Genre 2", "Genre 3; Genre 4"],
                        "releasetype": ["album", "ep", "single",
                                        "compilation"]})
        self.assertListEqual(tagger.genres, ["EP", "Genre 1", "Genre 2",
                                             "Genre 3", "Genre 4", "Single"])

    def test_year(self):
        """Check that the year is taken from the release date."""
        tagger = _Tags(self.file, {"date": ["1988-06-17"]})
        self.assertEqual(tagger.year, 1988)
@unittest.mock.patch("pathlib.Path.is_file")
class TestTagFile(unittest.TestCase):
    """Test case for the tag_file() function.

    pathlib.Path.is_file() is patched for every test in the class, so
    none of the paths used here need to exist. The class-level mock is
    passed as the last argument of each test method.
    """

    def test_not_file(self, mock_is_file: unittest.mock.Mock):
        """Test calling tag_file() on something other than a file."""
        path = pathlib.Path("/a/b/c")
        mock_is_file.return_value = False

        self.assertIsNone(emmental.audio.tagger.tag_file(path))
        mock_is_file.assert_called()

    @unittest.mock.patch("mutagen.File")
    def test_no_tags(self, mock_mutagen_file: unittest.mock.Mock,
                     mock_is_file: unittest.mock.Mock):
        """Test calling tag_file() on a file that doesn't have tags."""
        path = pathlib.Path("/a/b/c/notags.txt")
        mock_is_file.return_value = True
        mock_mutagen_file.return_value = None

        self.assertIsNone(emmental.audio.tagger.tag_file(path))
        mock_is_file.assert_called()
        mock_mutagen_file.assert_called_with(path)

    @unittest.mock.patch("mutagen.File")
    def test_have_tags(self, mock_mutagen_file: unittest.mock.Mock,
                       mock_is_file: unittest.mock.Mock):
        """Test calling tag_file() successfully."""
        path = pathlib.Path("/a/b/c/track.ogg")
        mock_is_file.return_value = True
        # The attribute must be spelled return_value. Any other spelling
        # only sets an unrelated attribute on the mock, and the tagger
        # would be handed an auto-created mock instead of this dict.
        mock_mutagen_file.return_value = dict()

        self.assertIsInstance(emmental.audio.tagger.tag_file(path),
                              emmental.audio.tagger._Tags)
        mock_is_file.assert_called()
        mock_mutagen_file.assert_called_with(path)