emmental/emmental/audio/tagger.py

169 lines
5.4 KiB
Python

# Copyright 2023 (c) Anna Schumaker
"""Extract tags from an audio file."""
import dataclasses
import mutagen
import pathlib
import re
@dataclasses.dataclass
class _Artist:
"""Class for holding Artist-related tags."""
name: str
mbid: str
def __lt__(self, rhs) -> bool:
lhs = (self.name is not None, self.name, self.mbid)
return lhs < (rhs.name is not None, rhs.name, rhs.mbid)
@dataclasses.dataclass
class _Album:
    """Container for the album-level tags of one audio file."""

    # Album title, taken from the "album" tag.
    name: str
    # Value of the "musicbrainz_releasegroupid" tag.
    mbid: str
    # Album artist as a single display string.
    artist: str
    # Release date string exactly as found in the tags.
    release: str
    # Location of the cover image that belongs to the album.
    cover: pathlib.Path
    # One entry per album artist.
    artists: list[_Artist]
@dataclasses.dataclass
class _Medium:
    """Container for the tags that describe one medium (disc) of an album."""

    # Position of the medium, read from the "discnumber" tag.
    number: int
    # Optional subtitle of the medium, read from the "discsubtitle" tag.
    name: str
    # Kind of medium, read from the "media" tag.
    type: str
@dataclasses.dataclass
class _Track:
    """Class for holding Track-related tags."""

    # Track artist as a single display string ("artist" tag).
    artist: str
    # Length of the audio stream as handed to _Tags by its caller.
    length: int
    # Value of the "musicbrainz_releasetrackid" tag. This is a string (empty
    # when the tag is missing), like the mbid fields of the other tag classes.
    mbid: str
    # Modification time of the file the tags were read from.
    mtime: float
    # Position of the track, read from the "tracknumber" tag.
    number: int
    # Track title ("title" tag).
    title: str
class _Tags:
"""Extract tags found in the Mutagen tag dictionary."""
def __init__(self, file: pathlib.Path, tags: dict,
length: int = 0, mtime: float = 0.0):
"""Initialize the Tagger."""
self.file = file
self.tags = tags
self.artists = sorted(self.list_artists())
self.album = _Album(tags.get("album", [""])[0],
tags.get("musicbrainz_releasegroupid", [""])[0],
self.get_album_artist(),
self.get_release(),
file.parent / "cover.jpg",
sorted(self.list_album_artists()))
self.medium = _Medium(int(tags.get("discnumber", [1])[0]),
tags.get("discsubtitle", [""])[0],
tags.get("media", [""])[0])
self.track = _Track(tags.get("artist", [""])[0],
length,
tags.get("musicbrainz_releasetrackid", [""])[0],
mtime,
int(tags.get("tracknumber", [0])[0]),
tags.get("title", [""])[0])
self.genres = sorted(self.list_genres())
self.year = self.get_year()
def get_album_artist(self) -> str:
"""Find the album artist of the file."""
if (res := self.tags.get("albumartist")) is None:
res = self.tags.get("artist", [""])
return res[0]
def list_album_artists(self) -> list[_Artist]:
"""Find the list of album artists for the track."""
artists = self.tags.get("albumartist", [])
mbids = self.tags.get("musicbrainz_albumartistid", len(artists) * [""])
if len(artists) != len(mbids):
artists = [None] * len(mbids)
map = {a.mbid: a for a in self.artists}
map.update({(a.name, a.mbid): a for a in self.artists})
return [map.get(m, map.get((a, m))) for (a, m) in zip(artists, mbids)]
def list_artists(self) -> list[_Artist]:
"""Find the list of artists for the track."""
artists = self.tags.get("artists", [])
mbids = self.tags.get("musicbrainz_artistid", len(artists) * [""])
found = set()
need = set()
if len(artists) == 0 and len(mbids) == 0:
res = {(a, "") for a in self.tags.get("artist", [])}
elif len(artists) == len(mbids):
res = {(a, m) for (a, m) in zip(artists, mbids)}
found.update({m for m in mbids if len(m)})
else:
res = {(None, mbid) for mbid in mbids}
need.update({mbid for mbid in mbids if len(mbid)})
albumartists = self.tags.get("albumartist", [])
mbids = self.tags.get("musicbrainz_albumartistid",
len(albumartists) * [""])
if len(albumartists) == len(mbids):
res.update({(a, m) for (a, m) in zip(albumartists, mbids)})
found.update({m for m in mbids if len(m)})
else:
res.update({(None, mbid) for mbid in mbids})
need.update({mbid for mbid in mbids if len(mbid)})
res.difference_update({(None, mbid) for mbid in found & need})
return [_Artist(a, m) for (a, m) in list(res)]
def list_genres(self) -> list[str]:
"""Find the genres of the file."""
res = []
for genre in self.tags.get("genre", []):
res.extend([g.strip() for g in re.split("[,;/]", genre)])
for reltype in self.tags.get("releasetype", []):
match reltype:
case "album" | "compilation": continue
case "ep": res.append("EP")
case _: res.append(reltype.title())
return res
def get_release(self) -> str:
"""Find the release date of the file."""
if (res := self.tags.get("originaldate")) is None:
if (res := self.tags.get("originalyear")) is None:
if (res := self.tags.get("date")) is None:
res = self.tags.get("year", [""])
return res[0]
def get_year(self) -> int | None:
"""Find the year in the release string."""
if len(self.album.release):
return int(re.match(r"\d+", self.album.release).group(0))
def tag_file(file: pathlib.Path, mtime: float | None) -> _Tags | None:
    """Read the tags of the requested file.

    mtime is the modification time the caller already knows about; pass None
    to tag the file unconditionally. The result is None when the path is not
    a regular file, when the file is not newer than mtime, or when
    mutagen.File() returns None for it.
    """
    if not file.is_file():
        return None

    file_mtime = file.stat().st_mtime
    is_newer = mtime is None or file_mtime > mtime
    if not is_newer:
        return None

    tags = mutagen.File(file)
    if tags is None:
        return None
    return _Tags(file, tags, tags.info.length, file_mtime)