From af07f45b2dbdb961ae6bb284f5677706bee87525 Mon Sep 17 00:00:00 2001 From: Henry Date: Thu, 1 Jan 2026 19:08:53 -0800 Subject: [PATCH 01/12] Refactor plugin, add type hints --- beetsplug/fromfilename.py | 129 +++++++++++++++++++------------------- 1 file changed, 65 insertions(+), 64 deletions(-) diff --git a/beetsplug/fromfilename.py b/beetsplug/fromfilename.py index c3fb4bc6bf..ad3cd7d0f6 100644 --- a/beetsplug/fromfilename.py +++ b/beetsplug/fromfilename.py @@ -18,8 +18,11 @@ import os import re +import typing -from beets import plugins +from beets.importer import ImportSession, ImportTask +from beets.library import Item +from beets.plugins import BeetsPlugin from beets.util import displayable_path # Filename field extraction patterns. @@ -42,20 +45,22 @@ ] -def equal(seq): +def equal(seq: list[str]): """Determine whether a sequence holds identical elements.""" return len(set(seq)) <= 1 -def equal_fields(matchdict, field): +def equal_fields(matchdict: dict[typing.Any, dict[str, str]], field: str): """Do all items in `matchdict`, whose values are dictionaries, have the same value for `field`? (If they do, the field is probably not the title.) """ - return equal(m[field] for m in matchdict.values()) + return equal(list(m[field] for m in matchdict.values())) -def all_matches(names, pattern): +def all_matches( + names: dict[Item, str], pattern: str +) -> dict[Item, dict[str, str]] | None: """If all the filenames in the item/filename mapping match the pattern, return a dictionary mapping the items to dictionaries giving the value for each named subpattern in the match. Otherwise, @@ -74,7 +79,7 @@ def all_matches(names, pattern): return matches -def bad_title(title): +def bad_title(title: str) -> bool: """Determine whether a given title is "bad" (empty or otherwise meaningless) and in need of replacement. """ @@ -84,62 +89,12 @@ def bad_title(title): return False -def apply_matches(d, log): - """Given a mapping from items to field dicts, apply the fields to - the objects. - """ - some_map = list(d.values())[0] - keys = some_map.keys() - - # Only proceed if the "tag" field is equal across all filenames. - if "tag" in keys and not equal_fields(d, "tag"): - return - - # Given both an "artist" and "title" field, assume that one is - # *actually* the artist, which must be uniform, and use the other - # for the title. This, of course, won't work for VA albums. - # Only check for "artist": patterns containing it, also contain "title" - if "artist" in keys: - if equal_fields(d, "artist"): - artist = some_map["artist"] - title_field = "title" - elif equal_fields(d, "title"): - artist = some_map["title"] - title_field = "artist" - else: - # Both vary. Abort. - return - - for item in d: - if not item.artist: - item.artist = artist - log.info("Artist replaced with: {.artist}", item) - # otherwise, if the pattern contains "title", use that for title_field - elif "title" in keys: - title_field = "title" - else: - title_field = None - - # Apply the title and track, if any. - for item in d: - if title_field and bad_title(item.title): - item.title = str(d[item][title_field]) - log.info("Title replaced with: {.title}", item) - - if "track" in d[item] and item.track == 0: - item.track = int(d[item]["track"]) - log.info("Track replaced with: {.track}", item) - - -# Plugin structure and hook into import process. - - -class FromFilenamePlugin(plugins.BeetsPlugin): - def __init__(self): +class FromFilenamePlugin(BeetsPlugin): + def __init__(self) -> None: super().__init__() self.register_listener("import_task_start", self.filename_task) - def filename_task(self, task, session): + def filename_task(self, task: ImportTask, session: ImportSession) -> None: """Examine each item in the task to see if we can extract a title from the filename. Try to match all filenames to a number of regexps, starting with the most complex patterns and successively @@ -147,14 +102,15 @@ def filename_task(self, task, session): same regex we can make an educated guess of which part of the regex that contains the title. """ - items = task.items if task.is_album else [task.item] + # Create the list of items to process + items: list[Item] = task.items # Look for suspicious (empty or meaningless) titles. missing_titles = sum(bad_title(i.title) for i in items) if missing_titles: # Get the base filenames (no path or extension). - names = {} + names: dict[Item, str] = {} for item in items: path = displayable_path(item.path) name, _ = os.path.splitext(os.path.basename(path)) @@ -163,6 +119,51 @@ def filename_task(self, task, session): # Look for useful information in the filenames. for pattern in PATTERNS: self._log.debug(f"Trying pattern: {pattern}") - d = all_matches(names, pattern) - if d: - apply_matches(d, self._log) + if d := all_matches(names, pattern): + self._apply_matches(d) + + def _apply_matches(self, d: dict[Item, dict[str, str]]) -> None: + """Given a mapping from items to field dicts, apply the fields to + the objects. + """ + some_map = list(d.values())[0] + keys = some_map.keys() + + # Only proceed if the "tag" field is equal across all filenames. + if "tag" in keys and not equal_fields(d, "tag"): + return + + # Given both an "artist" and "title" field, assume that one is + # *actually* the artist, which must be uniform, and use the other + # for the title. This, of course, won't work for VA albums. + # Only check for "artist": patterns containing it, also contain "title" + if "artist" in keys: + if equal_fields(d, "artist"): + artist = some_map["artist"] + title_field = "title" + elif equal_fields(d, "title"): + artist = some_map["title"] + title_field = "artist" + else: + # Both vary. Abort. + return + + for item in d: + if not item.artist: + item.artist = artist + self._log.info(f"Artist replaced with: {item.artist}") + # otherwise, if the pattern contains "title", use that for title_field + elif "title" in keys: + title_field = "title" + else: + title_field = None + + # Apply the title and track, if any. + for item in d: + if title_field and bad_title(item.title): + item.title = str(d[item][title_field]) + self._log.info(f"Title replaced with: {item.title}") + + if "track" in d[item] and item.track == 0: + item.track = int(d[item]["track"]) + self._log.info(f"Track replaced with: {item.track}") From 683786a09f313db5b1cea77bfaf2a349481b6f43 Mon Sep 17 00:00:00 2001 From: Henry Date: Thu, 1 Jan 2026 23:44:46 -0800 Subject: [PATCH 02/12] Rewrite and extend regex, extend bad titles --- beetsplug/fromfilename.py | 84 +++++++++++++++++++++---------- test/plugins/test_fromfilename.py | 81 +++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+), 27 deletions(-) diff --git a/beetsplug/fromfilename.py b/beetsplug/fromfilename.py index ad3cd7d0f6..ec250a471d 100644 --- a/beetsplug/fromfilename.py +++ b/beetsplug/fromfilename.py @@ -25,24 +25,35 @@ from beets.plugins import BeetsPlugin from beets.util import displayable_path -# Filename field extraction patterns. -PATTERNS = [ - # Useful patterns. - ( - r"^(?P\d+)\.?\s*-\s*(?P.+?)\s*-\s*(?P.+?)" - r"(\s*-\s*(?P<tag>.*))?$" - ), - r"^(?P<artist>.+?)\s*-\s*(?P<title>.+?)(\s*-\s*(?P<tag>.*))?$", - r"^(?P<track>\d+)\.?[\s_-]+(?P<title>.+)$", - r"^(?P<title>.+) by (?P<artist>.+)$", - r"^(?P<track>\d+).*$", - r"^(?P<title>.+)$", -] - -# Titles considered "empty" and in need of replacement. -BAD_TITLE_PATTERNS = [ - r"^$", -] +# Filename field extraction patterns +RE_TRACK_INFO = re.compile( + r""" + (?P<disc>\d+(?=[\.\-_]\d))? + # a disc must be followed by punctuation and a digit + [\.\-]{,1} + # disc punctuation + (?P<track>\d+)? + # match the track number + [\.\-_\s]* + # artist separators + (?P<artist>.+?(?=[\s*_]?[\.\-by].+))? + # artist match depends on title existing + [\.\-_\s]* + (?P<by>by)? + # if 'by' is found, artist and title will need to be swapped + [\.\-_\s]* + # title separators + (?P<title>.+)? + # match the track title + """, + re.VERBOSE | re.IGNORECASE, +) + +# Match the disc names of parent folders +RE_DISC = re.compile(r"((?:cd|disc)\s*\d+)", re.IGNORECASE) + +# Matches fields that are empty or only whitespace +RE_BAD_TITLE = re.compile(r"^\s*$") def equal(seq: list[str]): @@ -83,9 +94,8 @@ def bad_title(title: str) -> bool: """Determine whether a given title is "bad" (empty or otherwise meaningless) and in need of replacement. """ - for pat in BAD_TITLE_PATTERNS: - if re.match(pat, title, re.IGNORECASE): - return True + if RE_BAD_TITLE.match(title): + return True return False @@ -117,10 +127,29 @@ def filename_task(self, task: ImportTask, session: ImportSession) -> None: names[item] = name # Look for useful information in the filenames. - for pattern in PATTERNS: - self._log.debug(f"Trying pattern: {pattern}") - if d := all_matches(names, pattern): - self._apply_matches(d) + matches: dict[Item, dict[str, str]] = {} + for item, name in names.items(): + m = self.parse_track_info(name) + matches[item] = m + self._apply_matches(matches) + + def parse_track_info(self, text: str) -> dict[str, str]: + m = RE_TRACK_INFO.match(text) + matches = m.groupdict() + # if the phrase "by" is matched, swap + # artist and title + if matches["by"]: + artist = matches["title"] + matches["title"] = matches["artist"] + matches["artist"] = artist + del matches["by"] + # if all fields except track are none + # set title to track - we can't be sure if it's the + # index or track number + if set(matches.values()) == {None, matches["track"]}: + matches["title"] = matches["track"] + + return matches def _apply_matches(self, d: dict[Item, dict[str, str]]) -> None: """Given a mapping from items to field dicts, apply the fields to @@ -149,7 +178,7 @@ def _apply_matches(self, d: dict[Item, dict[str, str]]) -> None: return for item in d: - if not item.artist: + if not item.artist and artist: item.artist = artist self._log.info(f"Artist replaced with: {item.artist}") # otherwise, if the pattern contains "title", use that for title_field @@ -165,5 +194,6 @@ def _apply_matches(self, d: dict[Item, dict[str, str]]) -> None: self._log.info(f"Title replaced with: {item.title}") if "track" in d[item] and item.track == 0: - item.track = int(d[item]["track"]) + if d[item]["track"]: + item.track = int(d[item]["track"]) self._log.info(f"Track replaced with: {item.track}") diff --git a/test/plugins/test_fromfilename.py b/test/plugins/test_fromfilename.py index f13e88aea2..5f173a8f5f 100644 --- a/test/plugins/test_fromfilename.py +++ b/test/plugins/test_fromfilename.py @@ -36,6 +36,73 @@ def __init__(self, items): self.is_album = True +@pytest.mark.parametrize( + "text,matchgroup", + [ + ("3", {"disc": None, "track": "3", "artist": None, "title": "3"}), + ("04", {"disc": None, "track": "04", "artist": None, "title": "04"}), + ("6.", {"disc": None, "track": "6", "artist": None, "title": "6"}), + ("3.5", {"disc": "3", "track": "5", "artist": None, "title": None}), + ("1-02", {"disc": "1", "track": "02", "artist": None, "title": None}), + ("100-4", {"disc": "100", "track": "4", "artist": None, "title": None}), + ( + "04.Title", + {"disc": None, "track": "04", "artist": None, "title": "Title"}, + ), + ( + "5_-_Title", + {"disc": None, "track": "5", "artist": None, "title": "Title"}, + ), + ( + "1-02 Title", + {"disc": "1", "track": "02", "artist": None, "title": "Title"}, + ), + ( + "3.5 - Title", + {"disc": "3", "track": "5", "artist": None, "title": "Title"}, + ), + ( + "5_-_Artist_-_Title", + {"disc": None, "track": "5", "artist": "Artist", "title": "Title"}, + ), + ( + "3-8- Artist-Title", + {"disc": "3", "track": "8", "artist": "Artist", "title": "Title"}, + ), + ( + "4-3 - Artist Name - Title", + { + "disc": "4", + "track": "3", + "artist": "Artist Name", + "title": "Title", + }, + ), + ( + "4-3_-_Artist_Name_-_Title", + { + "disc": "4", + "track": "3", + "artist": "Artist_Name", + "title": "Title", + }, + ), + ( + "6 Title by Artist", + {"disc": None, "track": "6", "artist": "Artist", "title": "Title"}, + ), + ( + "Title", + {"disc": None, "track": None, "artist": None, "title": "Title"}, + ), + ], +) +def test_parse_track_info(text, matchgroup): + f = fromfilename.FromFilenamePlugin() + m = f.parse_track_info(text) + assert matchgroup == m + + @pytest.mark.parametrize( "song1, song2", [ @@ -53,6 +120,20 @@ def __init__(self, items): "Song Two", ), ), + ( + ( + "/tmp/01 The Artist - Song One.m4a", + 1, + "The Artist", + "Song One", + ), + ( + "/tmp/02 The Artist - Song Two.m4a", + 2, + "The Artist", + "Song Two", + ), + ), ( ("/tmp/01-The_Artist-Song_One.m4a", 1, "The_Artist", "Song_One"), ("/tmp/02.-The_Artist-Song_Two.m4a", 2, "The_Artist", "Song_Two"), From a7a5e1e12ab31ceef0bf12fd5c0e7900147603a8 Mon Sep 17 00:00:00 2001 From: Henry <henryoberholtzer@gmail.com> Date: Sat, 3 Jan 2026 22:30:53 -0800 Subject: [PATCH 03/12] Add album fields parsing, refactored tag updating, expanded testing. --- beetsplug/fromfilename.py | 418 ++++++++++++++++++++++++------ docs/plugins/fromfilename.rst | 68 ++++- test/plugins/test_fromfilename.py | 335 +++++++++++++++++++----- 3 files changed, 666 insertions(+), 155 deletions(-) diff --git a/beetsplug/fromfilename.py b/beetsplug/fromfilename.py index ec250a471d..897433aa8d 100644 --- a/beetsplug/fromfilename.py +++ b/beetsplug/fromfilename.py @@ -16,10 +16,15 @@ (possibly also extract track and artist) """ -import os import re -import typing +from datetime import datetime +from functools import cached_property +from pathlib import Path +from typing import Any, TypedDict +from typing_extensions import NotRequired + +from beets import config from beets.importer import ImportSession, ImportTask from beets.library import Item from beets.plugins import BeetsPlugin @@ -28,24 +33,41 @@ # Filename field extraction patterns RE_TRACK_INFO = re.compile( r""" - (?P<disc>\d+(?=[\.\-_]\d))? - # a disc must be followed by punctuation and a digit - [\.\-]{,1} - # disc punctuation - (?P<track>\d+)? - # match the track number - [\.\-_\s]* - # artist separators - (?P<artist>.+?(?=[\s*_]?[\.\-by].+))? - # artist match depends on title existing - [\.\-_\s]* - (?P<by>by)? - # if 'by' is found, artist and title will need to be swapped - [\.\-_\s]* - # title separators - (?P<title>.+)? - # match the track title - """, + (?P<disc>\d+(?=[\.\-_]\d))? + # a disc must be followed by punctuation and a digit + [\.\-]{,1} + # disc punctuation + (?P<track>\d+)? + # match the track number + [\.\-_\s]* + # artist separators + (?P<artist>.+?(?=[\s*_]?[\.\-by].+))? + # artist match depends on title existing + [\.\-_\s]* + (?P<by>by)? + # if 'by' is found, artist and title will need to be swapped + [\.\-_\s]* + # title separators + (?P<title>.+)? + # match the track title + """, + re.VERBOSE | re.IGNORECASE, +) + +# Catalog number extraction pattern +RE_CATALOGNUM = re.compile( + r""" + [\(\[\{] + # starts with a bracket + (?!flac|mp3|wav) + # does not start with file format + (?P<catalognum>[\w\s]+) + # actual catalog number + (?<!flac|.mp3|.wav) + # does not end with file format + [\)\]\}] + # ends with a bracker + """, re.VERBOSE | re.IGNORECASE, ) @@ -53,25 +75,72 @@ RE_DISC = re.compile(r"((?:cd|disc)\s*\d+)", re.IGNORECASE) # Matches fields that are empty or only whitespace -RE_BAD_TITLE = re.compile(r"^\s*$") +RE_BAD_FIELD = re.compile(r"^\s*$") + +# First priority for matching a year is a year surrounded +# by brackets, dashes, or punctuation +RE_YEAR_BRACKETED = re.compile( + r"[\(\[\{\-\_]\s*(?P<year>\d{4}).*?[\)\]\}\-\_,]" +) + +# Look for a year at the start +RE_YEAR_START = re.compile(r"^(?P<year>\d{4})") + +# Look for a year at the end +RE_YEAR_END = re.compile(r"$(?P<year>\d{4})") + +# Just look for four digits +RE_YEAR_ANY = re.compile(r"(?P<year>\d{4})") + +# All year regexp in order of preference +YEAR_REGEX = [RE_YEAR_BRACKETED, RE_YEAR_START, RE_YEAR_END, RE_YEAR_ANY] + +RE_MEDIA = re.compile( + r""" + [\(\[\{].*? + ((?P<vinyl>vinyl)| + (?P<cd>cd)| + (?P<web>web)| + (?P<cassette>cassette)) + .*?[\)\]\}] + """, + re.VERBOSE | re.IGNORECASE, +) + +RE_VARIOUS = re.compile(r"va(rious)?(\sartists)?", re.IGNORECASE) + +RE_SPLIT = re.compile(r"[\-\_]+") + +RE_BRACKETS = re.compile(r"[\(\[\{].*?[\)\]\}]") -def equal(seq: list[str]): - """Determine whether a sequence holds identical elements.""" - return len(set(seq)) <= 1 +class TrackMatches(TypedDict): + disc: str | None + track: str | None + by: NotRequired[str | None] + artist: str | None + title: str | None -def equal_fields(matchdict: dict[typing.Any, dict[str, str]], field: str): +class AlbumMatches(TypedDict): + albumartist: str | None + album: str | None + year: str | None + catalognum: str | None + media: str | None + + +def equal_fields(matchdict: dict[Any, TrackMatches], field: str) -> bool: """Do all items in `matchdict`, whose values are dictionaries, have the same value for `field`? (If they do, the field is probably not the title.) """ - return equal(list(m[field] for m in matchdict.values())) + return len(set(m[field] for m in matchdict.values())) <= 1 def all_matches( names: dict[Item, str], pattern: str -) -> dict[Item, dict[str, str]] | None: +) -> dict[Item, TrackMatches] | None: """If all the filenames in the item/filename mapping match the pattern, return a dictionary mapping the items to dictionaries giving the value for each named subpattern in the match. Otherwise, @@ -90,20 +159,32 @@ def all_matches( return matches -def bad_title(title: str) -> bool: - """Determine whether a given title is "bad" (empty or otherwise - meaningless) and in need of replacement. - """ - if RE_BAD_TITLE.match(title): - return True - return False - - class FromFilenamePlugin(BeetsPlugin): def __init__(self) -> None: super().__init__() + self.config.add( + { + "fields": [ + "disc", + "track", + "title", + "artist", + "albumartist", + "media", + "catalognum", + ] + } + ) self.register_listener("import_task_start", self.filename_task) + @cached_property + def current_year(self) -> int: + return datetime.now().year + + @cached_property + def fields(self) -> set[str]: + return set(self.config["fields"].as_str_seq()) + def filename_task(self, task: ImportTask, session: ImportSession) -> None: """Examine each item in the task to see if we can extract a title from the filename. Try to match all filenames to a number of @@ -113,87 +194,252 @@ def filename_task(self, task: ImportTask, session: ImportSession) -> None: regex that contains the title. """ # Create the list of items to process + + # TODO: If it's a singleton import task, use the .item field items: list[Item] = task.items + # TODO: Switch this to gather data anyway, but only + # update where missing # Look for suspicious (empty or meaningless) titles. - missing_titles = sum(bad_title(i.title) for i in items) + missing_titles = sum(self._bad_field(i.title) for i in items) if missing_titles: # Get the base filenames (no path or extension). + parent_path: str = "" names: dict[Item, str] = {} for item in items: - path = displayable_path(item.path) - name, _ = os.path.splitext(os.path.basename(path)) + path: Path = Path(displayable_path(item.path)) + name = path.stem names[item] = name + if not parent_path: + parent_path = path.parent.stem + self._log.debug(f"Parent Path: {parent_path}") + album_matches: AlbumMatches = self.parse_album_info(parent_path) + self._log.debug(album_matches) # Look for useful information in the filenames. - matches: dict[Item, dict[str, str]] = {} + track_matches: dict[Item, TrackMatches] = {} for item, name in names.items(): m = self.parse_track_info(name) - matches[item] = m - self._apply_matches(matches) + track_matches[item] = m + self._apply_matches(album_matches, track_matches) - def parse_track_info(self, text: str) -> dict[str, str]: + def parse_track_info(self, text: str) -> TrackMatches: m = RE_TRACK_INFO.match(text) - matches = m.groupdict() + matches: TrackMatches = m.groupdict() # if the phrase "by" is matched, swap # artist and title if matches["by"]: artist = matches["title"] matches["title"] = matches["artist"] matches["artist"] = artist + # remove that key del matches["by"] - # if all fields except track are none - # set title to track - we can't be sure if it's the - # index or track number + # if all fields except `track` are none + # set title to track number as well + # we can't be sure if it's actually the track number + # or track title if set(matches.values()) == {None, matches["track"]}: matches["title"] = matches["track"] return matches - def _apply_matches(self, d: dict[Item, dict[str, str]]) -> None: - """Given a mapping from items to field dicts, apply the fields to - the objects. + def parse_album_info(self, text: str) -> AlbumMatches: + matches: AlbumMatches = { + "albumartist": None, + "album": None, + "year": None, + "catalognum": None, + "media": None, + } + # Start with the extra fields to make parsing + # the album artist and artist field easier + year, span = self._parse_year(text) + if year: + # Remove it from the string if found + text = self._mutate_string(text, span) + matches["year"] = year + + # Look for the catalog number, it must be in brackets + # It will not contain the filetype, flac, mp3, wav, etc + catalognum, span = self._parse_catalognum(text) + if catalognum: + text = self._mutate_string(text, span) + matches["catalognum"] = catalognum + # Look for a media type + media, span = self._parse_media(text) + if media: + text = self._mutate_string(text, span) + matches["media"] = media + + # Remove anything left within brackets + brackets = RE_BRACKETS.search(text) + while brackets: + span = brackets.span() + text = self._mutate_string(text, span) + brackets = RE_BRACKETS.search(text) + # Remaining text used for album, albumartist + album, albumartist = self._parse_album_and_albumartist(text) + matches["album"] = album + matches["albumartist"] = albumartist + + return matches + + def _parse_album_and_albumartist( + self, text + ) -> tuple[str | None, str | None]: + """Takes the remaining string and splits it along common dividers. + Assumes the first field to be the albumartist and the last field to be the + album. Checks against various artist fields. + """ + possible_albumartist = None + possible_album = None + # What is left we can assume to contain the title and artist + remaining = [ + f for field in RE_SPLIT.split(text) if (f := field.strip()) + ] + if remaining: + # If two fields remain, assume artist and album artist + if len(remaining) == 2: + possible_albumartist = remaining[0] + possible_album = remaining[1] + # Look for known album artists + # VA, Various, Vartious Artists will all result in + # using the beets VA default for album artist name + # assume the artist comes before the title in most situations + if RE_VARIOUS.match(possible_album): + possible_album = possible_albumartist + possible_albumartist = config["va_name"].as_str() + elif RE_VARIOUS.match(possible_albumartist): + possible_albumartist = config["va_name"].as_str() + else: + # If one field remains, assume album title + possible_album = remaining[0].strip() + return possible_album, possible_albumartist + + def _parse_year(self, text: str) -> tuple[str | None, tuple[int, int]]: + """The year will be a four digit number. The search goes + through a list of ordered patterns to try and find the year. + To be a valid year, it must be less than the current year. + """ + year = None + span = (0, 0) + for exp in YEAR_REGEX: + match = exp.search(text) + if not match: + continue + year_candidate = match.group("year") + # If the year is matched and not in the future + if year_candidate and int(year_candidate) <= self.current_year: + year = year_candidate + span = match.span() + break + return year, span + + def _parse_media(self, text: str) -> tuple[str | None, tuple[int, int]]: + """Look for the media type, we are only interested in a few common + types - CD, Vinyl, Cassette or WEB. To avoid overreach, in the + case of titles containing a medium, only searches for media types + within a pair of brackets. """ - some_map = list(d.values())[0] - keys = some_map.keys() + mappings = { + "cd": "CD", + "vinyl": "Vinyl", + "web": "Digital Media", + "cassette": "Cassette", + } + match = RE_MEDIA.search(text) + if match: + media = None + for key, value in match.groupdict().items(): + if value: + media = mappings[key] + return media, match.span() + return None, (0, 0) - # Only proceed if the "tag" field is equal across all filenames. - if "tag" in keys and not equal_fields(d, "tag"): - return + def _parse_catalognum( + self, text: str + ) -> tuple[str | None, tuple[int, int]]: + match = RE_CATALOGNUM.search(text) + # assert that it cannot be mistaken for a media type + if match and not RE_MEDIA.match(match[0]): + return match.group("catalognum"), match.span() + return None, (0, 0) + + def _mutate_string(self, text, span: tuple[int, int]) -> str: + """Replace a matched field with a seperator""" + start, end = span + text = text[:start] + "-" + text[end:] + return text + + def _sanity_check_matches( + self, album_match: AlbumMatches, track_matches: dict[Item, TrackMatches] + ) -> None: + """Check to make sure data is coherent between + track and album matches. Largely looking to see + if the arist and album artist fields are properly + identified. + """ + # If the album artist is not various artists + # check that all artists, if any, match + # if they do not, try seeing if all the titles match + # if all the titles match, swap title and artist fields + + # If the suspected title and albumartist fields are not equal + # we have ruled out a self titled album + # Check if the suspected title appears in the track artists + # If so, we should swap the title and albumartist in albummatches + + # If any track title is the same as the album artist + # some_map = list(track_matches.values())[0] + # keys = some_map.keys() # Given both an "artist" and "title" field, assume that one is # *actually* the artist, which must be uniform, and use the other # for the title. This, of course, won't work for VA albums. # Only check for "artist": patterns containing it, also contain "title" - if "artist" in keys: - if equal_fields(d, "artist"): - artist = some_map["artist"] - title_field = "title" - elif equal_fields(d, "title"): - artist = some_map["title"] - title_field = "artist" - else: - # Both vary. Abort. - return - - for item in d: - if not item.artist and artist: - item.artist = artist - self._log.info(f"Artist replaced with: {item.artist}") - # otherwise, if the pattern contains "title", use that for title_field - elif "title" in keys: - title_field = "title" - else: - title_field = None - - # Apply the title and track, if any. - for item in d: - if title_field and bad_title(item.title): - item.title = str(d[item][title_field]) - self._log.info(f"Title replaced with: {item.title}") - - if "track" in d[item] and item.track == 0: - if d[item]["track"]: - item.track = int(d[item]["track"]) - self._log.info(f"Track replaced with: {item.track}") + # if "artist" in keys: + # if equal_fields(track_matches, "artist"): + # artist = some_map["artist"] + # title_field = "title" + # elif equal_fields(track_matches, "title"): + # artist = some_map["title"] + # title_field = "artist" + # else: + # # Both vary. Abort. + # return + # + # for item in track_matches: + # if not item.artist and artist: + # item.artist = artist + # self._log.info(f"Artist replaced with: {item.artist}") + # # otherwise, if the pattern contains "title", use that for title_field + + return + + def _apply_matches( + self, album_match: AlbumMatches, track_matches: dict[Item, TrackMatches] + ) -> None: + """Apply all valid matched fields to all items in the match dictionary.""" + match = album_match + for item in track_matches: + match.update(track_matches[item]) + found_data: dict[str, int | str] = {} + self._log.debug(f"Attempting keys: {match.keys()}") + for key in match.keys(): + if key in self.fields: + old_value = item.get(key) + new_value = match[key] + if self._bad_field(old_value) and new_value: + found_data[key] = new_value + self._log.info(f"Item updated with: {found_data.values()}") + item.update(found_data) + + @staticmethod + def _bad_field(field: str | int) -> bool: + """Determine whether a given title is "bad" (empty or otherwise + meaningless) and in need of replacement. + """ + if isinstance(field, int): + return True if field <= 0 else False + return True if RE_BAD_FIELD.match(field) else False diff --git a/docs/plugins/fromfilename.rst b/docs/plugins/fromfilename.rst index e78677b86c..1e8d0cc7dd 100644 --- a/docs/plugins/fromfilename.rst +++ b/docs/plugins/fromfilename.rst @@ -5,8 +5,72 @@ The ``fromfilename`` plugin helps to tag albums that are missing tags altogether but where the filenames contain useful information like the artist and title. When you attempt to import a track that's missing a title, this plugin will look -at the track's filename and guess its track number, title, and artist. These -will be used to search in MusicBrainz and match track ordering. +at the track's filename and guess its disc, track number, title, and artist. +These will be used to search for metadata and match track ordering. To use the ``fromfilename`` plugin, enable it in your configuration (see :ref:`using-plugins`). + +Configuration +------------- + +Configuration for ``fromfilename`` allows you to choose what fields the plugin +attempts to contribute to files missing information. + +Default +~~~~~~~ + +.. code-block:: yaml + + fromfilename: + fields: + - artist + - album + - albumartist + - catalognum + - disc + - media + - title + - track + +Recognized Patterns +------------------- + +Examples of paths that the plugin can parse successfully, and the fields +retrieved. + +.. code-block:: yaml + + "/Artist - Album (2025)/03.wav" + album: Album + albumartist: Artist + title: "03" + track: 3 + + "/[CAT123] Album - Various [WEB-FLAC]/2-10 - Artist - Song One.flac" + artist: Artist + album: Album + albumartist: Various Artists + catalognum: CAT123 + disc: 2 + media: Digital Media + title: Song One + track: 10 + + "/1-23.flac" + disc: 1 + track: 23 + + "/04. Song.mp3" + title: Song + track: 4 + + "/5_-_My_Artist_-_My_Title.m4a" + artist: My_Artist + title: My_Title + track: 5 + + "/8 Song by Artist.wav" + artist: Artist + title: Song + track: 8 diff --git a/test/plugins/test_fromfilename.py b/test/plugins/test_fromfilename.py index 5f173a8f5f..f887f13bb1 100644 --- a/test/plugins/test_fromfilename.py +++ b/test/plugins/test_fromfilename.py @@ -13,8 +13,12 @@ """Tests for the fromfilename plugin.""" +from dataclasses import dataclass + import pytest +from beets.library import Item +from beets.test.helper import ConfigMixin from beetsplug import fromfilename @@ -22,18 +26,25 @@ class Session: pass -class Item: - def __init__(self, path): - self.path = path - self.track = 0 - self.artist = "" - self.title = "" +def mock_item(**kwargs): + defaults = dict( + title="", + artist="", + albumartist="", + album="", + disc=0, + track=0, + catalognum="", + media="", + mtime=12345, + ) + return Item(**{**defaults, **kwargs}) +@dataclass class Task: - def __init__(self, items): - self.items = items - self.is_album = True + items: list[Item] + is_album: bool = True @pytest.mark.parametrize( @@ -104,77 +115,267 @@ def test_parse_track_info(text, matchgroup): @pytest.mark.parametrize( - "song1, song2", + "text,matchgroup", [ ( - ( - "/tmp/01 - The Artist - Song One.m4a", - 1, - "The Artist", - "Song One", - ), - ( - "/tmp/02. - The Artist - Song Two.m4a", - 2, - "The Artist", - "Song Two", - ), + # highly unlikely + "", + { + "albumartist": None, + "album": None, + "year": None, + "catalognum": None, + "media": None, + }, ), ( - ( - "/tmp/01 The Artist - Song One.m4a", - 1, - "The Artist", - "Song One", - ), - ( - "/tmp/02 The Artist - Song Two.m4a", - 2, - "The Artist", - "Song Two", - ), + "1970", + { + "albumartist": None, + "album": None, + "year": "1970", + "catalognum": None, + "media": None, + }, + ), + ( + "Album Title", + { + "albumartist": None, + "album": "Album Title", + "year": None, + "catalognum": None, + "media": None, + }, + ), + ( + "Artist - Album Title", + { + "albumartist": "Artist", + "album": "Album Title", + "year": None, + "catalognum": None, + "media": None, + }, + ), + ( + "Artist - Album Title (2024)", + { + "albumartist": "Artist", + "album": "Album Title", + "year": "2024", + "catalognum": None, + "media": None, + }, + ), + ( + "Artist - 2024 - Album Title [flac]", + { + "albumartist": "Artist", + "album": "Album Title", + "year": "2024", + "catalognum": None, + "media": None, + }, + ), + ( + "(2024) Album Title [CATALOGNUM] WEB", + # sometimes things are just going to be unparsable + { + "albumartist": "Album Title", + "album": "WEB", + "year": "2024", + "catalognum": "CATALOGNUM", + "media": None, + }, + ), + ( + "{2024} Album Artist - Album Title [INFO-WAV]", + { + "albumartist": "Album Artist", + "album": "Album Title", + "year": "2024", + "catalognum": None, + "media": None, + }, ), ( - ("/tmp/01-The_Artist-Song_One.m4a", 1, "The_Artist", "Song_One"), - ("/tmp/02.-The_Artist-Song_Two.m4a", 2, "The_Artist", "Song_Two"), + "VA - Album Title [2025] [CD-FLAC]", + { + "albumartist": "Various Artists", + "album": "Album Title", + "year": "2025", + "catalognum": None, + "media": "CD", + }, ), ( - ("/tmp/01 - Song_One.m4a", 1, "", "Song_One"), - ("/tmp/02. - Song_Two.m4a", 2, "", "Song_Two"), + "Artist - Album Title 3000 (1998) [FLAC] {CATALOGNUM}", + { + "albumartist": "Artist", + "album": "Album Title 3000", + "year": "1998", + "catalognum": "CATALOGNUM", + "media": None, + }, ), ( - ("/tmp/Song One by The Artist.m4a", 0, "The Artist", "Song One"), - ("/tmp/Song Two by The Artist.m4a", 0, "The Artist", "Song Two"), + "various - cd album (2023) [catalognum 123] {vinyl mp3}", + { + "albumartist": "Various Artists", + "album": "cd album", + "year": "2023", + "catalognum": "catalognum 123", + "media": "Vinyl", + }, ), - (("/tmp/01.m4a", 1, "", "01"), ("/tmp/02.m4a", 2, "", "02")), ( - ("/tmp/Song One.m4a", 0, "", "Song One"), - ("/tmp/Song Two.m4a", 0, "", "Song Two"), + "[CATALOG567] Album - Various (2020) [WEB-FLAC]", + { + "albumartist": "Various Artists", + "album": "Album", + "year": "2020", + "catalognum": "CATALOG567", + "media": "Digital Media", + }, + ), + ( + "Album 3000 {web}", + { + "albumartist": None, + "album": "Album 3000", + "year": None, + "catalognum": None, + "media": "Digital Media", + }, ), ], ) -def test_fromfilename(song1, song2): - """ - Each "song" is a tuple of path, expected track number, expected artist, - expected title. - - We use two songs for each test for two reasons: - - The plugin needs more than one item to look for uniform strings in paths - in order to guess if the string describes an artist or a title. - - Sometimes we allow for an optional "." after the track number in paths. - """ - - session = Session() - item1 = Item(song1[0]) - item2 = Item(song2[0]) - task = Task([item1, item2]) - +def test_parse_album_info(text, matchgroup): f = fromfilename.FromFilenamePlugin() - f.filename_task(task, session) - - assert task.items[0].track == song1[1] - assert task.items[0].artist == song1[2] - assert task.items[0].title == song1[3] - assert task.items[1].track == song2[1] - assert task.items[1].artist == song2[2] - assert task.items[1].title == song2[3] + m = f.parse_album_info(text) + assert matchgroup == m + + +class TestFromFilename(ConfigMixin): + @pytest.mark.parametrize( + "expected_item", + [ + mock_item( + path="/tmp/01 - The Artist - Song One.m4a", + artist="The Artist", + track=1, + title="Song One", + ), + mock_item( + path="/tmp/01 The Artist - Song One.m4a", + artist="The Artist", + track=1, + title="Song One", + ), + mock_item( + path="/tmp/02 The Artist - Song Two.m4a", + artist="The Artist", + track=2, + title="Song Two", + ), + mock_item( + path="/tmp/01-The_Artist-Song_One.m4a", + artist="The_Artist", + track=1, + title="Song_One", + ), + mock_item( + path="/tmp/02.-The_Artist-Song_Two.m4a", + artist="The_Artist", + track=2, + title="Song_Two", + ), + mock_item( + path="/tmp/01 - Song_One.m4a", + track=1, + title="Song_One", + ), + mock_item( + path="/tmp/02. - Song_Two.m4a", + track=2, + title="Song_Two", + ), + mock_item( + path="/tmp/Song One by The Artist.m4a", + artist="The Artist", + title="Song One", + ), + mock_item( + path="/tmp/Song Two by The Artist.m4a", + artist="The Artist", + title="Song Two", + ), + mock_item( + path="/tmp/01.m4a", + track=1, + title="01", + ), + mock_item( + path="/tmp/02.m4a", + track=2, + title="02", + ), + mock_item( + path="/tmp/Song One.m4a", + title="Song One", + ), + mock_item( + path="/tmp/Song Two.m4a", + title="Song Two", + ), + mock_item( + path=( + "/tmp/" + "[CATALOG567] Album - Various - [WEB-FLAC]" + "/2-10 - Artist - Song One.m4a" + ), + album="Album", + artist="Artist", + track=10, + disc=2, + albumartist="Various Artists", + catalognum="CATALOG567", + title="Song One", + media="Digital Media", + ), + mock_item( + path=( + "/tmp/" + "[CATALOG567] Album - Various - [WEB-FLAC]" + "/03-04 - Other Artist - Song Two.m4a" + ), + album="Album", + artist="Other Artist", + disc=3, + track=4, + albumartist="Various Artists", + catalognum="CATALOG567", + title="Song Two", + media="Digital Media", + ), + ], + ) + def test_fromfilename(self, expected_item): + """ + Take expected items, create a task with just the paths. + + After parsing, compare to the original with the expected attributes defined. + """ + task = Task([mock_item(path=expected_item.path)]) + f = fromfilename.FromFilenamePlugin() + f.filename_task(task, Session()) + res = task.items[0] + exp = expected_item + assert res.path == exp.path + assert res.artist == exp.artist + assert res.albumartist == exp.albumartist + assert res.disc == exp.disc + assert res.catalognum == exp.catalognum + assert res.year == exp.year + assert res.title == exp.title From 3e4eedbbc1a28d82d89d565afbef64968e863439 Mon Sep 17 00:00:00 2001 From: Henry <henryoberholtzer@gmail.com> Date: Mon, 5 Jan 2026 21:06:00 -0800 Subject: [PATCH 04/12] Add configurable field test, multi-item test, sanity check --- beetsplug/fromfilename.py | 267 +++++++++++++++------------ docs/plugins/fromfilename.rst | 64 ++++++- test/plugins/test_fromfilename.py | 290 +++++++++++++++++++++++++++++- 3 files changed, 496 insertions(+), 125 deletions(-) diff --git a/beetsplug/fromfilename.py b/beetsplug/fromfilename.py index 897433aa8d..8c905d486d 100644 --- a/beetsplug/fromfilename.py +++ b/beetsplug/fromfilename.py @@ -20,12 +20,12 @@ from datetime import datetime from functools import cached_property from pathlib import Path -from typing import Any, TypedDict +from typing import TypedDict from typing_extensions import NotRequired from beets import config -from beets.importer import ImportSession, ImportTask +from beets.importer import ImportSession, ImportTask, SingletonImportTask from beets.library import Item from beets.plugins import BeetsPlugin from beets.util import displayable_path @@ -41,7 +41,7 @@ # match the track number [\.\-_\s]* # artist separators - (?P<artist>.+?(?=[\s*_]?[\.\-by].+))? + (?P<artist>.+?(?=[\s_]*?[\.\-]|by.+))? # artist match depends on title existing [\.\-_\s]* (?P<by>by)? @@ -66,13 +66,12 @@ (?<!flac|.mp3|.wav) # does not end with file format [\)\]\}] - # ends with a bracker + # ends with a bracket """, re.VERBOSE | re.IGNORECASE, ) -# Match the disc names of parent folders -RE_DISC = re.compile(r"((?:cd|disc)\s*\d+)", re.IGNORECASE) +RE_NAMED_SUBGROUP = re.compile(r"\(\?P\<\w+\>") # Matches fields that are empty or only whitespace RE_BAD_FIELD = re.compile(r"^\s*$") @@ -95,7 +94,7 @@ # All year regexp in order of preference YEAR_REGEX = [RE_YEAR_BRACKETED, RE_YEAR_START, RE_YEAR_END, RE_YEAR_ANY] -RE_MEDIA = re.compile( +RE_MEDIA_TYPE = re.compile( r""" [\(\[\{].*? ((?P<vinyl>vinyl)| @@ -130,61 +129,64 @@ class AlbumMatches(TypedDict): media: str | None -def equal_fields(matchdict: dict[Any, TrackMatches], field: str) -> bool: - """Do all items in `matchdict`, whose values are dictionaries, have - the same value for `field`? (If they do, the field is probably not - the title.) - """ - return len(set(m[field] for m in matchdict.values())) <= 1 - - -def all_matches( - names: dict[Item, str], pattern: str -) -> dict[Item, TrackMatches] | None: - """If all the filenames in the item/filename mapping match the - pattern, return a dictionary mapping the items to dictionaries - giving the value for each named subpattern in the match. Otherwise, - return None. - """ - matches = {} - for item, name in names.items(): - m = re.match(pattern, name, re.IGNORECASE) - if m and m.groupdict(): - # Only yield a match when the regex applies *and* has - # capture groups. Otherwise, no information can be extracted - # from the filename. - matches[item] = m.groupdict() - else: - return None - return matches - - class FromFilenamePlugin(BeetsPlugin): def __init__(self) -> None: super().__init__() self.config.add( { "fields": [ - "disc", - "track", - "title", "artist", + "album", "albumartist", - "media", "catalognum", - ] + "disc", + "media", + "title", + "track", + "year", + ], + "patterns": {"folder": [], "file": []}, + # TODO: Add ignore parent folder } ) self.register_listener("import_task_start", self.filename_task) - @cached_property - def current_year(self) -> int: - return datetime.now().year - @cached_property def fields(self) -> set[str]: return set(self.config["fields"].as_str_seq()) + @cached_property + def file_patterns(self) -> list[re.Pattern[str]]: + return self._to_regex(self.config["patterns"]["file"].as_str_seq()) + + @cached_property + def folder_patterns(self) -> list[re.Pattern[str]]: + return self._to_regex(self.config["patterns"]["folder"].as_str_seq()) + + def _to_regex(self, patterns: list[str]) -> list[re.Pattern[str]]: + """Compile user patterns into a list of usable regex + patterns. Catches errors are continues without bad regex patterns. + """ + compiled: list[re.Pattern[str]] = [] + for p in patterns: + try: + # check that the pattern has actual content + if len(p) < 1: + raise Exception("pattern is empty") + if not RE_NAMED_SUBGROUP.search(p): + raise Exception("no named subgroups") + regexp = re.compile(p, re.IGNORECASE | re.VERBOSE) + compiled.append(regexp) + except Exception as e: + self._log.info(f"Invalid user pattern {self._escape(p)!r}: {e}") + return compiled + + @staticmethod + def _escape(text: str) -> str: + # escape brackets for fstring logs + # TODO: Create an issue for brackets in logger + return re.sub("}", "}}", re.sub("{", "{{", text)) + def filename_task(self, task: ImportTask, session: ImportSession) -> None: """Examine each item in the task to see if we can extract a title from the filename. Try to match all filenames to a number of @@ -196,7 +198,11 @@ def filename_task(self, task: ImportTask, session: ImportSession) -> None: # Create the list of items to process # TODO: If it's a singleton import task, use the .item field - items: list[Item] = task.items + items: list[Item] = [] + if isinstance(task, SingletonImportTask): + item = task.item + else: + items = task.items # TODO: Switch this to gather data anyway, but only # update where missing @@ -213,22 +219,43 @@ def filename_task(self, task: ImportTask, session: ImportSession) -> None: names[item] = name if not parent_path: parent_path = path.parent.stem - self._log.debug(f"Parent Path: {parent_path}") + self._log.debug( + f"Parent Folder: {self._escape(parent_path)}" + ) - album_matches: AlbumMatches = self.parse_album_info(parent_path) + album_matches: AlbumMatches = self._parse_album_info(parent_path) self._log.debug(album_matches) # Look for useful information in the filenames. track_matches: dict[Item, TrackMatches] = {} for item, name in names.items(): - m = self.parse_track_info(name) + m = self._parse_track_info(name) track_matches[item] = m + # Make sure we got the fields right + self._sanity_check_matches(album_matches, track_matches) self._apply_matches(album_matches, track_matches) - def parse_track_info(self, text: str) -> TrackMatches: + @staticmethod + def _parse_track_info(text: str) -> TrackMatches: + matches: TrackMatches = { + "disc": None, + "track": None, + "by": None, + "artist": None, + "title": None, + } m = RE_TRACK_INFO.match(text) - matches: TrackMatches = m.groupdict() - # if the phrase "by" is matched, swap - # artist and title + if m: + if disc := m.group("disc"): + matches["disc"] = str(disc) + if track := m.group("track"): + matches["track"] = str(track).strip() + if by := m.group("by"): + matches["by"] = str(by) + if artist := m.group("artist"): + matches["artist"] = str(artist).strip() + if title := m.group("title"): + matches["title"] = str(title).strip() + # if the phrase "by" is matched, swap artist and title if matches["by"]: artist = matches["title"] matches["title"] = matches["artist"] @@ -244,7 +271,7 @@ def parse_track_info(self, text: str) -> TrackMatches: return matches - def parse_album_info(self, text: str) -> AlbumMatches: + def _parse_album_info(self, text: str) -> AlbumMatches: matches: AlbumMatches = { "albumartist": None, "album": None, @@ -285,8 +312,27 @@ def parse_album_info(self, text: str) -> AlbumMatches: return matches + def _apply_matches( + self, album_match: AlbumMatches, track_matches: dict[Item, TrackMatches] + ) -> None: + """Apply all valid matched fields to all items in the match dictionary.""" + match = album_match + for item in track_matches: + match.update(track_matches[item]) + found_data: dict[str, int | str] = {} + self._log.debug(f"Attempting keys: {match.keys()}") + for key in match.keys(): + if key in self.fields: + old_value = item.get(key) + new_value = match[key] # type: ignore + if self._bad_field(old_value) and new_value: + found_data[key] = new_value + self._log.info(f"Item updated with: {found_data.items()}") + item.update(found_data) + + @staticmethod def _parse_album_and_albumartist( - self, text + text: str, ) -> tuple[str | None, str | None]: """Takes the remaining string and splits it along common dividers. Assumes the first field to be the albumartist and the last field to be the @@ -317,11 +363,13 @@ def _parse_album_and_albumartist( possible_album = remaining[0].strip() return possible_album, possible_albumartist - def _parse_year(self, text: str) -> tuple[str | None, tuple[int, int]]: + @staticmethod + def _parse_year(text: str) -> tuple[str | None, tuple[int, int]]: """The year will be a four digit number. The search goes through a list of ordered patterns to try and find the year. To be a valid year, it must be less than the current year. """ + current_year = datetime.now().year year = None span = (0, 0) for exp in YEAR_REGEX: @@ -330,13 +378,14 @@ def _parse_year(self, text: str) -> tuple[str | None, tuple[int, int]]: continue year_candidate = match.group("year") # If the year is matched and not in the future - if year_candidate and int(year_candidate) <= self.current_year: + if year_candidate and int(year_candidate) <= current_year: year = year_candidate span = match.span() break return year, span - def _parse_media(self, text: str) -> tuple[str | None, tuple[int, int]]: + @staticmethod + def _parse_media(text: str) -> tuple[str | None, tuple[int, int]]: """Look for the media type, we are only interested in a few common types - CD, Vinyl, Cassette or WEB. To avoid overreach, in the case of titles containing a medium, only searches for media types @@ -348,7 +397,7 @@ def _parse_media(self, text: str) -> tuple[str | None, tuple[int, int]]: "web": "Digital Media", "cassette": "Cassette", } - match = RE_MEDIA.search(text) + match = RE_MEDIA_TYPE.search(text) if match: media = None for key, value in match.groupdict().items(): @@ -357,16 +406,16 @@ def _parse_media(self, text: str) -> tuple[str | None, tuple[int, int]]: return media, match.span() return None, (0, 0) - def _parse_catalognum( - self, text: str - ) -> tuple[str | None, tuple[int, int]]: + @staticmethod + def _parse_catalognum(text: str) -> tuple[str | None, tuple[int, int]]: match = RE_CATALOGNUM.search(text) # assert that it cannot be mistaken for a media type - if match and not RE_MEDIA.match(match[0]): + if match and not RE_MEDIA_TYPE.match(match[0]): return match.group("catalognum"), match.span() return None, (0, 0) - def _mutate_string(self, text, span: tuple[int, int]) -> str: + @staticmethod + def _mutate_string(text: str, span: tuple[int, int]) -> str: """Replace a matched field with a seperator""" start, end = span text = text[:start] + "-" + text[end:] @@ -380,60 +429,52 @@ def _sanity_check_matches( if the arist and album artist fields are properly identified. """ + + def swap_artist_title(tracks: list[TrackMatches]): + for track in tracks: + artist = track["title"] + track["title"] = track["artist"] + track["artist"] = artist + # swap the track titles and track artists + self._log.info("Swapped title and artist fields.") + + # None of this logic applies if there's only one track + if len(track_matches) < 2: + return + # If the album artist is not various artists - # check that all artists, if any, match + # check that all artists match # if they do not, try seeing if all the titles match # if all the titles match, swap title and artist fields - - # If the suspected title and albumartist fields are not equal - # we have ruled out a self titled album - # Check if the suspected title appears in the track artists - # If so, we should swap the title and albumartist in albummatches - - # If any track title is the same as the album artist - # some_map = list(track_matches.values())[0] - # keys = some_map.keys() - - # Given both an "artist" and "title" field, assume that one is - # *actually* the artist, which must be uniform, and use the other - # for the title. This, of course, won't work for VA albums. - # Only check for "artist": patterns containing it, also contain "title" - # if "artist" in keys: - # if equal_fields(track_matches, "artist"): - # artist = some_map["artist"] - # title_field = "title" - # elif equal_fields(track_matches, "title"): - # artist = some_map["title"] - # title_field = "artist" - # else: - # # Both vary. Abort. - # return - # - # for item in track_matches: - # if not item.artist and artist: - # item.artist = artist - # self._log.info(f"Artist replaced with: {item.artist}") - # # otherwise, if the pattern contains "title", use that for title_field - + # If we know that it's a VA album, then we can't assert much from the artists + tracks: list[TrackMatches] = list(track_matches.values()) + album_artist = album_match["albumartist"] + one_artist = self._equal_fields(tracks, "artist") + one_title = self._equal_fields(tracks, "title") + + if not album_artist or album_artist != config["va_name"].as_str(): + if one_artist and not one_title: + # All the artist fields match, and the title fields don't + # It's probably the artist + return + elif one_title and not one_artist and not album_artist: + # If the track titles match, and there's no album + # artist to check on + swap_artist_title(tracks) + elif album_artist: + # The artist fields don't match, and the title fields don't match + # If the albumartist field matches any track, then we know + # that the track field is likely the artist field. + # Sometimes an album has a presenter credited + track_titles = [str(t["title"]).upper() for t in tracks] + if album_artist and album_artist.upper() in track_titles: + swap_artist_title(tracks) return - def _apply_matches( - self, album_match: AlbumMatches, track_matches: dict[Item, TrackMatches] - ) -> None: - """Apply all valid matched fields to all items in the match dictionary.""" - match = album_match - for item in track_matches: - match.update(track_matches[item]) - found_data: dict[str, int | str] = {} - self._log.debug(f"Attempting keys: {match.keys()}") - for key in match.keys(): - if key in self.fields: - old_value = item.get(key) - new_value = match[key] - if self._bad_field(old_value) and new_value: - found_data[key] = new_value - self._log.info(f"Item updated with: {found_data.values()}") - item.update(found_data) + @staticmethod + def _equal_fields(dictionaries: list[TrackMatches], field: str) -> bool: + """Checks if all values of a field on a dictionary match.""" + return len(set(d[field] for d in dictionaries)) <= 1 # type: ignore @staticmethod def _bad_field(field: str | int) -> bool: diff --git a/docs/plugins/fromfilename.rst b/docs/plugins/fromfilename.rst index 1e8d0cc7dd..7431c45a2b 100644 --- a/docs/plugins/fromfilename.rst +++ b/docs/plugins/fromfilename.rst @@ -15,7 +15,8 @@ Configuration ------------- Configuration for ``fromfilename`` allows you to choose what fields the plugin -attempts to contribute to files missing information. +attempts to contribute to files missing information, as well as specify extra +patterns to match. Default ~~~~~~~ @@ -32,9 +33,22 @@ Default - media - title - track + - year + patterns: + file: [] + folder: [] -Recognized Patterns -------------------- +.. conf:: fields + :default: [ artist, album, albumartist, catalognum, disc, media, title, track, year ] + + The fields the plugin will guess with its default pattern matching. If a field is specified in a user pattern, that field does not need to be present on this list to be applied. If you only want the plugin contribute the track title and artist, you would put ``[title, artist]``. + +.. conf:: patterns + + Extra regular expression patterns specified by the user. See the section on patterns for more information. + +Patterns +-------- Examples of paths that the plugin can parse successfully, and the fields retrieved. @@ -46,6 +60,7 @@ retrieved. albumartist: Artist title: "03" track: 3 + year: 2025 "/[CAT123] Album - Various [WEB-FLAC]/2-10 - Artist - Song One.flac" artist: Artist @@ -57,7 +72,10 @@ retrieved. title: Song One track: 10 - "/1-23.flac" + "/Album Artist - Album Title (1997) {CATALOGNUM123}/1-23.flac" + albumartist: Album Artist + album: Album Title + year: 1997 disc: 1 track: 23 @@ -74,3 +92,41 @@ retrieved. artist: Artist title: Song track: 8 + +User Patterns +~~~~~~~~~~~~~ + +Users can specify patterns to improve the efficacy of the plugin. Patterns can +be specified as ``file`` or ``folder`` patterns. ``file`` patterns are checked +against the filename. ``folder`` patterns are checked against the parent folder +of the file. + +To contribute information, the patterns must use named capture groups +``(?P<name>...)``. The name of the capture group represents the beets field the +captured text will be applied to. User patterns are compiled with the verbose +and ignore case flags. Spaces in a match should be noted with `\s`. + +If ``fromfilename`` can't match the entire string to the given pattern, it will +fall back to the default pattern. + +The following custom patterns will match this path and retrieve the specified +fields. + +``/music/James Lawson - 841689/Coming Up - James Lawson & Andy Farley.mp3`` + +.. code-block:: yaml + + patterns: + folder: + # multiline blocks are allowed for readability + - | + (?P<albumartist>\w+) + \s-\s + (?P<discogs_albumid>\d+)' + file: + - '(?P<artist>\w+)\s-\s(?P<track>\d+)' + +For more information on writing regular expressions, check out the `python +documentation`_. + +.. _python documentation: https://docs.python.org/3/library/re.html diff --git a/test/plugins/test_fromfilename.py b/test/plugins/test_fromfilename.py index f887f13bb1..b1472e3db5 100644 --- a/test/plugins/test_fromfilename.py +++ b/test/plugins/test_fromfilename.py @@ -18,8 +18,8 @@ import pytest from beets.library import Item -from beets.test.helper import ConfigMixin -from beetsplug import fromfilename +from beets.test.helper import PluginMixin +from beetsplug.fromfilename import FromFilenamePlugin class Session: @@ -109,8 +109,8 @@ class Task: ], ) def test_parse_track_info(text, matchgroup): - f = fromfilename.FromFilenamePlugin() - m = f.parse_track_info(text) + f = FromFilenamePlugin() + m = f._parse_track_info(text) assert matchgroup == m @@ -252,12 +252,53 @@ def test_parse_track_info(text, matchgroup): ], ) def test_parse_album_info(text, matchgroup): - f = fromfilename.FromFilenamePlugin() - m = f.parse_album_info(text) + f = FromFilenamePlugin() + m = f._parse_album_info(text) assert matchgroup == m -class TestFromFilename(ConfigMixin): +@pytest.mark.parametrize( + "patterns,expected", + [ + ( + [ + r""" + (?P<disc>\d+(?=[\.\-_]\d))? + # a disc must be followed by punctuation and a digit + [\.\-]{,1} + # disc punctuation + (?P<track>\d+)? + # match the track number + [\.\-_\s]* + # artist separators + (?P<artist>.+?(?=[\s*_]?[\.\-by].+))? + # artist match depends on title existing + [\.\-_\s]* + (?P<by>by)? + # if 'by' is found, artist and title will need to be swapped + [\.\-_\s]* + # title separators + (?P<title>.+)? + # match the track title + """, + r"", + r"(?:<invalid)", + r"(.*)", + r"(?P<disc>asda}]", + ], + 1, + ) + ], +) +def test_to_regex(patterns, expected): + f = FromFilenamePlugin() + p = f._to_regex(patterns) + assert len(p) == expected + + +class TestFromFilename(PluginMixin): + plugin = "fromfilename" + @pytest.mark.parametrize( "expected_item", [ @@ -368,7 +409,7 @@ def test_fromfilename(self, expected_item): After parsing, compare to the original with the expected attributes defined. """ task = Task([mock_item(path=expected_item.path)]) - f = fromfilename.FromFilenamePlugin() + f = FromFilenamePlugin() f.filename_task(task, Session()) res = task.items[0] exp = expected_item @@ -379,3 +420,236 @@ def test_fromfilename(self, expected_item): assert res.catalognum == exp.catalognum assert res.year == exp.year assert res.title == exp.title + + @pytest.mark.parametrize( + "expected_items", + [ + [ + mock_item( + path="/Artist - Album/01 - Track1 - Performer.flac", + track=1, + title="Track1", + album="Album", + albumartist="Artist", + artist="Performer", + ), + mock_item( + path="/Artist - Album/02 - Track2 - Artist.flac", + track=2, + title="Track2", + album="Album", + albumartist="Artist", + artist="Artist", + ), + ], + [ + mock_item( + path=( + "/DiY - 8 Definitions of Bounce/" + "01 - Essa - Definition of Bounce.flac" + ), + track=1, + title="Definition of Bounce", + albumartist="DiY", + album="8 Definitions of Bounce", + artist="Essa", + ), + mock_item( + path=( + "/DiY - 8 Definitions of Bounce/" + "02 - Digs - Definition of Bounce.flac" + ), + track=2, + title="Definition of Bounce", + album="8 Definitions of Bounce", + albumartist="DiY", + artist="Digs", + ), + ], + [ + mock_item( + path=("/Essa - Magneto Essa/1 - Essa - Magneto Essa.flac"), + track=1, + title="Magneto Essa", + album="Magneto Essa", + albumartist="Essa", + artist="Essa", + ), + mock_item( + path=("/Essa - Magneto Essa/2 - Essa - The Immortals.flac"), + track=2, + title="The Immortals", + album="Magneto Essa", + albumartist="Essa", + artist="Essa", + ), + ], + [ + mock_item( + path=("/Magneto Essa/1 - Magneto Essa - Essa.flac"), + track=1, + title="Magneto Essa", + album="Magneto Essa", + artist="Essa", + ), + mock_item( + path=("/Magneto Essa/2 - The Immortals - Essa.flac"), + track=2, + title="The Immortals", + album="Magneto Essa", + artist="Essa", + ), + ], + [ + # Even though it might be clear to human eyes, + # we can't guess since the various flag is thrown + mock_item( + path=( + "/Various - 303 Alliance 012/" + "1 - The End of Satellite - Benji303.flac" + ), + track=1, + title="Benji303", + album="303 Alliance 012", + artist="The End of Satellite", + albumartist="Various Artists", + ), + mock_item( + path=( + "/Various - 303 Alliance 012/" + "2 - Ruff Beats - Benji303.flac" + ), + track=2, + title="Benji303", + album="303 Alliance 012", + artist="Ruff Beats", + albumartist="Various Artists", + ), + ], + [ + # Even though it might be clear to human eyes, + # we can't guess since the various flag is thrown + mock_item( + path=( + "/303 Alliance 012/" + "1 - The End of Satellite - Benji303.flac" + ), + track=1, + title="Benji303", + album="303 Alliance 012", + artist="The End of Satellite", + ), + mock_item( + path=( + "/303 Alliance 012/" + "2 - Ruff Beats - Benji303 & Sam J.flac" + ), + track=2, + title="Benji303 & Sam J", + album="303 Alliance 012", + artist="Ruff Beats", + ), + ], + ], + ) + def test_sanity_check(self, expected_items): + """ + Take a list of expected items, create a task with just the paths. + + Goal is to ensure that sanity check + correctly adjusts the parsed artists and albums + + After parsing, compare to the expected items. + """ + task = Task([mock_item(path=item.path) for item in expected_items]) + f = FromFilenamePlugin() + f.filename_task(task, Session()) + res = task.items + exp = expected_items + assert res[0].path == exp[0].path + assert res[0].artist == exp[0].artist + assert res[0].albumartist == exp[0].albumartist + assert res[0].disc == exp[0].disc + assert res[0].catalognum == exp[0].catalognum + assert res[0].year == exp[0].year + assert res[0].title == exp[0].title + assert res[1].path == exp[1].path + assert res[1].artist == exp[1].artist + assert res[1].albumartist == exp[1].albumartist + assert res[1].disc == exp[1].disc + assert res[1].catalognum == exp[1].catalognum + assert res[1].year == exp[1].year + assert res[1].title == exp[1].title + + # TODO: Test with singleton import tasks + + # TODO: Test with items that already have data, or other types of bad data. + + # TODO: Test with items that have perfectly fine data for the most part + + @pytest.mark.parametrize( + "fields,expected", + [ + ( + [ + "albumartist", + "album", + "year", + "media", + "catalognum", + "artist", + "track", + "disc", + "title", + ], + mock_item( + albumartist="Album Artist", + album="Album", + year="2025", + media="CD", + catalognum="CATALOGNUM", + disc=1, + track=2, + artist="Artist", + title="Track", + ), + ), + ( + ["album", "year", "media", "track", "disc", "title"], + mock_item( + album="Album", + year="2025", + media="CD", + disc=1, + title="Track", + ), + ), + ], + ) + def test_fields(self, fields, expected): + """ + With a set item and changing list of fields + + After parsing, compare to the original with the expected attributes defined. + """ + path = ( + "/Album Artist - Album (2025) [FLAC CD] {CATALOGNUM}/" + "1-2 Artist - Track.wav" + ) + task = Task([mock_item(path=path)]) + expected.path = path + with self.configure_plugin({"fields": fields}): + f = FromFilenamePlugin() + f.config + f.filename_task(task, Session()) + res = task.items[0] + assert res.path == expected.path + assert res.artist == expected.artist + assert res.albumartist == expected.albumartist + assert res.disc == expected.disc + assert res.catalognum == expected.catalognum + assert res.year == expected.year + assert res.title == expected.title + + def test_user_regex(self): + return From 9ca9e86cf271b2f9a300b6023747619333f7e38a Mon Sep 17 00:00:00 2001 From: Henry <henryoberholtzer@gmail.com> Date: Tue, 6 Jan 2026 07:37:29 -0800 Subject: [PATCH 05/12] Rework fromfilename_task, rename dictionaries, create mock task --- beetsplug/fromfilename.py | 143 +++++++++++++++--------------- test/plugins/test_fromfilename.py | 26 ++++-- 2 files changed, 87 insertions(+), 82 deletions(-) diff --git a/beetsplug/fromfilename.py b/beetsplug/fromfilename.py index 8c905d486d..2141b4ed5b 100644 --- a/beetsplug/fromfilename.py +++ b/beetsplug/fromfilename.py @@ -113,7 +113,7 @@ RE_BRACKETS = re.compile(r"[\(\[\{].*?[\)\]\}]") -class TrackMatches(TypedDict): +class TrackMatch(TypedDict): disc: str | None track: str | None by: NotRequired[str | None] @@ -121,7 +121,7 @@ class TrackMatches(TypedDict): title: str | None -class AlbumMatches(TypedDict): +class AlbumMatch(TypedDict): albumartist: str | None album: str | None year: str | None @@ -188,91 +188,88 @@ def _escape(text: str) -> str: return re.sub("}", "}}", re.sub("{", "{{", text)) def filename_task(self, task: ImportTask, session: ImportSession) -> None: - """Examine each item in the task to see if we can extract a title - from the filename. Try to match all filenames to a number of - regexps, starting with the most complex patterns and successively - trying less complex patterns. As soon as all filenames match the - same regex we can make an educated guess of which part of the - regex that contains the title. + """ Examines all files in the given import task for any missing + information it can gather from the file and folder names. + + Once the information has been obtained and checked, it + is applied to the items to improve later metadata lookup. """ # Create the list of items to process - # TODO: If it's a singleton import task, use the .item field - items: list[Item] = [] - if isinstance(task, SingletonImportTask): - item = task.item - else: - items = task.items - - # TODO: Switch this to gather data anyway, but only - # update where missing - # Look for suspicious (empty or meaningless) titles. - missing_titles = sum(self._bad_field(i.title) for i in items) - - if missing_titles: - # Get the base filenames (no path or extension). - parent_path: str = "" - names: dict[Item, str] = {} - for item in items: - path: Path = Path(displayable_path(item.path)) - name = path.stem - names[item] = name - if not parent_path: - parent_path = path.parent.stem - self._log.debug( - f"Parent Folder: {self._escape(parent_path)}" - ) - - album_matches: AlbumMatches = self._parse_album_info(parent_path) - self._log.debug(album_matches) - # Look for useful information in the filenames. - track_matches: dict[Item, TrackMatches] = {} - for item, name in names.items(): - m = self._parse_track_info(name) - track_matches[item] = m - # Make sure we got the fields right - self._sanity_check_matches(album_matches, track_matches) - self._apply_matches(album_matches, track_matches) + items: list[Item] = task.items + + # TODO: Check each of the fields to see if any are missing + # information on the file. + parent_folder, item_filenames = self._get_path_strings(items) + + album_matches = self._parse_album_info(parent_folder) + # Look for useful information in the filenames. + track_matches = self._build_track_matches(item_filenames) + # Make sure we got the fields right + self._sanity_check_matches(album_matches, track_matches) + # Apply the information + self._apply_matches(album_matches, track_matches) @staticmethod - def _parse_track_info(text: str) -> TrackMatches: - matches: TrackMatches = { + def _get_path_strings(items: list[Item]) -> tuple[str, dict[Item, str]]: + parent_folder: str = "" + filenames: dict[Item, str] = {} + for item in items: + path: Path = Path(displayable_path(item.path)) + filename = path.stem + filenames[item] = filename + if not parent_folder: + parent_folder = path.parent.stem + return parent_folder, filenames + + def _build_track_matches(self, + item_filenames: dict[Item, str]) -> dict[Item, TrackMatch]: + track_matches: dict[Item, TrackMatch] = {} + for item, filename in item_filenames.items(): + m = self._parse_track_info(filename) + track_matches[item] = m + return track_matches + + + @staticmethod + def _parse_track_info(text: str) -> TrackMatch: + trackmatch: TrackMatch = { "disc": None, "track": None, "by": None, "artist": None, "title": None, } - m = RE_TRACK_INFO.match(text) - if m: - if disc := m.group("disc"): - matches["disc"] = str(disc) - if track := m.group("track"): - matches["track"] = str(track).strip() - if by := m.group("by"): - matches["by"] = str(by) - if artist := m.group("artist"): - matches["artist"] = str(artist).strip() - if title := m.group("title"): - matches["title"] = str(title).strip() + match = RE_TRACK_INFO.match(text) + assert match is not None + if disc := match.group("disc"): + trackmatch["disc"] = str(disc) + if track := match.group("track"): + trackmatch["track"] = str(track).strip() + if by := match.group("by"): + trackmatch["by"] = str(by) + if artist := match.group("artist"): + trackmatch["artist"] = str(artist).strip() + if title := match.group("title"): + trackmatch["title"] = str(title).strip() # if the phrase "by" is matched, swap artist and title - if matches["by"]: - artist = matches["title"] - matches["title"] = matches["artist"] - matches["artist"] = artist + if trackmatch["by"]: + artist = trackmatch["title"] + trackmatch["title"] = trackmatch["artist"] + trackmatch["artist"] = artist # remove that key - del matches["by"] + del trackmatch["by"] # if all fields except `track` are none # set title to track number as well # we can't be sure if it's actually the track number # or track title - if set(matches.values()) == {None, matches["track"]}: - matches["title"] = matches["track"] + if set(trackmatch.values()) == {None, trackmatch["track"]}: + trackmatch["title"] = trackmatch["track"] - return matches + return trackmatch - def _parse_album_info(self, text: str) -> AlbumMatches: - matches: AlbumMatches = { + def _parse_album_info(self, text: str) -> AlbumMatch: + matches: AlbumMatch = { "albumartist": None, "album": None, "year": None, @@ -313,7 +310,7 @@ def _parse_album_info(self, text: str) -> AlbumMatches: return matches def _apply_matches( - self, album_match: AlbumMatches, track_matches: dict[Item, TrackMatches] + self, album_match: AlbumMatch, track_matches: dict[Item, TrackMatch] ) -> None: """Apply all valid matched fields to all items in the match dictionary.""" match = album_match @@ -422,7 +419,7 @@ def _mutate_string(text: str, span: tuple[int, int]) -> str: return text def _sanity_check_matches( - self, album_match: AlbumMatches, track_matches: dict[Item, TrackMatches] + self, album_match: AlbumMatch, track_matches: dict[Item, TrackMatch] ) -> None: """Check to make sure data is coherent between track and album matches. Largely looking to see @@ -430,7 +427,7 @@ def _sanity_check_matches( identified. """ - def swap_artist_title(tracks: list[TrackMatches]): + def swap_artist_title(tracks: list[TrackMatch]): for track in tracks: artist = track["title"] track["title"] = track["artist"] @@ -447,7 +444,7 @@ def swap_artist_title(tracks: list[TrackMatches]): # if they do not, try seeing if all the titles match # if all the titles match, swap title and artist fields # If we know that it's a VA album, then we can't assert much from the artists - tracks: list[TrackMatches] = list(track_matches.values()) + tracks: list[TrackMatch] = list(track_matches.values()) album_artist = album_match["albumartist"] one_artist = self._equal_fields(tracks, "artist") one_title = self._equal_fields(tracks, "title") @@ -472,7 +469,7 @@ def swap_artist_title(tracks: list[TrackMatches]): return @staticmethod - def _equal_fields(dictionaries: list[TrackMatches], field: str) -> bool: + def _equal_fields(dictionaries: list[TrackMatch], field: str) -> bool: """Checks if all values of a field on a dictionary match.""" return len(set(d[field] for d in dictionaries)) <= 1 # type: ignore diff --git a/test/plugins/test_fromfilename.py b/test/plugins/test_fromfilename.py index b1472e3db5..d964c714ee 100644 --- a/test/plugins/test_fromfilename.py +++ b/test/plugins/test_fromfilename.py @@ -19,6 +19,7 @@ from beets.library import Item from beets.test.helper import PluginMixin +from beets.importer.tasks import ImportTask, SingletonImportTask from beetsplug.fromfilename import FromFilenamePlugin @@ -41,10 +42,8 @@ def mock_item(**kwargs): return Item(**{**defaults, **kwargs}) -@dataclass -class Task: - items: list[Item] - is_album: bool = True +def mock_task(items): + return ImportTask(toppath=None, paths=None, items=items) @pytest.mark.parametrize( @@ -408,7 +407,7 @@ def test_fromfilename(self, expected_item): After parsing, compare to the original with the expected attributes defined. """ - task = Task([mock_item(path=expected_item.path)]) + task = mock_task(items=[mock_item(path=expected_item.path)]) f = FromFilenamePlugin() f.filename_task(task, Session()) res = task.items[0] @@ -561,7 +560,7 @@ def test_sanity_check(self, expected_items): After parsing, compare to the expected items. """ - task = Task([mock_item(path=item.path) for item in expected_items]) + task = mock_task([mock_item(path=item.path) for item in expected_items]) f = FromFilenamePlugin() f.filename_task(task, Session()) res = task.items @@ -581,7 +580,15 @@ def test_sanity_check(self, expected_items): assert res[1].year == exp[1].year assert res[1].title == exp[1].title - # TODO: Test with singleton import tasks + def test_singleton_import(self): + task = SingletonImportTask( + toppath=None, + item=mock_item(path="/01 Track.wav") + ) + f = FromFilenamePlugin() + f.filename_task(task, Session()) + assert task.item.track == 1 + assert task.item.title == "Track" # TODO: Test with items that already have data, or other types of bad data. @@ -636,7 +643,7 @@ def test_fields(self, fields, expected): "/Album Artist - Album (2025) [FLAC CD] {CATALOGNUM}/" "1-2 Artist - Track.wav" ) - task = Task([mock_item(path=path)]) + task = mock_task([mock_item(path=path)]) expected.path = path with self.configure_plugin({"fields": fields}): f = FromFilenamePlugin() @@ -651,5 +658,6 @@ def test_fields(self, fields, expected): assert res.year == expected.year assert res.title == expected.title - def test_user_regex(self): + @pytest.mark.parametrize("patterns,expected_item", []) + def test_user_regex(self, patterns, expected_item): return From e147a218948d1ebeaaf46acda9dc62d67f908650 Mon Sep 17 00:00:00 2001 From: Henry Oberholtzer <henryoberholtzer@gmail.com> Date: Tue, 6 Jan 2026 17:00:55 -0800 Subject: [PATCH 06/12] User patterns implemented --- beetsplug/fromfilename.py | 94 ++++++++++++++++------------- docs/plugins/fromfilename.rst | 99 ++++++------------------------- test/plugins/test_fromfilename.py | 98 +++++++++++++++++------------- 3 files changed, 126 insertions(+), 165 deletions(-) diff --git a/beetsplug/fromfilename.py b/beetsplug/fromfilename.py index 2141b4ed5b..34ea1155aa 100644 --- a/beetsplug/fromfilename.py +++ b/beetsplug/fromfilename.py @@ -25,7 +25,7 @@ from typing_extensions import NotRequired from beets import config -from beets.importer import ImportSession, ImportTask, SingletonImportTask +from beets.importer import ImportSession, ImportTask from beets.library import Item from beets.plugins import BeetsPlugin from beets.util import displayable_path @@ -149,43 +149,19 @@ def __init__(self) -> None: # TODO: Add ignore parent folder } ) + self.fields = set(self.config["fields"].as_str_seq()) self.register_listener("import_task_start", self.filename_task) - @cached_property - def fields(self) -> set[str]: - return set(self.config["fields"].as_str_seq()) - @cached_property def file_patterns(self) -> list[re.Pattern[str]]: - return self._to_regex(self.config["patterns"]["file"].as_str_seq()) + return self._user_pattern_to_regex( + self.config["patterns"]["file"].as_str_seq()) @cached_property def folder_patterns(self) -> list[re.Pattern[str]]: - return self._to_regex(self.config["patterns"]["folder"].as_str_seq()) - - def _to_regex(self, patterns: list[str]) -> list[re.Pattern[str]]: - """Compile user patterns into a list of usable regex - patterns. Catches errors are continues without bad regex patterns. - """ - compiled: list[re.Pattern[str]] = [] - for p in patterns: - try: - # check that the pattern has actual content - if len(p) < 1: - raise Exception("pattern is empty") - if not RE_NAMED_SUBGROUP.search(p): - raise Exception("no named subgroups") - regexp = re.compile(p, re.IGNORECASE | re.VERBOSE) - compiled.append(regexp) - except Exception as e: - self._log.info(f"Invalid user pattern {self._escape(p)!r}: {e}") - return compiled - - @staticmethod - def _escape(text: str) -> str: - # escape brackets for fstring logs - # TODO: Create an issue for brackets in logger - return re.sub("}", "}}", re.sub("{", "{{", text)) + return self._user_pattern_to_regex( + self.config["patterns"]["folder"].as_str_seq() + ) def filename_task(self, task: ImportTask, session: ImportSession) -> None: """ Examines all files in the given import task for any missing @@ -210,6 +186,21 @@ def filename_task(self, task: ImportTask, session: ImportSession) -> None: # Apply the information self._apply_matches(album_matches, track_matches) + def _user_pattern_to_regex(self, patterns: list[str]) -> list[re.Pattern[str]]: + """Compile user patterns into a list of usable regex + patterns. Catches errors are continues without bad regex patterns. + """ + return [ + re.compile(regexp) for p in patterns if ( + regexp := self._parse_user_pattern_strings(p)) + ] + + @staticmethod + def _escape(text: str) -> str: + # escape brackets for fstring logs + # TODO: Create an issue for brackets in logger + return re.sub("}", "}}", re.sub("{", "{{", text)) + @staticmethod def _get_path_strings(items: list[Item]) -> tuple[str, dict[Item, str]]: parent_folder: str = "" @@ -222,15 +213,24 @@ def _get_path_strings(items: list[Item]) -> tuple[str, dict[Item, str]]: parent_folder = path.parent.stem return parent_folder, filenames + def _check_user_matches(self, text: str, + patterns: list[re.Pattern[str]]) -> dict[str, str]: + for p in patterns: + if (usermatch := p.fullmatch(text)): + return usermatch.groupdict() + return None + def _build_track_matches(self, - item_filenames: dict[Item, str]) -> dict[Item, TrackMatch]: - track_matches: dict[Item, TrackMatch] = {} + item_filenames: dict[Item, str]) -> dict[Item, dict[str, str]]: + track_matches: dict[Item, dict[str, str]] = {} for item, filename in item_filenames.items(): - m = self._parse_track_info(filename) - track_matches[item] = m + if (m := self._check_user_matches(filename, self.file_patterns)): + track_matches[item] = m + else: + match = self._parse_track_info(filename) + track_matches[item] = match return track_matches - @staticmethod def _parse_track_info(text: str) -> TrackMatch: trackmatch: TrackMatch = { @@ -268,7 +268,10 @@ def _parse_track_info(text: str) -> TrackMatch: return trackmatch - def _parse_album_info(self, text: str) -> AlbumMatch: + def _parse_album_info(self, text: str) -> dict[str, str]: + # Check if a user pattern matches + if (m := self._check_user_matches(text, self.folder_patterns)): + return m matches: AlbumMatch = { "albumartist": None, "album": None, @@ -411,6 +414,18 @@ def _parse_catalognum(text: str) -> tuple[str | None, tuple[int, int]]: return match.group("catalognum"), match.span() return None, (0, 0) + def _parse_user_pattern_strings(self, text: str) -> str | None: + # escape any special characters + fields: list[str] = [s.lower() for s in re.findall(r"\$([a-zA-Z\_]+)", text)] + if not fields: + # if there are no usable fields + return None + pattern = re.escape(text) + for f in fields: + pattern = re.sub(rf"\\\${f}", f"(?P<{f}>.+)", pattern) + self.fields.add(f) + return rf"{pattern}" + @staticmethod def _mutate_string(text: str, span: tuple[int, int]) -> str: """Replace a matched field with a seperator""" @@ -439,11 +454,6 @@ def swap_artist_title(tracks: list[TrackMatch]): if len(track_matches) < 2: return - # If the album artist is not various artists - # check that all artists match - # if they do not, try seeing if all the titles match - # if all the titles match, swap title and artist fields - # If we know that it's a VA album, then we can't assert much from the artists tracks: list[TrackMatch] = list(track_matches.values()) album_artist = album_match["albumartist"] one_artist = self._equal_fields(tracks, "artist") diff --git a/docs/plugins/fromfilename.rst b/docs/plugins/fromfilename.rst index 7431c45a2b..5308d5c5b3 100644 --- a/docs/plugins/fromfilename.rst +++ b/docs/plugins/fromfilename.rst @@ -5,8 +5,9 @@ The ``fromfilename`` plugin helps to tag albums that are missing tags altogether but where the filenames contain useful information like the artist and title. When you attempt to import a track that's missing a title, this plugin will look -at the track's filename and guess its disc, track number, title, and artist. -These will be used to search for metadata and match track ordering. +at the track's filename and parent folder, and guess a number of fields. + +The extracted information will be used to search for metadata and match track ordering. To use the ``fromfilename`` plugin, enable it in your configuration (see :ref:`using-plugins`). @@ -45,88 +46,24 @@ Default .. conf:: patterns - Extra regular expression patterns specified by the user. See the section on patterns for more information. - -Patterns --------- - -Examples of paths that the plugin can parse successfully, and the fields -retrieved. + Users can specify patterns to improve the efficacy of the plugin. Patterns can + be specified as ``file`` or ``folder`` patterns. ``file`` patterns are checked + against the filename. ``folder`` patterns are checked against the parent folder + of the file. -.. code-block:: yaml + If ``fromfilename`` can't match the entire string to the given pattern, it will + falls back to the default pattern. - "/Artist - Album (2025)/03.wav" - album: Album - albumartist: Artist - title: "03" - track: 3 - year: 2025 - - "/[CAT123] Album - Various [WEB-FLAC]/2-10 - Artist - Song One.flac" - artist: Artist - album: Album - albumartist: Various Artists - catalognum: CAT123 - disc: 2 - media: Digital Media - title: Song One - track: 10 - - "/Album Artist - Album Title (1997) {CATALOGNUM123}/1-23.flac" - albumartist: Album Artist - album: Album Title - year: 1997 - disc: 1 - track: 23 - - "/04. Song.mp3" - title: Song - track: 4 - - "/5_-_My_Artist_-_My_Title.m4a" - artist: My_Artist - title: My_Title - track: 5 - - "/8 Song by Artist.wav" - artist: Artist - title: Song - track: 8 - -User Patterns -~~~~~~~~~~~~~ - -Users can specify patterns to improve the efficacy of the plugin. Patterns can -be specified as ``file`` or ``folder`` patterns. ``file`` patterns are checked -against the filename. ``folder`` patterns are checked against the parent folder -of the file. - -To contribute information, the patterns must use named capture groups -``(?P<name>...)``. The name of the capture group represents the beets field the -captured text will be applied to. User patterns are compiled with the verbose -and ignore case flags. Spaces in a match should be noted with `\s`. - -If ``fromfilename`` can't match the entire string to the given pattern, it will -fall back to the default pattern. - -The following custom patterns will match this path and retrieve the specified -fields. - -``/music/James Lawson - 841689/Coming Up - James Lawson & Andy Farley.mp3`` + The following custom patterns will match this path and retrieve the specified + fields. -.. code-block:: yaml + ``/music/James Lawson - 841689 (2004)/Coming Up - James Lawson & Andy Farley.mp3`` - patterns: - folder: - # multiline blocks are allowed for readability - - | - (?P<albumartist>\w+) - \s-\s - (?P<discogs_albumid>\d+)' - file: - - '(?P<artist>\w+)\s-\s(?P<track>\d+)' + .. code-block:: yaml -For more information on writing regular expressions, check out the `python -documentation`_. + patterns: + folder: + - "$albumartist - $discogs_albumid ($year)" + file: + - "$title - $artist" -.. _python documentation: https://docs.python.org/3/library/re.html diff --git a/test/plugins/test_fromfilename.py b/test/plugins/test_fromfilename.py index d964c714ee..98709c2011 100644 --- a/test/plugins/test_fromfilename.py +++ b/test/plugins/test_fromfilename.py @@ -13,8 +13,6 @@ """Tests for the fromfilename plugin.""" -from dataclasses import dataclass - import pytest from beets.library import Item @@ -255,44 +253,19 @@ def test_parse_album_info(text, matchgroup): m = f._parse_album_info(text) assert matchgroup == m - -@pytest.mark.parametrize( - "patterns,expected", - [ +@pytest.mark.parametrize("string,pattern",[ ( - [ - r""" - (?P<disc>\d+(?=[\.\-_]\d))? - # a disc must be followed by punctuation and a digit - [\.\-]{,1} - # disc punctuation - (?P<track>\d+)? - # match the track number - [\.\-_\s]* - # artist separators - (?P<artist>.+?(?=[\s*_]?[\.\-by].+))? - # artist match depends on title existing - [\.\-_\s]* - (?P<by>by)? - # if 'by' is found, artist and title will need to be swapped - [\.\-_\s]* - # title separators - (?P<title>.+)? - # match the track title - """, - r"", - r"(?:<invalid)", - r"(.*)", - r"(?P<disc>asda}]", - ], - 1, - ) - ], -) -def test_to_regex(patterns, expected): + "$albumartist - $album ($year) {$comments}", + r"(?P<albumartist>.+)\ \-\ (?P<album>.+)\ \((?P<year>.+)\)\ \ \{(?P<comments>.+)\}" + ), + ( + "$", + None + ), + ]) +def test_parse_user_pattern_strings(string,pattern): f = FromFilenamePlugin() - p = f._to_regex(patterns) - assert len(p) == expected + assert f._parse_user_pattern_strings(string) == pattern class TestFromFilename(PluginMixin): @@ -647,7 +620,6 @@ def test_fields(self, fields, expected): expected.path = path with self.configure_plugin({"fields": fields}): f = FromFilenamePlugin() - f.config f.filename_task(task, Session()) res = task.items[0] assert res.path == expected.path @@ -658,6 +630,48 @@ def test_fields(self, fields, expected): assert res.year == expected.year assert res.title == expected.title - @pytest.mark.parametrize("patterns,expected_item", []) - def test_user_regex(self, patterns, expected_item): - return + @pytest.mark.parametrize("patterns,expected", [ + ( + { + "folder": ["($comments) - {$albumartist} - {$album}"], + "file": ["$artist - $track - $title"] + }, + mock_item( + path="/(Comment) - {Album Artist} - {Album}/Artist - 02 - Title.flac", + comments="Comment", + albumartist="Album Artist", + album="Album", + artist="Artist", + track=2, + title="Title", + ) + ), + ( + { + "folder": ["[$comments] - {$albumartist} - {$album}"], + "file": ["$artist - $track - $title"] + }, + mock_item( + path="/(Comment) - {Album Artist} - {Album}/Artist - 02 - Title.flac", + artist="Artist", + track=2, + title="Title", + catalognum="Comment" + ) + ) + ]) + def test_user_patterns(self, patterns, expected): + task = mock_task([mock_item(path=expected.path)]) + with self.configure_plugin({ "patterns": patterns }): + f = FromFilenamePlugin() + f.filename_task(task, Session()) + res = task.items[0] + assert res.comments == expected.comments + assert res.path == expected.path + assert res.artist == expected.artist + assert res.albumartist == expected.albumartist + assert res.disc == expected.disc + assert res.catalognum == expected.catalognum + assert res.year == expected.year + assert res.title == expected.title + From 4672ee0487f020139fefa1e751a250dcde4ee110 Mon Sep 17 00:00:00 2001 From: Henry <henryoberholtzer@gmail.com> Date: Tue, 6 Jan 2026 22:01:37 -0800 Subject: [PATCH 07/12] Refactor to use mutable mapping. --- beetsplug/fromfilename.py | 98 ++++----- test/plugins/test_fromfilename.py | 343 ++++++++++++++++-------------- 2 files changed, 224 insertions(+), 217 deletions(-) diff --git a/beetsplug/fromfilename.py b/beetsplug/fromfilename.py index 34ea1155aa..c4bfd538ac 100644 --- a/beetsplug/fromfilename.py +++ b/beetsplug/fromfilename.py @@ -17,12 +17,10 @@ """ import re +from collections.abc import MutableMapping from datetime import datetime from functools import cached_property from pathlib import Path -from typing import TypedDict - -from typing_extensions import NotRequired from beets import config from beets.importer import ImportSession, ImportTask @@ -113,20 +111,32 @@ RE_BRACKETS = re.compile(r"[\(\[\{].*?[\)\]\}]") -class TrackMatch(TypedDict): - disc: str | None - track: str | None - by: NotRequired[str | None] - artist: str | None - title: str | None +class FilenameMatch(MutableMapping[str, str | None]): + def __init__(self, matches: dict[str, str] = {}) -> None: + self._matches: dict[str, str] = {} + for key, value in matches.items(): + if value is not None: + self._matches[key.lower()] = str(value).strip() + + def __getitem__(self, key) -> str | None: + return self._matches.get(key, None) + + def __iter__(self): + return iter(self._matches) + + def __len__(self) -> int: + return len(self._matches) + def __setitem__(self, key: str, value: str | None) -> None: + if value: + self._matches[key] = value.strip() + + def __delitem__(self, key: str) -> None: + del self._matches[key] + + def values(self): + return self._matches.values() -class AlbumMatch(TypedDict): - albumartist: str | None - album: str | None - year: str | None - catalognum: str | None - media: str | None class FromFilenamePlugin(BeetsPlugin): @@ -198,7 +208,6 @@ def _user_pattern_to_regex(self, patterns: list[str]) -> list[re.Pattern[str]]: @staticmethod def _escape(text: str) -> str: # escape brackets for fstring logs - # TODO: Create an issue for brackets in logger return re.sub("}", "}}", re.sub("{", "{{", text)) @staticmethod @@ -214,15 +223,15 @@ def _get_path_strings(items: list[Item]) -> tuple[str, dict[Item, str]]: return parent_folder, filenames def _check_user_matches(self, text: str, - patterns: list[re.Pattern[str]]) -> dict[str, str]: + patterns: list[re.Pattern[str]]) -> FilenameMatch: for p in patterns: if (usermatch := p.fullmatch(text)): - return usermatch.groupdict() + return FilenameMatch(usermatch.groupdict()) return None def _build_track_matches(self, - item_filenames: dict[Item, str]) -> dict[Item, dict[str, str]]: - track_matches: dict[Item, dict[str, str]] = {} + item_filenames: dict[Item, str]) -> dict[Item, FilenameMatch]: + track_matches: dict[Item, FilenameMatch] = {} for item, filename in item_filenames.items(): if (m := self._check_user_matches(filename, self.file_patterns)): track_matches[item] = m @@ -232,53 +241,32 @@ def _build_track_matches(self, return track_matches @staticmethod - def _parse_track_info(text: str) -> TrackMatch: - trackmatch: TrackMatch = { - "disc": None, - "track": None, - "by": None, - "artist": None, - "title": None, - } + def _parse_track_info(text: str) -> FilenameMatch: match = RE_TRACK_INFO.match(text) assert match is not None - if disc := match.group("disc"): - trackmatch["disc"] = str(disc) - if track := match.group("track"): - trackmatch["track"] = str(track).strip() - if by := match.group("by"): - trackmatch["by"] = str(by) - if artist := match.group("artist"): - trackmatch["artist"] = str(artist).strip() - if title := match.group("title"): - trackmatch["title"] = str(title).strip() + trackmatch = FilenameMatch(match.groupdict()) # if the phrase "by" is matched, swap artist and title if trackmatch["by"]: artist = trackmatch["title"] trackmatch["title"] = trackmatch["artist"] trackmatch["artist"] = artist - # remove that key - del trackmatch["by"] + # remove that key + del trackmatch["by"] # if all fields except `track` are none # set title to track number as well # we can't be sure if it's actually the track number # or track title - if set(trackmatch.values()) == {None, trackmatch["track"]}: - trackmatch["title"] = trackmatch["track"] + track = match.group("track") + if set(trackmatch.values()) == {track}: + trackmatch["title"] = track return trackmatch - def _parse_album_info(self, text: str) -> dict[str, str]: + def _parse_album_info(self, text: str) -> FilenameMatch: # Check if a user pattern matches if (m := self._check_user_matches(text, self.folder_patterns)): return m - matches: AlbumMatch = { - "albumartist": None, - "album": None, - "year": None, - "catalognum": None, - "media": None, - } + matches = FilenameMatch() # Start with the extra fields to make parsing # the album artist and artist field easier year, span = self._parse_year(text) @@ -313,7 +301,7 @@ def _parse_album_info(self, text: str) -> dict[str, str]: return matches def _apply_matches( - self, album_match: AlbumMatch, track_matches: dict[Item, TrackMatch] + self, album_match: FilenameMatch, track_matches: dict[Item, FilenameMatch] ) -> None: """Apply all valid matched fields to all items in the match dictionary.""" match = album_match @@ -434,7 +422,7 @@ def _mutate_string(text: str, span: tuple[int, int]) -> str: return text def _sanity_check_matches( - self, album_match: AlbumMatch, track_matches: dict[Item, TrackMatch] + self, album_match: FilenameMatch, track_matches: dict[Item, FilenameMatch] ) -> None: """Check to make sure data is coherent between track and album matches. Largely looking to see @@ -442,7 +430,7 @@ def _sanity_check_matches( identified. """ - def swap_artist_title(tracks: list[TrackMatch]): + def swap_artist_title(tracks: list[FilenameMatch]): for track in tracks: artist = track["title"] track["title"] = track["artist"] @@ -454,7 +442,7 @@ def swap_artist_title(tracks: list[TrackMatch]): if len(track_matches) < 2: return - tracks: list[TrackMatch] = list(track_matches.values()) + tracks: list[FilenameMatch] = list(track_matches.values()) album_artist = album_match["albumartist"] one_artist = self._equal_fields(tracks, "artist") one_title = self._equal_fields(tracks, "title") @@ -479,7 +467,7 @@ def swap_artist_title(tracks: list[TrackMatch]): return @staticmethod - def _equal_fields(dictionaries: list[TrackMatch], field: str) -> bool: + def _equal_fields(dictionaries: list[FilenameMatch], field: str) -> bool: """Checks if all values of a field on a dictionary match.""" return len(set(d[field] for d in dictionaries)) <= 1 # type: ignore diff --git a/test/plugins/test_fromfilename.py b/test/plugins/test_fromfilename.py index 98709c2011..c00b567214 100644 --- a/test/plugins/test_fromfilename.py +++ b/test/plugins/test_fromfilename.py @@ -15,10 +15,10 @@ import pytest +from beets.importer.tasks import ImportTask, SingletonImportTask from beets.library import Item from beets.test.helper import PluginMixin -from beets.importer.tasks import ImportTask, SingletonImportTask -from beetsplug.fromfilename import FromFilenamePlugin +from beetsplug.fromfilename import FilenameMatch, FromFilenamePlugin class Session: @@ -47,68 +47,79 @@ def mock_task(items): @pytest.mark.parametrize( "text,matchgroup", [ - ("3", {"disc": None, "track": "3", "artist": None, "title": "3"}), - ("04", {"disc": None, "track": "04", "artist": None, "title": "04"}), - ("6.", {"disc": None, "track": "6", "artist": None, "title": "6"}), - ("3.5", {"disc": "3", "track": "5", "artist": None, "title": None}), - ("1-02", {"disc": "1", "track": "02", "artist": None, "title": None}), - ("100-4", {"disc": "100", "track": "4", "artist": None, "title": None}), + ("3", FilenameMatch({"track": "3", "title": "3"})), + ("04", FilenameMatch({"track": "04", "title": "04"})), + ("6.", FilenameMatch({"track": "6", "title": "6"})), + ("3.5", FilenameMatch({"disc": "3", "track": "5"})), + ("1-02", FilenameMatch({"disc": "1", "track": "02"})), + ("100-4", FilenameMatch({"disc": "100", "track": "4"})), ( "04.Title", - {"disc": None, "track": "04", "artist": None, "title": "Title"}, + FilenameMatch({"track": "04", "title": "Title"}), ), ( "5_-_Title", - {"disc": None, "track": "5", "artist": None, "title": "Title"}, + FilenameMatch({"track": "5", "title": "Title"}), ), ( "1-02 Title", - {"disc": "1", "track": "02", "artist": None, "title": "Title"}, + FilenameMatch({"disc": "1", "track": "02", "title": "Title"}), ), ( "3.5 - Title", - {"disc": "3", "track": "5", "artist": None, "title": "Title"}, + FilenameMatch({"disc": "3", "track": "5", "title": "Title"}), ), ( "5_-_Artist_-_Title", - {"disc": None, "track": "5", "artist": "Artist", "title": "Title"}, + FilenameMatch({"track": "5", "artist": "Artist", "title": "Title"}), ), ( "3-8- Artist-Title", - {"disc": "3", "track": "8", "artist": "Artist", "title": "Title"}, + FilenameMatch( + { + "disc": "3", + "track": "8", + "artist": "Artist", + "title": "Title", + } + ), ), ( "4-3 - Artist Name - Title", - { - "disc": "4", - "track": "3", - "artist": "Artist Name", - "title": "Title", - }, + FilenameMatch( + { + "disc": "4", + "track": "3", + "artist": "Artist Name", + "title": "Title", + } + ), ), ( "4-3_-_Artist_Name_-_Title", - { - "disc": "4", - "track": "3", - "artist": "Artist_Name", - "title": "Title", - }, + FilenameMatch( + { + "disc": "4", + "track": "3", + "artist": "Artist_Name", + "title": "Title", + } + ), ), ( "6 Title by Artist", - {"disc": None, "track": "6", "artist": "Artist", "title": "Title"}, + FilenameMatch({"track": "6", "artist": "Artist", "title": "Title"}), ), ( "Title", - {"disc": None, "track": None, "artist": None, "title": "Title"}, + FilenameMatch({"title": "Title"}), ), ], ) def test_parse_track_info(text, matchgroup): f = FromFilenamePlugin() m = f._parse_track_info(text) - assert matchgroup == m + assert dict(matchgroup.items()) == dict(m.items()) @pytest.mark.parametrize( @@ -117,134 +128,137 @@ def test_parse_track_info(text, matchgroup): ( # highly unlikely "", - { - "albumartist": None, - "album": None, - "year": None, - "catalognum": None, - "media": None, - }, + FilenameMatch( + { + "albumartist": None, + "album": None, + "year": None, + "catalognum": None, + "media": None, + } + ), ), ( "1970", - { - "albumartist": None, - "album": None, - "year": "1970", - "catalognum": None, - "media": None, - }, + FilenameMatch( + { + "year": "1970", + } + ), ), ( "Album Title", - { - "albumartist": None, - "album": "Album Title", - "year": None, - "catalognum": None, - "media": None, - }, + FilenameMatch( + { + "album": "Album Title", + } + ), ), ( "Artist - Album Title", - { - "albumartist": "Artist", - "album": "Album Title", - "year": None, - "catalognum": None, - "media": None, - }, + FilenameMatch( + { + "albumartist": "Artist", + "album": "Album Title", + } + ), ), ( "Artist - Album Title (2024)", - { - "albumartist": "Artist", - "album": "Album Title", - "year": "2024", - "catalognum": None, - "media": None, - }, + FilenameMatch( + { + "albumartist": "Artist", + "album": "Album Title", + "year": "2024", + } + ), ), ( "Artist - 2024 - Album Title [flac]", - { - "albumartist": "Artist", - "album": "Album Title", - "year": "2024", - "catalognum": None, - "media": None, - }, + FilenameMatch( + { + "albumartist": "Artist", + "album": "Album Title", + "year": "2024", + } + ), ), ( "(2024) Album Title [CATALOGNUM] WEB", # sometimes things are just going to be unparsable - { - "albumartist": "Album Title", - "album": "WEB", - "year": "2024", - "catalognum": "CATALOGNUM", - "media": None, - }, + FilenameMatch( + { + "albumartist": "Album Title", + "album": "WEB", + "year": "2024", + "catalognum": "CATALOGNUM", + } + ), ), ( "{2024} Album Artist - Album Title [INFO-WAV]", - { - "albumartist": "Album Artist", - "album": "Album Title", - "year": "2024", - "catalognum": None, - "media": None, - }, + FilenameMatch( + { + "albumartist": "Album Artist", + "album": "Album Title", + "year": "2024", + } + ), ), ( "VA - Album Title [2025] [CD-FLAC]", - { - "albumartist": "Various Artists", - "album": "Album Title", - "year": "2025", - "catalognum": None, - "media": "CD", - }, + FilenameMatch( + { + "albumartist": "Various Artists", + "album": "Album Title", + "year": "2025", + "media": "CD", + } + ), ), ( "Artist - Album Title 3000 (1998) [FLAC] {CATALOGNUM}", - { - "albumartist": "Artist", - "album": "Album Title 3000", - "year": "1998", - "catalognum": "CATALOGNUM", - "media": None, - }, + FilenameMatch( + { + "albumartist": "Artist", + "album": "Album Title 3000", + "year": "1998", + "catalognum": "CATALOGNUM", + } + ), ), ( "various - cd album (2023) [catalognum 123] {vinyl mp3}", - { - "albumartist": "Various Artists", - "album": "cd album", - "year": "2023", - "catalognum": "catalognum 123", - "media": "Vinyl", - }, + FilenameMatch( + { + "albumartist": "Various Artists", + "album": "cd album", + "year": "2023", + "catalognum": "catalognum 123", + "media": "Vinyl", + } + ), ), ( "[CATALOG567] Album - Various (2020) [WEB-FLAC]", - { - "albumartist": "Various Artists", - "album": "Album", - "year": "2020", - "catalognum": "CATALOG567", - "media": "Digital Media", - }, + FilenameMatch( + { + "albumartist": "Various Artists", + "album": "Album", + "year": "2020", + "catalognum": "CATALOG567", + "media": "Digital Media", + } + ), ), ( "Album 3000 {web}", - { - "albumartist": None, - "album": "Album 3000", - "year": None, - "catalognum": None, - "media": "Digital Media", - }, + FilenameMatch( + { + "album": "Album 3000", + "media": "Digital Media", + } + ), ), ], ) @@ -253,17 +267,18 @@ def test_parse_album_info(text, matchgroup): m = f._parse_album_info(text) assert matchgroup == m -@pytest.mark.parametrize("string,pattern",[ - ( - "$albumartist - $album ($year) {$comments}", - r"(?P<albumartist>.+)\ \-\ (?P<album>.+)\ \((?P<year>.+)\)\ \ \{(?P<comments>.+)\}" - ), + +@pytest.mark.parametrize( + "string,pattern", + [ ( - "$", - None + "$albumartist - $album ($year) {$comments}", + r"(?P<albumartist>.+)\ \-\ (?P<album>.+)\ \((?P<year>.+)\)\ \ \{(?P<comments>.+)\}", ), - ]) -def test_parse_user_pattern_strings(string,pattern): + ("$", None), + ], +) +def test_parse_user_pattern_strings(string, pattern): f = FromFilenamePlugin() assert f._parse_user_pattern_strings(string) == pattern @@ -555,8 +570,7 @@ def test_sanity_check(self, expected_items): def test_singleton_import(self): task = SingletonImportTask( - toppath=None, - item=mock_item(path="/01 Track.wav") + toppath=None, item=mock_item(path="/01 Track.wav") ) f = FromFilenamePlugin() f.filename_task(task, Session()) @@ -630,39 +644,42 @@ def test_fields(self, fields, expected): assert res.year == expected.year assert res.title == expected.title - @pytest.mark.parametrize("patterns,expected", [ - ( - { - "folder": ["($comments) - {$albumartist} - {$album}"], - "file": ["$artist - $track - $title"] - }, - mock_item( - path="/(Comment) - {Album Artist} - {Album}/Artist - 02 - Title.flac", - comments="Comment", - albumartist="Album Artist", - album="Album", - artist="Artist", - track=2, - title="Title", - ) - ), - ( - { - "folder": ["[$comments] - {$albumartist} - {$album}"], - "file": ["$artist - $track - $title"] - }, - mock_item( - path="/(Comment) - {Album Artist} - {Album}/Artist - 02 - Title.flac", - artist="Artist", - track=2, - title="Title", - catalognum="Comment" - ) - ) - ]) + @pytest.mark.parametrize( + "patterns,expected", + [ + ( + { + "folder": ["($comments) - {$albumartist} - {$album}"], + "file": ["$artist - $track - $title"], + }, + mock_item( + path="/(Comment) - {Album Artist} - {Album}/Artist - 02 - Title.flac", + comments="Comment", + albumartist="Album Artist", + album="Album", + artist="Artist", + track=2, + title="Title", + ), + ), + ( + { + "folder": ["[$comments] - {$albumartist} - {$album}"], + "file": ["$artist - $track - $title"], + }, + mock_item( + path="/(Comment) - {Album Artist} - {Album}/Artist - 02 - Title.flac", + artist="Artist", + track=2, + title="Title", + catalognum="Comment", + ), + ), + ], + ) def test_user_patterns(self, patterns, expected): task = mock_task([mock_item(path=expected.path)]) - with self.configure_plugin({ "patterns": patterns }): + with self.configure_plugin({"patterns": patterns}): f = FromFilenamePlugin() f.filename_task(task, Session()) res = task.items[0] @@ -675,3 +692,5 @@ def test_user_patterns(self, patterns, expected): assert res.year == expected.year assert res.title == expected.title + def test_escape(self): + assert FromFilenamePlugin._escape("{text}") == "{{text}}" From 3676c39d651b8206ff0541122edc4037a7e8f4e5 Mon Sep 17 00:00:00 2001 From: Henry Oberholtzer <henryoberholtzer@gmail.com> Date: Wed, 7 Jan 2026 13:02:34 -0800 Subject: [PATCH 08/12] Lint, complete coverage --- beetsplug/fromfilename.py | 54 ++++++++++++---------- test/plugins/test_fromfilename.py | 77 ++++++++++++++++++------------- 2 files changed, 75 insertions(+), 56 deletions(-) diff --git a/beetsplug/fromfilename.py b/beetsplug/fromfilename.py index c4bfd538ac..1ecb197987 100644 --- a/beetsplug/fromfilename.py +++ b/beetsplug/fromfilename.py @@ -138,7 +138,6 @@ def values(self): return self._matches.values() - class FromFilenamePlugin(BeetsPlugin): def __init__(self) -> None: super().__init__() @@ -165,16 +164,17 @@ def __init__(self) -> None: @cached_property def file_patterns(self) -> list[re.Pattern[str]]: return self._user_pattern_to_regex( - self.config["patterns"]["file"].as_str_seq()) + self.config["patterns"]["file"].as_str_seq() + ) @cached_property def folder_patterns(self) -> list[re.Pattern[str]]: return self._user_pattern_to_regex( self.config["patterns"]["folder"].as_str_seq() - ) + ) def filename_task(self, task: ImportTask, session: ImportSession) -> None: - """ Examines all files in the given import task for any missing + """Examines all files in the given import task for any missing information it can gather from the file and folder names. Once the information has been obtained and checked, it @@ -196,19 +196,17 @@ def filename_task(self, task: ImportTask, session: ImportSession) -> None: # Apply the information self._apply_matches(album_matches, track_matches) - def _user_pattern_to_regex(self, patterns: list[str]) -> list[re.Pattern[str]]: + def _user_pattern_to_regex( + self, patterns: list[str] + ) -> list[re.Pattern[str]]: """Compile user patterns into a list of usable regex patterns. Catches errors are continues without bad regex patterns. """ return [ - re.compile(regexp) for p in patterns if ( - regexp := self._parse_user_pattern_strings(p)) - ] - - @staticmethod - def _escape(text: str) -> str: - # escape brackets for fstring logs - return re.sub("}", "}}", re.sub("{", "{{", text)) + re.compile(regexp) + for p in patterns + if (regexp := self._parse_user_pattern_strings(p)) + ] @staticmethod def _get_path_strings(items: list[Item]) -> tuple[str, dict[Item, str]]: @@ -222,18 +220,20 @@ def _get_path_strings(items: list[Item]) -> tuple[str, dict[Item, str]]: parent_folder = path.parent.stem return parent_folder, filenames - def _check_user_matches(self, text: str, - patterns: list[re.Pattern[str]]) -> FilenameMatch: + def _check_user_matches( + self, text: str, patterns: list[re.Pattern[str]] + ) -> FilenameMatch: for p in patterns: - if (usermatch := p.fullmatch(text)): + if usermatch := p.fullmatch(text): return FilenameMatch(usermatch.groupdict()) - return None + return FilenameMatch() - def _build_track_matches(self, - item_filenames: dict[Item, str]) -> dict[Item, FilenameMatch]: + def _build_track_matches( + self, item_filenames: dict[Item, str] + ) -> dict[Item, FilenameMatch]: track_matches: dict[Item, FilenameMatch] = {} for item, filename in item_filenames.items(): - if (m := self._check_user_matches(filename, self.file_patterns)): + if m := self._check_user_matches(filename, self.file_patterns): track_matches[item] = m else: match = self._parse_track_info(filename) @@ -264,7 +264,7 @@ def _parse_track_info(text: str) -> FilenameMatch: def _parse_album_info(self, text: str) -> FilenameMatch: # Check if a user pattern matches - if (m := self._check_user_matches(text, self.folder_patterns)): + if m := self._check_user_matches(text, self.folder_patterns): return m matches = FilenameMatch() # Start with the extra fields to make parsing @@ -301,7 +301,9 @@ def _parse_album_info(self, text: str) -> FilenameMatch: return matches def _apply_matches( - self, album_match: FilenameMatch, track_matches: dict[Item, FilenameMatch] + self, + album_match: FilenameMatch, + track_matches: dict[Item, FilenameMatch], ) -> None: """Apply all valid matched fields to all items in the match dictionary.""" match = album_match @@ -404,7 +406,9 @@ def _parse_catalognum(text: str) -> tuple[str | None, tuple[int, int]]: def _parse_user_pattern_strings(self, text: str) -> str | None: # escape any special characters - fields: list[str] = [s.lower() for s in re.findall(r"\$([a-zA-Z\_]+)", text)] + fields: list[str] = [ + s.lower() for s in re.findall(r"\$([a-zA-Z\_]+)", text) + ] if not fields: # if there are no usable fields return None @@ -422,7 +426,9 @@ def _mutate_string(text: str, span: tuple[int, int]) -> str: return text def _sanity_check_matches( - self, album_match: FilenameMatch, track_matches: dict[Item, FilenameMatch] + self, + album_match: FilenameMatch, + track_matches: dict[Item, FilenameMatch], ) -> None: """Check to make sure data is coherent between track and album matches. Largely looking to see diff --git a/test/plugins/test_fromfilename.py b/test/plugins/test_fromfilename.py index c00b567214..371a92b0d8 100644 --- a/test/plugins/test_fromfilename.py +++ b/test/plugins/test_fromfilename.py @@ -22,10 +22,13 @@ class Session: + """Mock session, not used by the plugin.""" + pass def mock_item(**kwargs): + """Mock item with blank defaults.""" defaults = dict( title="", artist="", @@ -41,6 +44,7 @@ def mock_item(**kwargs): def mock_task(items): + """Mock task for ease of testing.""" return ImportTask(toppath=None, paths=None, items=items) @@ -117,6 +121,7 @@ def mock_task(items): ], ) def test_parse_track_info(text, matchgroup): + """Test parsing track information from a filename.""" f = FromFilenamePlugin() m = f._parse_track_info(text) assert dict(matchgroup.items()) == dict(m.items()) @@ -128,15 +133,7 @@ def test_parse_track_info(text, matchgroup): ( # highly unlikely "", - FilenameMatch( - { - "albumartist": None, - "album": None, - "year": None, - "catalognum": None, - "media": None, - } - ), + FilenameMatch(), ), ( "1970", @@ -263,6 +260,7 @@ def test_parse_track_info(text, matchgroup): ], ) def test_parse_album_info(text, matchgroup): + """Test parsing album information from a folder name.""" f = FromFilenamePlugin() m = f._parse_album_info(text) assert matchgroup == m @@ -273,12 +271,16 @@ def test_parse_album_info(text, matchgroup): [ ( "$albumartist - $album ($year) {$comments}", - r"(?P<albumartist>.+)\ \-\ (?P<album>.+)\ \((?P<year>.+)\)\ \ \{(?P<comments>.+)\}", + ( + r"(?P<albumartist>.+)\ \-\ (?P<album>.+)\ " + r"\((?P<year>.+)\)\ \ \{(?P<comments>.+)\}" + ), ), ("$", None), ], ) def test_parse_user_pattern_strings(string, pattern): + """Test converting a user's format string to regexp""" f = FromFilenamePlugin() assert f._parse_user_pattern_strings(string) == pattern @@ -514,8 +516,6 @@ def test_fromfilename(self, expected_item): ), ], [ - # Even though it might be clear to human eyes, - # we can't guess since the various flag is thrown mock_item( path=( "/303 Alliance 012/" @@ -540,13 +540,9 @@ def test_fromfilename(self, expected_item): ], ) def test_sanity_check(self, expected_items): - """ - Take a list of expected items, create a task with just the paths. - - Goal is to ensure that sanity check - correctly adjusts the parsed artists and albums - - After parsing, compare to the expected items. + """Take a list of expected items, create a task with just the paths. + Assert the conditions that cause sanity check to change artist and title + fields. """ task = mock_task([mock_item(path=item.path) for item in expected_items]) f = FromFilenamePlugin() @@ -569,6 +565,7 @@ def test_sanity_check(self, expected_items): assert res[1].title == exp[1].title def test_singleton_import(self): + """Ensure that singletons behave correctly.""" task = SingletonImportTask( toppath=None, item=mock_item(path="/01 Track.wav") ) @@ -577,9 +574,25 @@ def test_singleton_import(self): assert task.item.track == 1 assert task.item.title == "Track" - # TODO: Test with items that already have data, or other types of bad data. - - # TODO: Test with items that have perfectly fine data for the most part + def test_item_with_existing_data(self): + """Ensure that existing metadata is not overwritten, no matter + how incorrect it may be.""" + path = "/Album Artist - Album (1999)/01 - Track Title.wav" + albumartist = "Other Artist" + title = "Existing Title" + given = mock_item( + path=path, + albumartist=albumartist, + album=" ", + title=title, + year=2024, + ) + f = FromFilenamePlugin() + f.filename_task(mock_task([given]), Session()) + assert given.title == title + assert given.albumartist == albumartist + assert given.album == "Album" + assert given.year == 2024 @pytest.mark.parametrize( "fields,expected", @@ -621,11 +634,7 @@ def test_singleton_import(self): ], ) def test_fields(self, fields, expected): - """ - With a set item and changing list of fields - - After parsing, compare to the original with the expected attributes defined. - """ + """Test that the applied fields can be adjusted by the user.""" path = ( "/Album Artist - Album (2025) [FLAC CD] {CATALOGNUM}/" "1-2 Artist - Track.wav" @@ -653,7 +662,10 @@ def test_fields(self, fields, expected): "file": ["$artist - $track - $title"], }, mock_item( - path="/(Comment) - {Album Artist} - {Album}/Artist - 02 - Title.flac", + path=( + "/(Comment) - {Album Artist} - {Album}" + "/Artist - 02 - Title.flac" + ), comments="Comment", albumartist="Album Artist", album="Album", @@ -668,7 +680,10 @@ def test_fields(self, fields, expected): "file": ["$artist - $track - $title"], }, mock_item( - path="/(Comment) - {Album Artist} - {Album}/Artist - 02 - Title.flac", + path=( + "/(Comment) - {Album Artist} - {Album}" + "/Artist - 02 - Title.flac" + ), artist="Artist", track=2, title="Title", @@ -678,6 +693,7 @@ def test_fields(self, fields, expected): ], ) def test_user_patterns(self, patterns, expected): + """Test recognizing data from a given user pattern.""" task = mock_task([mock_item(path=expected.path)]) with self.configure_plugin({"patterns": patterns}): f = FromFilenamePlugin() @@ -691,6 +707,3 @@ def test_user_patterns(self, patterns, expected): assert res.catalognum == expected.catalognum assert res.year == expected.year assert res.title == expected.title - - def test_escape(self): - assert FromFilenamePlugin._escape("{text}") == "{{text}}" From 32672054d43ccd5376b7c16d76152ec02d59f021 Mon Sep 17 00:00:00 2001 From: Henry Oberholtzer <henryoberholtzer@gmail.com> Date: Wed, 7 Jan 2026 15:08:38 -0800 Subject: [PATCH 09/12] Final types, docs, ignore directories --- beetsplug/fromfilename.py | 71 ++++++++++++++++++++++--------- docs/plugins/fromfilename.rst | 21 ++++++++- test/plugins/test_fromfilename.py | 50 ++++++++++++++++++++++ 3 files changed, 119 insertions(+), 23 deletions(-) diff --git a/beetsplug/fromfilename.py b/beetsplug/fromfilename.py index 1ecb197987..35b35bec99 100644 --- a/beetsplug/fromfilename.py +++ b/beetsplug/fromfilename.py @@ -17,7 +17,7 @@ """ import re -from collections.abc import MutableMapping +from collections.abc import Iterator, MutableMapping, ValuesView from datetime import datetime from functools import cached_property from pathlib import Path @@ -118,10 +118,10 @@ def __init__(self, matches: dict[str, str] = {}) -> None: if value is not None: self._matches[key.lower()] = str(value).strip() - def __getitem__(self, key) -> str | None: + def __getitem__(self, key: str) -> str | None: return self._matches.get(key, None) - def __iter__(self): + def __iter__(self) -> Iterator[str]: return iter(self._matches) def __len__(self) -> int: @@ -134,7 +134,7 @@ def __setitem__(self, key: str, value: str | None) -> None: def __delitem__(self, key: str) -> None: del self._matches[key] - def values(self): + def values(self) -> ValuesView[str | None]: return self._matches.values() @@ -155,23 +155,22 @@ def __init__(self) -> None: "year", ], "patterns": {"folder": [], "file": []}, - # TODO: Add ignore parent folder + "ignore_dirs": [], } ) self.fields = set(self.config["fields"].as_str_seq()) - self.register_listener("import_task_start", self.filename_task) - - @cached_property - def file_patterns(self) -> list[re.Pattern[str]]: - return self._user_pattern_to_regex( + # Evaluate the user patterns to expand the fields + self.file_patterns = self._user_pattern_to_regex( self.config["patterns"]["file"].as_str_seq() ) - - @cached_property - def folder_patterns(self) -> list[re.Pattern[str]]: - return self._user_pattern_to_regex( + self.folder_patterns = self._user_pattern_to_regex( self.config["patterns"]["folder"].as_str_seq() ) + self.register_listener("import_task_start", self.filename_task) + + @cached_property + def ignored_directories(self) -> set[str]: + return set([p.lower() for p in self.config["ignore_dirs"].as_str_seq()]) def filename_task(self, task: ImportTask, session: ImportSession) -> None: """Examines all files in the given import task for any missing @@ -184,8 +183,10 @@ def filename_task(self, task: ImportTask, session: ImportSession) -> None: items: list[Item] = task.items - # TODO: Check each of the fields to see if any are missing - # information on the file. + # If there's no missing data to parse + if not self._check_missing_data(items): + return + parent_folder, item_filenames = self._get_path_strings(items) album_matches = self._parse_album_info(parent_folder) @@ -196,6 +197,31 @@ def filename_task(self, task: ImportTask, session: ImportSession) -> None: # Apply the information self._apply_matches(album_matches, track_matches) + def _check_missing_data(self, items: list[Item]) -> bool: + """Look for what fields are missing data on the items. + Compare each field in self.fields to the fields on the + item. + + If all items have it, remove it from fields. + + If any items are missing it, keep it on the fields. + + If no fields are detect that need to be processed, + return false to shortcut the plugin. + """ + remove = set() + for field in self.fields: + # If any field is a bad field + if any([True for item in items if self._bad_field(item[field])]): + continue + else: + remove.add(field) + self.fields = self.fields - remove + # If all fields have been removed, there is nothing to do + if not len(self.fields): + return False + return True + def _user_pattern_to_regex( self, patterns: list[str] ) -> list[re.Pattern[str]]: @@ -208,8 +234,9 @@ def _user_pattern_to_regex( if (regexp := self._parse_user_pattern_strings(p)) ] - @staticmethod - def _get_path_strings(items: list[Item]) -> tuple[str, dict[Item, str]]: + def _get_path_strings( + self, items: list[Item] + ) -> tuple[str, dict[Item, str]]: parent_folder: str = "" filenames: dict[Item, str] = {} for item in items: @@ -218,6 +245,8 @@ def _get_path_strings(items: list[Item]) -> tuple[str, dict[Item, str]]: filenames[item] = filename if not parent_folder: parent_folder = path.parent.stem + if parent_folder.lower() in self.ignored_directories: + parent_folder = "" return parent_folder, filenames def _check_user_matches( @@ -314,7 +343,7 @@ def _apply_matches( for key in match.keys(): if key in self.fields: old_value = item.get(key) - new_value = match[key] # type: ignore + new_value = match[key] if self._bad_field(old_value) and new_value: found_data[key] = new_value self._log.info(f"Item updated with: {found_data.items()}") @@ -436,7 +465,7 @@ def _sanity_check_matches( identified. """ - def swap_artist_title(tracks: list[FilenameMatch]): + def swap_artist_title(tracks: list[FilenameMatch]) -> None: for track in tracks: artist = track["title"] track["title"] = track["artist"] @@ -475,7 +504,7 @@ def swap_artist_title(tracks: list[FilenameMatch]): @staticmethod def _equal_fields(dictionaries: list[FilenameMatch], field: str) -> bool: """Checks if all values of a field on a dictionary match.""" - return len(set(d[field] for d in dictionaries)) <= 1 # type: ignore + return len(set(d[field] for d in dictionaries)) <= 1 @staticmethod def _bad_field(field: str | int) -> bool: diff --git a/docs/plugins/fromfilename.rst b/docs/plugins/fromfilename.rst index 5308d5c5b3..fae2cf12d7 100644 --- a/docs/plugins/fromfilename.rst +++ b/docs/plugins/fromfilename.rst @@ -7,7 +7,8 @@ but where the filenames contain useful information like the artist and title. When you attempt to import a track that's missing a title, this plugin will look at the track's filename and parent folder, and guess a number of fields. -The extracted information will be used to search for metadata and match track ordering. +The extracted information will be used to search for metadata and match track +ordering. To use the ``fromfilename`` plugin, enable it in your configuration (see :ref:`using-plugins`). @@ -38,11 +39,21 @@ Default patterns: file: [] folder: [] + ignore_dirs: .. conf:: fields :default: [ artist, album, albumartist, catalognum, disc, media, title, track, year ] - The fields the plugin will guess with its default pattern matching. If a field is specified in a user pattern, that field does not need to be present on this list to be applied. If you only want the plugin contribute the track title and artist, you would put ``[title, artist]``. + The fields the plugin will guess with its default pattern matching. + + By default, the plugin is configured to match all fields its default + patterns are capable of matching. + + If a field is specified in a user pattern, that field does not need + to be present on this list to be applied. + + If you only want the plugin to contribute the track title and artist, + you would put ``[title, artist]``. .. conf:: patterns @@ -67,3 +78,9 @@ Default file: - "$title - $artist" +.. conf:: ignore_dirs + :default: [] + + Specify parent directory names that will not be searched for album + information. Useful if you use a regular directory for importing + single files. diff --git a/test/plugins/test_fromfilename.py b/test/plugins/test_fromfilename.py index 371a92b0d8..8800ee04df 100644 --- a/test/plugins/test_fromfilename.py +++ b/test/plugins/test_fromfilename.py @@ -13,6 +13,9 @@ """Tests for the fromfilename plugin.""" +from copy import deepcopy +from unittest.mock import patch + import pytest from beets.importer.tasks import ImportTask, SingletonImportTask @@ -287,6 +290,7 @@ def test_parse_user_pattern_strings(string, pattern): class TestFromFilename(PluginMixin): plugin = "fromfilename" + preload_plugin = False @pytest.mark.parametrize( "expected_item", @@ -707,3 +711,49 @@ def test_user_patterns(self, patterns, expected): assert res.catalognum == expected.catalognum assert res.year == expected.year assert res.title == expected.title + + def test_no_changes(self): + item = mock_item( + path="/Folder/File.wav", + albumartist="AlbumArtist", + artist="Artist", + title="Title", + ) + fields = ["artist", "title", "albumartist"] + task = mock_task([item]) + with self.configure_plugin({"fields": fields}): + with patch.object(FromFilenamePlugin, "_get_path_strings") as mock: + f = FromFilenamePlugin() + f.filename_task(task, Session()) + mock.assert_not_called() + + def test_changes_missing_values(self): + item = mock_item( + path="/Folder/File.wav", + albumartist="AlbumArtist", + artist="Artist", + title="Title", + ) + item2 = deepcopy(item) + item2.title = "" + fields = ["artist", "title", "albumartist"] + task = mock_task([item, item2]) + with self.configure_plugin({"fields": fields}): + with patch.object( + FromFilenamePlugin, + "_get_path_strings", + return_value=("mock", {item: "mock"}), + ) as mock: + f = FromFilenamePlugin() + f.filename_task(task, Session()) + assert len(f.fields) == 1 + assert "title" in f.fields + mock.assert_called() + + def test_ignored_directories(self): + ignored = "Incoming" + item = mock_item(path="/tmp/" + ignored + "/01 - File.wav") + with self.configure_plugin({"ignore_dirs": [ignored]}): + f = FromFilenamePlugin() + parent_folder, _ = f._get_path_strings([item]) + assert parent_folder == "" From 4cd29f9a85b2d9402fb864a0c05ecc728619f42f Mon Sep 17 00:00:00 2001 From: Henry <henryoberholtzer@gmail.com> Date: Fri, 9 Jan 2026 18:08:56 -0800 Subject: [PATCH 10/12] Fixes and refinements, 100% coverage. --- beetsplug/fromfilename.py | 28 +++++++++++++--------------- docs/plugins/fromfilename.rst | 2 +- test/plugins/test_fromfilename.py | 2 +- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/beetsplug/fromfilename.py b/beetsplug/fromfilename.py index 35b35bec99..a95d63b199 100644 --- a/beetsplug/fromfilename.py +++ b/beetsplug/fromfilename.py @@ -112,11 +112,12 @@ class FilenameMatch(MutableMapping[str, str | None]): - def __init__(self, matches: dict[str, str] = {}) -> None: + def __init__(self, matches: dict[str, str] | None = None) -> None: self._matches: dict[str, str] = {} - for key, value in matches.items(): - if value is not None: - self._matches[key.lower()] = str(value).strip() + if matches: + for key, value in matches.items(): + if value is not None: + self._matches[key.lower()] = str(value).strip() def __getitem__(self, key: str) -> str | None: return self._matches.get(key, None) @@ -159,13 +160,13 @@ def __init__(self) -> None: } ) self.fields = set(self.config["fields"].as_str_seq()) - # Evaluate the user patterns to expand the fields self.file_patterns = self._user_pattern_to_regex( self.config["patterns"]["file"].as_str_seq() ) self.folder_patterns = self._user_pattern_to_regex( self.config["patterns"]["folder"].as_str_seq() ) + self.session_fields: set[str] = set() self.register_listener("import_task_start", self.filename_task) @cached_property @@ -209,16 +210,13 @@ def _check_missing_data(self, items: list[Item]) -> bool: If no fields are detect that need to be processed, return false to shortcut the plugin. """ - remove = set() + self.session_fields = set({}) for field in self.fields: # If any field is a bad field if any([True for item in items if self._bad_field(item[field])]): - continue - else: - remove.add(field) - self.fields = self.fields - remove + self.session_fields.add(field) # If all fields have been removed, there is nothing to do - if not len(self.fields): + if not len(self.session_fields): return False return True @@ -335,13 +333,13 @@ def _apply_matches( track_matches: dict[Item, FilenameMatch], ) -> None: """Apply all valid matched fields to all items in the match dictionary.""" - match = album_match + match = dict(album_match._matches) for item in track_matches: - match.update(track_matches[item]) + match.update(track_matches[item]._matches) found_data: dict[str, int | str] = {} self._log.debug(f"Attempting keys: {match.keys()}") for key in match.keys(): - if key in self.fields: + if key in self.session_fields: old_value = item.get(key) new_value = match[key] if self._bad_field(old_value) and new_value: @@ -445,7 +443,7 @@ def _parse_user_pattern_strings(self, text: str) -> str | None: for f in fields: pattern = re.sub(rf"\\\${f}", f"(?P<{f}>.+)", pattern) self.fields.add(f) - return rf"{pattern}" + return pattern @staticmethod def _mutate_string(text: str, span: tuple[int, int]) -> str: diff --git a/docs/plugins/fromfilename.rst b/docs/plugins/fromfilename.rst index fae2cf12d7..98ea92f94c 100644 --- a/docs/plugins/fromfilename.rst +++ b/docs/plugins/fromfilename.rst @@ -63,7 +63,7 @@ Default of the file. If ``fromfilename`` can't match the entire string to the given pattern, it will - falls back to the default pattern. + fall back to the default pattern. The following custom patterns will match this path and retrieve the specified fields. diff --git a/test/plugins/test_fromfilename.py b/test/plugins/test_fromfilename.py index 8800ee04df..4f09deee16 100644 --- a/test/plugins/test_fromfilename.py +++ b/test/plugins/test_fromfilename.py @@ -746,7 +746,7 @@ def test_changes_missing_values(self): ) as mock: f = FromFilenamePlugin() f.filename_task(task, Session()) - assert len(f.fields) == 1 + assert len(f.session_fields) == 1 assert "title" in f.fields mock.assert_called() From 177e997cb05c2364b827df4241296f824be04bfb Mon Sep 17 00:00:00 2001 From: Henry Oberholtzer <henryoberholtzer@gmail.com> Date: Tue, 13 Jan 2026 10:39:56 -0800 Subject: [PATCH 11/12] Add future annotations, add vinyl track index parsing, simplify docs --- beetsplug/fromfilename.py | 61 ++++++++++++++++++++++++++++--- docs/plugins/fromfilename.rst | 14 +++---- test/plugins/test_fromfilename.py | 31 ++++++++++++++++ 3 files changed, 94 insertions(+), 12 deletions(-) diff --git a/beetsplug/fromfilename.py b/beetsplug/fromfilename.py index a95d63b199..9960dfe973 100644 --- a/beetsplug/fromfilename.py +++ b/beetsplug/fromfilename.py @@ -1,5 +1,5 @@ # This file is part of beets. -# Copyright 2016, Jan-Erik Dahlin +# Copyright 2016, Jan-Erik Dahlin, Henry Oberholtzer. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -16,18 +16,23 @@ (possibly also extract track and artist) """ +from __future__ import annotations + import re from collections.abc import Iterator, MutableMapping, ValuesView from datetime import datetime from functools import cached_property from pathlib import Path +from typing import TYPE_CHECKING from beets import config -from beets.importer import ImportSession, ImportTask -from beets.library import Item from beets.plugins import BeetsPlugin from beets.util import displayable_path +if TYPE_CHECKING: + from beets.importer import ImportSession, ImportTask + from beets.library import Item + # Filename field extraction patterns RE_TRACK_INFO = re.compile( r""" @@ -52,6 +57,8 @@ re.VERBOSE | re.IGNORECASE, ) +RE_ALPHANUM_INDEX = re.compile(r"^[A-Z]{1,2}\d{,2}\b") + # Catalog number extraction pattern RE_CATALOGNUM = re.compile( r""" @@ -180,14 +187,14 @@ def filename_task(self, task: ImportTask, session: ImportSession) -> None: Once the information has been obtained and checked, it is applied to the items to improve later metadata lookup. """ - # Create the list of items to process + # Retrieve the list of items to process items: list[Item] = task.items # If there's no missing data to parse if not self._check_missing_data(items): return - + # Retrieve the path characteristics to check parent_folder, item_filenames = self._get_path_strings(items) album_matches = self._parse_album_info(parent_folder) @@ -259,6 +266,8 @@ def _build_track_matches( self, item_filenames: dict[Item, str] ) -> dict[Item, FilenameMatch]: track_matches: dict[Item, FilenameMatch] = {} + # Check for alphanumeric indices + self._parse_alphanumeric_index(item_filenames) for item, filename in item_filenames.items(): if m := self._check_user_matches(filename, self.file_patterns): track_matches[item] = m @@ -267,6 +276,48 @@ def _build_track_matches( track_matches[item] = match return track_matches + @staticmethod + def _parse_alphanumeric_index(item_filenames: dict[Item, str]) -> None: + """Before continuing to regular track matches, see if an alphanumeric + tracklist can be extracted. "A1, B1, B2" Sometimes these are followed + by a dash or dot and must be anchored to the start of the string. + + All matched patterns are extracted, and replaced with integers. + + Discs are not accounted for. + """ + + def match_index(filename: str) -> str: + m = RE_ALPHANUM_INDEX.match(filename) + if not m: + return "" + else: + return m.group() + + # Extract matches for alphanumeric indexes + indexes: list[tuple[str, Item]] = [ + (match_index(filename), item) + for item, filename in item_filenames.items() + ] + # If all the tracks do not start with a vinyl index, abort + if not all([i[0] for i in indexes]): + return + + # Utility function for sorting + def index_key(x: tuple[str, Item]): + return x[0] + + # If all have match, sort by the matched strings + indexes.sort(key=index_key) + # Iterate through all the filenames + for index, pair in enumerate(indexes): + match, item = pair + # Substitute the alnum index with an integer + new_filename = item_filenames[item].replace( + match, str(index + 1), 1 + ) + item_filenames[item] = new_filename + @staticmethod def _parse_track_info(text: str) -> FilenameMatch: match = RE_TRACK_INFO.match(text) diff --git a/docs/plugins/fromfilename.rst b/docs/plugins/fromfilename.rst index 98ea92f94c..c4f15a59e9 100644 --- a/docs/plugins/fromfilename.rst +++ b/docs/plugins/fromfilename.rst @@ -57,16 +57,16 @@ Default .. conf:: patterns - Users can specify patterns to improve the efficacy of the plugin. Patterns can - be specified as ``file`` or ``folder`` patterns. ``file`` patterns are checked - against the filename. ``folder`` patterns are checked against the parent folder - of the file. + Users can specify patterns to expand the set of filenames that can + be recognized by the plugin. Patterns can be specified as ``file`` + or ``folder`` patterns. ``file`` patterns are checked against the filename. + ``folder`` patterns are checked against the parent folder of the file. - If ``fromfilename`` can't match the entire string to the given pattern, it will + If ``fromfilename`` can't match the entire string to one of the given pattern, it will fall back to the default pattern. - The following custom patterns will match this path and retrieve the specified - fields. + For example, the following custom patterns will match this path and folder, + and retrieve the specified fields. ``/music/James Lawson - 841689 (2004)/Coming Up - James Lawson & Andy Farley.mp3`` diff --git a/test/plugins/test_fromfilename.py b/test/plugins/test_fromfilename.py index 4f09deee16..7d264e396e 100644 --- a/test/plugins/test_fromfilename.py +++ b/test/plugins/test_fromfilename.py @@ -712,6 +712,37 @@ def test_user_patterns(self, patterns, expected): assert res.year == expected.year assert res.title == expected.title + @pytest.mark.parametrize( + "expected", + [ + ( + mock_item(path="/temp/A - track.wav", track=1), + mock_item(path="/temp/B - track.wav", track=2), + mock_item(path="/temp/C - track.wav", track=3), + ), + # Test with numbers + ( + mock_item(path="/temp/A1 - track.wav", track=1), + mock_item(path="/temp/A2 - track.wav", track=2), + mock_item(path="/temp/B1 - track.wav", track=3), + ), + # Test out of order + ( + mock_item(path="/temp/Z - track.wav", track=3), + mock_item(path="/temp/X - track.wav", track=1), + mock_item(path="/temp/Y - track.wav", track=2), + ), + ], + ) + def test_alphanumeric_index(self, expected): + """Test parsing an alphanumeric index string.""" + task = mock_task([mock_item(path=item.path) for item in expected]) + f = FromFilenamePlugin() + f.filename_task(task, Session()) + assert task.items[0].track == expected[0].track + assert task.items[1].track == expected[1].track + assert task.items[2].track == expected[2].track + def test_no_changes(self): item = mock_item( path="/Folder/File.wav", From 7a1c92d8617afee535ec39eb7b201226e24deb79 Mon Sep 17 00:00:00 2001 From: Henry <henryoberholtzer@gmail.com> Date: Mon, 19 Jan 2026 21:34:02 -0800 Subject: [PATCH 12/12] Adjust log message. Initial fix for group albums. Test outlines written. TODO: Write tests. --- beetsplug/fromfilename.py | 22 ++++++++++++++++--- docs/plugins/fromfilename.rst | 11 ++++++++++ test/plugins/test_fromfilename.py | 36 ++++++++++++++++++++++++++++--- 3 files changed, 63 insertions(+), 6 deletions(-) diff --git a/beetsplug/fromfilename.py b/beetsplug/fromfilename.py index 9960dfe973..82c3d444cb 100644 --- a/beetsplug/fromfilename.py +++ b/beetsplug/fromfilename.py @@ -164,6 +164,7 @@ def __init__(self) -> None: ], "patterns": {"folder": [], "file": []}, "ignore_dirs": [], + "guess": {"folder": True, "file": True}, } ) self.fields = set(self.config["fields"].as_str_seq()) @@ -341,10 +342,17 @@ def _parse_track_info(text: str) -> FilenameMatch: return trackmatch def _parse_album_info(self, text: str) -> FilenameMatch: + matches = FilenameMatch() + + if not self.config["guess"]["folder"] or ( + config["import"]["group_albums"] or config["import"]["singletons"] + ): + # If the group albums flag is thrown, we can't trust the parent directory + # likewise for singletons - return an empty match + return matches # Check if a user pattern matches if m := self._check_user_matches(text, self.folder_patterns): return m - matches = FilenameMatch() # Start with the extra fields to make parsing # the album artist and artist field easier year, span = self._parse_year(text) @@ -388,16 +396,24 @@ def _apply_matches( for item in track_matches: match.update(track_matches[item]._matches) found_data: dict[str, int | str] = {} - self._log.debug(f"Attempting keys: {match.keys()}") + self._log.debug(f"keys: {', '.join(match.keys())}") + # Check every key we are supposed to match. for key in match.keys(): + # If the key is applicable to the session, we will update it. if key in self.session_fields: old_value = item.get(key) new_value = match[key] + # If the field is bad, and we have a new value if self._bad_field(old_value) and new_value: found_data[key] = new_value - self._log.info(f"Item updated with: {found_data.items()}") + self._log.info(f"guessing {self._format_guesses(found_data)}") item.update(found_data) + @staticmethod + def _format_guesses(guesses: dict[str, int | str]) -> str: + """Format guesses in a 'field="guess"' style for logging""" + return ", ".join([f'{g[0]}="{g[1]}"' for g in guesses.items()]) + @staticmethod def _parse_album_and_albumartist( text: str, diff --git a/docs/plugins/fromfilename.rst b/docs/plugins/fromfilename.rst index c4f15a59e9..1c48caffbe 100644 --- a/docs/plugins/fromfilename.rst +++ b/docs/plugins/fromfilename.rst @@ -84,3 +84,14 @@ Default Specify parent directory names that will not be searched for album information. Useful if you use a regular directory for importing single files. + +.. conf:: guess + + Disable guessing from the folder or filename. Be aware that disabling both + will cause the plugin to have no effect! + + .. code-block:: yaml + + guess: + folder: yes + file: yes diff --git a/test/plugins/test_fromfilename.py b/test/plugins/test_fromfilename.py index 7d264e396e..cf444993d7 100644 --- a/test/plugins/test_fromfilename.py +++ b/test/plugins/test_fromfilename.py @@ -735,7 +735,7 @@ def test_user_patterns(self, patterns, expected): ], ) def test_alphanumeric_index(self, expected): - """Test parsing an alphanumeric index string.""" + """Assert that an alphanumeric index is guessed in order.""" task = mock_task([mock_item(path=item.path) for item in expected]) f = FromFilenamePlugin() f.filename_task(task, Session()) @@ -743,7 +743,9 @@ def test_alphanumeric_index(self, expected): assert task.items[1].track == expected[1].track assert task.items[2].track == expected[2].track - def test_no_changes(self): + def test_no_guesses(self): + """Assert that an item with complete information is + has no guesses attempted.""" item = mock_item( path="/Folder/File.wav", albumartist="AlbumArtist", @@ -758,7 +760,9 @@ def test_no_changes(self): f.filename_task(task, Session()) mock.assert_not_called() - def test_changes_missing_values(self): + def test_only_one_guess(self): + """Assert that an item missing only one value + will just have that key in session fields.""" item = mock_item( path="/Folder/File.wav", albumartist="AlbumArtist", @@ -782,9 +786,35 @@ def test_changes_missing_values(self): mock.assert_called() def test_ignored_directories(self): + """Assert that a given parent directory name is ignored.""" ignored = "Incoming" item = mock_item(path="/tmp/" + ignored + "/01 - File.wav") with self.configure_plugin({"ignore_dirs": [ignored]}): f = FromFilenamePlugin() parent_folder, _ = f._get_path_strings([item]) assert parent_folder == "" + + def test_guess_folder(self): + """Assert that from filename does not + guess from the folder, if guess folder is `no`.""" + return + + def test_guess_file(self): + """Assert that from filename does not guess + from the file, if guess file is `no`.""" + return + + def test_singleton_flag_import(self): + """If the import task is a singleton, assert that + the plugin does not guess from the folder.""" + return + + def test_group_album_flag_import(self): + """If the group albums flag is thrown, assert + that the plugin does not guess from the folder.""" + return + + def test_import_split_by_group(self): + """Asser that an initial run without group by album, and an inaccurate + album guess, results in a run omitting it with the group album flag.""" + return