format: Add a function for formatting sort keys

This function casefolds the input string and makes a series of
substitutions before splitting the string into a tuple of strings that
can be compared against other sort keys.

Signed-off-by: Anna Schumaker <Anna@NoWheyCreamery.com>
This commit is contained in:
Anna Schumaker 2022-08-10 12:22:30 -04:00
parent 6cade5d779
commit ec5c4ddd2c
2 changed files with 41 additions and 0 deletions

View File

@ -1,5 +1,8 @@
# Copyright 2022 (c) Anna Schumaker
"""Helper functions for formatting strings."""
import re
# Leading words that sort_key() drops from a multi-word key. The empty
# string covers input that begins with a separator character.
IGNORE_WORDS = {"a", "an", "the", ""}
def search(input: str) -> str | None:
@ -19,3 +22,14 @@ def search(input: str) -> str | None:
input += "*"
return input
def sort_key(input: str) -> tuple:
    """Build a tuple of casefolded words usable as a sort key.

    The input is casefolded, straight and curly quote characters are
    removed, and the remainder is split on spaces, slashes, underscores,
    and hyphens. When the split yields more than one word and the first
    one is in IGNORE_WORDS, that first word is dropped. An empty input
    string produces an empty tuple.
    """
    if not input:
        return ()

    folded = input.casefold()
    unquoted = re.sub(r"[\"\'’“”]", "", folded)
    first, *rest = re.split(r"[ /_-]", unquoted)

    # A lone word is always kept, even when it is one of the ignored words.
    if rest and first in IGNORE_WORDS:
        return tuple(rest)
    return (first, *rest)

View File

@ -18,3 +18,30 @@ class TestFormatter(unittest.TestCase):
self.assertEqual(format.search("*Test$"), "*test")
self.assertEqual(format.search("^"), "*")
self.assertEqual(format.search("$"), "*")
def test_ignore_words(self):
    """Check the set of words dropped from the start of a sort key."""
    expected = set(["a", "an", "the", ""])
    self.assertSetEqual(format.IGNORE_WORDS, expected)
def test_sort_key(self):
    """Check the sort keys produced for a range of input strings."""
    # (input string, expected key) pairs, checked in order.
    cases = [
        ("", ()),
        ("Test Text", ("test", "text")),
        ("Tést Téxt", ("tést", "téxt")),
        ("A Test", ("test",)),
        ("A", ("a",)),
        ("An Extra Test", ("extra", "test",)),
        ("An", ("an",)),
        ("The Test", ("test",)),
        ("The", ("the",)),
        ("Test The", ("test", "the")),
        ("Test-Text", ("test", "text")),
        ("Test_Text", ("test", "text")),
        ("\"Test\" Text", ("test", "text")),
        ("“Test” Text", ("test", "text")),
        ("'Test' Text", ("test", "text")),
        ("Fish N Chips", ("fish", "n", "chips")),
        ("ac/dc", ("ac", "dc")),
        ("/a/B/c/D", ("a", "b", "c", "d")),
    ]
    for text, expected in cases:
        self.assertEqual(format.sort_key(text), expected)