format: Add a function for formatting sort keys
This function casefolds the input string and makes a series of substitutions before splitting the string into a tuple of strings that can be compared against. Signed-off-by: Anna Schumaker <Anna@NoWheyCreamery.com>
This commit is contained in:
parent
6cade5d779
commit
ec5c4ddd2c
|
@ -1,5 +1,8 @@
|
|||
# Copyright 2022 (c) Anna Schumaker
|
||||
"""Helper functions for formatting strings."""
|
||||
import re
|
||||
|
||||
# Leading words dropped from the front of a sort key; the empty string
# covers inputs that begin with a separator character.
IGNORE_WORDS = {"a", "an", "the", ""}
|
||||
|
||||
|
||||
def search(input: str) -> str | None:
|
||||
|
@ -19,3 +22,14 @@ def search(input: str) -> str | None:
|
|||
input += "*"
|
||||
|
||||
return input
|
||||
|
||||
|
||||
def sort_key(input: str) -> tuple:
    """Build a tuple of casefolded words to use as a sort key.

    The input is casefolded, straight and curly quote characters are
    removed, and the result is split on spaces, slashes, underscores,
    and hyphens.  When more than one word remains and the first one is
    in IGNORE_WORDS, that leading word is dropped.  An empty input
    produces an empty tuple.
    """
    if len(input) == 0:
        return tuple()

    folded = input.casefold()
    unquoted = re.sub(r"[\"\'’“”]", "", folded)
    words = re.split(r"[ /_-]", unquoted)

    # Only drop the leading word when something else remains after it.
    if len(words) > 1 and words[0] in IGNORE_WORDS:
        del words[0]
    return tuple(words)
|
||||
|
|
|
@ -18,3 +18,30 @@ class TestFormatter(unittest.TestCase):
|
|||
self.assertEqual(format.search("*Test$"), "*test")
|
||||
self.assertEqual(format.search("^"), "*")
|
||||
self.assertEqual(format.search("$"), "*")
|
||||
|
||||
def test_ignore_words(self):
    """Check the set of words skipped at the start of a sort key."""
    expected = set(["a", "an", "the", ""])
    self.assertSetEqual(format.IGNORE_WORDS, expected)
|
||||
|
||||
def test_sort_key(self):
    """Verify the sort key produced for a range of input strings."""
    # (input text, expected key) pairs, checked in order.
    cases = [
        ("", ()),
        ("Test Text", ("test", "text")),
        ("Tést Téxt", ("tést", "téxt")),
        ("A Test", ("test",)),
        ("A", ("a",)),
        ("An Extra Test", ("extra", "test",)),
        ("An", ("an",)),
        ("The Test", ("test",)),
        ("The", ("the",)),
        ("Test The", ("test", "the")),
        ("Test-Text", ("test", "text")),
        ("Test_Text", ("test", "text")),
        ("\"Test\" Text", ("test", "text")),
        ("“Test” Text", ("test", "text")),
        ("'Test' Text", ("test", "text")),
        ("Fish N’ Chips", ("fish", "n", "chips")),
        ("ac/dc", ("ac", "dc")),
        ("/a/B/c/D", ("a", "b", "c", "d")),
    ]
    for text, expected in cases:
        self.assertEqual(format.sort_key(text), expected)
|
||||
|
|
Loading…
Reference in New Issue