emmental/emmental/format.py
Anna Schumaker ec5c4ddd2c format: Add a function for formatting sort keys
This function casefolds the input string and makes a series of
substitutions before splitting the string into a tuple of strings that
can be compared against.

Signed-off-by: Anna Schumaker <Anna@NoWheyCreamery.com>
2023-04-12 10:42:14 -04:00

36 lines
900 B
Python

# Copyright 2022 (c) Anna Schumaker
"""Helper functions for formatting strings."""
import re
# Leading tokens that sort_key() discards. The empty string is included
# because splitting a string that begins with a separator yields an empty
# first token.
IGNORE_WORDS = {"a", "an", "the", ""}
def search(input: str) -> str | None:
"""Translate the input string into a sqlite3 GLOB statement."""
input = input.strip().casefold()
if len(input) == 0:
return None
if input[0] == "^":
input = input[1:] if len(input) > 1 else "*"
elif input[0] != "*":
input = "*" + input
if input[-1] == "$":
input = input[:-1]
elif input[-1] != "*":
input += "*"
return input
def sort_key(input: str) -> tuple:
    """Build a tuple of casefolded tokens to use as a sort key.

    Straight and curly quote characters are removed, then the text is
    split on spaces, slashes, underscores, and hyphens. When the split
    produces more than one token and the first is in IGNORE_WORDS, that
    first token is dropped.

    Returns an empty tuple for an empty input string.
    """
    if len(input) == 0:
        return ()

    unquoted = re.sub(r"[\"\'’“”]", "", input.casefold())
    tokens = re.split(r"[ /_-]", unquoted)

    # Only a leading token is ever skipped, and never the sole token.
    skip_first = len(tokens) > 1 and tokens[0] in IGNORE_WORDS
    return tuple(tokens[1:] if skip_first else tokens)