emmental/curds/sort.py

32 lines
837 B
Python

# Copyright 2019 (c) Anna Schumaker.
import unicodedata
import os
def bisect(lst, lhs, *args):
    """Binary-search lst for the element whose sort key equals lhs.

    Each element's key is obtained by calling its sort_key(*args) method.
    The search assumes lst is already ordered by that key.

    Returns (index, element) on a match.  Otherwise returns (index, None),
    where index is the position at which the search range collapsed, i.e.
    where an element with key lhs would be inserted.
    """
    lo, hi = 0, len(lst)
    while lo < hi:
        mid = (lo + hi) // 2
        candidate = lst[mid]
        rhs = candidate.sort_key(*args)
        if lhs == rhs:
            return (mid, candidate)
        if lhs < rhs:
            # Target sorts before the midpoint: keep the lower half.
            hi = mid
        else:
            # Target sorts after the midpoint: keep the upper half.
            lo = mid + 1
    return (lo, None)
def key(text):
    """Build the list of strings used as the sort key for text.

    If text names an existing filesystem path, the key is the list of its
    "/"-separated components with leading and trailing slashes removed.

    Otherwise text is ASCII-folded with normalize(), lowercased and split
    on whitespace; non-alphanumeric characters are removed from each word,
    words left empty are dropped, and a leading "a" or "the" is skipped.

    Returns an empty list when nothing sortable remains (empty text, only
    punctuation, or only characters that normalize() discards) rather
    than raising IndexError.
    """
    if os.path.exists(text):
        return text.strip("/").split("/")

    stripped = (
        "".join(filter(str.isalnum, word))
        for word in normalize(text).lower().split()
    )
    words = [word for word in stripped if word]

    # Guard the article check: words can legitimately be empty here.
    if words and words[0] in ("a", "the"):
        return words[1:]
    return words
def normalize(text):
    """Reduce text to plain ASCII.

    The text is first decomposed with Unicode NFD, which splits accented
    letters into a base letter followed by combining marks.  Encoding to
    ASCII with errors ignored then drops those marks together with every
    other non-ASCII character.
    """
    decomposed = unicodedata.normalize("NFD", text)
    ascii_bytes = decomposed.encode("ascii", errors="ignore")
    return ascii_bytes.decode("utf8")