# Word-validation rules: a consonant-pair whitelist check and a three-vowel-run
# check, combined with the default rule set.
import logging
|
|
import re
|
|
|
|
from language.rules import default_rules
|
|
from language.types import Language
|
|
|
|
# Module-scoped logger: records carry this module's name (so they can be
# filtered per module) and still propagate to handlers on the root logger.
logger = logging.getLogger(__name__)
|
|
|
|
# Whitelist of two-letter consonant clusters. valid_sequences() rejects a word
# when it contains a consonant pair that is not listed here. Entries are
# grouped by their first letter; order within the list is preserved.
# NOTE(review): the pair regex in valid_sequences() has no "y" in its character
# class, so the "*y" entries below can never be matched by it -- confirm
# whether that is intended.
valid_consonant_sequences = """
    cc
    ht
    kd kl km kp kt kv kw ky
    lc ld lf ll lm lp lt lv lw ly
    mb mm mp my
    nc nd ng nn nt nw ny
    ps pt
    rc rd rm rn rp rr rs rt rw ry
    sc ss
    ts tt th tw ty
""".split()
|
|
|
|
|
|
# Compiled once at import time. The lookahead makes matches overlap, so every
# adjacent consonant pair is examined; a plain ``{2}`` scan consumes both
# letters and would skip the pair straddling two matches (e.g. the "tx" in
# "ltx" after "lt" has been consumed).
_CONSONANT_PAIR = re.compile(r"(?=([bcdfghjklmnpqrstvwxz]{2}))")


def valid_sequences(language: Language, word: str) -> bool:
    """Return True when every adjacent consonant pair in *word* is whitelisted.

    Args:
        language: Unused here; accepted so the function matches the call
            signature shared by the other rule functions in this module.
        word: Candidate word to check.

    Returns:
        ``False`` if *word* contains at least one pair of adjacent consonants
        that is not in ``valid_consonant_sequences`` (the offending pairs are
        logged at debug level); ``True`` otherwise, including when the word
        has no adjacent consonants at all.
    """
    invalid = [
        pair
        for pair in _CONSONANT_PAIR.findall(word)
        if pair not in valid_consonant_sequences
    ]
    if invalid:
        # Lazy %-args: the message is only formatted when debug is enabled.
        logger.debug("%s contains invalid consonant sequences: %s", word, invalid)
        return False
    return True
|
|
|
|
|
|
def too_many_vowels(language: Language, word: str) -> bool:
    """Return True when *word* has no run of three contiguous vowels.

    Despite the name, this is a pass/fail rule: ``True`` means the word is
    acceptable, ``False`` means a three-vowel run was found.

    Args:
        language: Supplies the vowel inventory via ``language.vowels.members``.
        word: Candidate word to check.

    Returns:
        ``False`` if *word* contains three consecutive characters that all
        come from the language's vowel members (the runs are logged at debug
        level); ``True`` otherwise.
    """
    # Escape each member so characters that are special inside a character
    # class ("]", "^", "-", "\\") cannot corrupt the pattern.
    vowel_class = "".join(re.escape(vowel) for vowel in language.vowels.members)
    if not vowel_class:
        # No vowels means no vowel run is possible; an empty "[]" class would
        # also be rejected by the regex compiler.
        return True

    found = re.findall("[" + vowel_class + "]{3}", word)
    if not found:
        return True

    # Lazy %-args: the message is only formatted when debug is enabled.
    logger.debug("%s has too many contiguous vowels: %s", word, found)
    return False
|
|
|
|
|
|
# Rule set for this module: the shared defaults plus the two checks defined above.
rules = default_rules.union({valid_sequences, too_many_vowels})
|