462 lines
15 KiB
Python
462 lines
15 KiB
Python
|
import inspect
|
||
|
import random
|
||
|
from collections import defaultdict
|
||
|
from typing import Union
|
||
|
|
||
|
|
||
|
class LanguageError(Exception):
    """Raised when a problem is found while constructing a language."""
|
||
|
|
||
|
|
||
|
class ImprobableTemplateError(Exception):
    """
    Raised when too many successive attempts to build a word that satisfies
    every language rule have failed.
    """
|
||
|
|
||
|
|
||
|
class WeightedSet:
    """
    A set in which members each have a weight, used for selecting at random.

    Members and weights are held in two parallel lists, ``members`` and
    ``weights``: ``weights[i]`` is the weight of ``members[i]``. A set with
    no members is falsy, so ``if some_set:`` tests for "has something to
    choose from".

    Usage:
        >>> ws = WeightedSet(('foo', 1.0), ('bar', 0.5))
        >>> ws.random()
        'foo'
    """

    def __init__(self, *weighted_members: tuple):
        """
        Args:
            weighted_members - zero or more (member, weight) pairs
        """
        self.members = []
        self.weights = []
        if weighted_members:
            members, weights = zip(*weighted_members)
            # zip() yields tuples. Store lists so that populated and empty
            # sets always hold the same container type; otherwise __add__
            # fails when exactly one operand is empty.
            self.members = list(members)
            self.weights = list(weights)

    def random(self) -> str:
        """
        Return one member, selected at random according to the weights.

        Raises whatever random.choices() raises when the set is empty.
        """
        return random.choices(self.members, self.weights)[0]

    def __add__(self, obj):
        """
        Return a new WeightedSet containing this set's members and weights
        followed by those of obj. Neither operand is modified.
        """
        ws = WeightedSet()
        # Coerce to lists so operands whose attributes were assigned as
        # tuples (or any other sequence) still concatenate cleanly.
        ws.members = list(self.members) + list(obj.members)
        ws.weights = list(self.weights) + list(obj.weights)
        return ws

    def __len__(self) -> int:
        """Return the number of members; also makes an empty set falsy."""
        return len(self.members)

    def __str__(self):
        return f"{self.members}\n{self.weights}"
|
||
|
|
||
|
|
||
|
class Syllable:
    """
    One syllable of a word. Used to populate a SyllableSet.

    A syllable template is a string consisting of one or more grapheme types
    separated by a vertical pipe (|). Multiple template strings can be
    concatenated together with commas. When words are constructed, each
    syllable is populated with a random sequence chosen by
    Language.add_grapheme().

    A syllable template must contain at least one 'vowel'. The check is a
    plain substring test on the template text (see validate()).

    Syllables can be multiplied by integers to produce repeated templates.
    The result has the same class as the syllable being multiplied.

    Usage:
        # A syllable consisting of either a vowel or a consonant, followed by
        # a vowel, followed by either a vowel or consonant.
        >>> foo = Syllable(template='vowel|consonant,vowel,consonant|vowel')

        # Example multiplication
        >>> print(Syllable(template='vowel|consonant') * 3)
        vowel|consonant,vowel|consonant,vowel|consonant
    """

    def __init__(self, template: str = "vowel|consonant"):
        """
        Args:
            template - the template string; it is validated immediately
        """
        self.template = template
        self.validate()

    def validate(self):
        """
        Raise LanguageError unless the text 'vowel' appears in the template.
        """
        if "vowel" not in self.template:
            raise LanguageError(
                f"Invalid syllable template {self.template}!\n"
                "Syllables must have at least one vowel in the template."
            )

    def __mul__(self, count: int):
        """
        Return a new syllable whose template is this one repeated count
        times, joined with commas.
        """
        # Build the result with the instance's own class so that subclasses
        # which override validate() keep their behaviour after multiplication
        # instead of being re-checked (and possibly rejected) as a Syllable.
        return self.__class__(template=",".join([self.template] * count))

    __rmul__ = __mul__

    def __str__(self):
        return self.template
|
||
|
|
||
|
|
||
|
class SyllableSet(WeightedSet):
    """
    A WeightedSet of Syllable objects that produces random grapheme-type
    sequences.

    Usage:
        >>> word = SyllableSet(
            (Syllable('vowel'), 1.0),
            (Syllable('vowel|consonant') * 2, 1.0),
            (Syllable('vowel|consonant,vowel|consonant') * 3, 0.75)
        )
        >>> list(word.random())
        ['consonant', 'vowel']
    """

    def random(self) -> iter:
        """
        Pick one member by weight, then walk its comma-separated slots and
        yield, for each slot, one of its pipe-separated alternatives chosen
        uniformly at random and lower-cased.
        """
        chosen = random.choices(self.members, self.weights)[0]
        for slot in chosen.template.split(","):
            alternatives = slot.split("|")
            picked = random.choice(alternatives)
            yield picked.lower()
|
||
|
|
||
|
|
||
|
class Language:
    """
    A class representing a language.

    Usage:
        >>> Common = Language(
            name="common",
            vowels=WeightedSet(("a", 1.0), ("e", 1.0), ("i", 1.0), ...),
            consonants=WeightedSet(("b", 0.5), ("c", 0.5), ("d", 0.5), ...),
            prefixes=WeightedSet(("re", 0.5), ("de", 0.5), ("", 1.0), ...),
            suffixes=WeightedSet(("ed", 0.5), ("ing", 0.5), ("", 1.0), ...),
            syllables=SyllableSet(
                (Syllable('consonant|vowel') * 2, 1.0),
                (Syllable('consonant|vowel') * 3, 0.75),
                ...
            ),
            rules={callable1, callable2, ...},
            minimum_grapheme_count=2,
        )
        >>> Common.word()
        ['reibing']

    How Words Are Constructed:

        The main interface for callers is word(), which returns a list of
        randomly-generated words in the language according to the following
        algorithm:

            1. Choose a random syllable from the syllable set
            2. For each grapheme in the syllable
            3.    Choose a random grapheme template
            4.    Choose a random sequence from the language for that grapheme
            5. Validate the word against the language rules
            6. Repeat 1-5 until a valid word is generated
            7. Add a prefix and suffix, if they are defined

        The following graphemes are supported by default:
            - vowel
            - consonant
            - prefix
            - suffix

        When graphemes are chosen, the following rules are applied
        (see validate_graphemes()):
            - The sequence must contain at least minimum_grapheme_count
              graphemes
            - The sequence may never contain three consecutive graphemes of
              the same type

    How Words Are Validated:

        Once a word has been constructed by populating syllable templates, it is
        tested against one or more language rules. Each rule is called as
        rule(language, word) and must return a truthy value to accept the word.

        The default rules are defined in language.rules.default_rules; they are:
            - the word must contain at least one vowel
            - the word must not contain 3 or more contiguous english vowels
            - the word must not contain 3 or more contiguous english consonants
            - the word must not consist of just one vowel, repeated

        Since it is possible to craft Syllables resulting in grapheme
        selections that rarely or never yield valid words, or rules that
        reject every word, an ImprobableTemplateError will be thrown if
        max_word_attempts (10) successive attempts to create a valid word
        fail, or if max_template_attempts successive draws from the syllable
        set fail to produce an acceptable grapheme sequence.

    Extending Languages:

        Graphemes are populated by means of callbacks which select a member
        of the associated weighted set at random. Graphemes can be any string,
        so long as the Language class has a matching callback.

        To add support for a new grapheme type, define a method on your
        Language class called get_grapheme_TYPE, where TYPE is the string
        used in your Syllable templates. Examine test cases in test_types.py
        for examples.
    """

    # How many candidate words are built for each requested word before
    # word() gives up with ImprobableTemplateError.
    max_word_attempts = 10

    # Safety bound on how many times the syllable set is sampled while
    # looking for one grapheme sequence that passes validate_graphemes().
    # Without a bound, a syllable set that can never pass (for example a
    # single 'vowel,vowel,vowel' template) would loop forever.
    max_template_attempts = 1000

    def __init__(
        self,
        name: str,
        vowels: WeightedSet,
        consonants: WeightedSet,
        prefixes: WeightedSet,
        suffixes: WeightedSet,
        syllables: SyllableSet,
        rules: Union[set, None] = None,
        minimum_grapheme_count: int = 1,
    ):
        """
        Args:
            name - friendly name for the language
            vowels - the weighted set of vowel graphemes
            consonants - the weighted set of consonant graphemes
            prefixes - the weighted set of prefix graphemes
            suffixes - the weighted set of suffix graphemes
            syllables - the set of syllable templates words are built from
            rules - a set of rules callbacks; see above. Defaults to no rules.
            minimum_grapheme_count - the minimum number of graphemes in each word

        Raises:
            ImprobableTemplateError - if any syllable template defines fewer
                graphemes than minimum_grapheme_count
        """
        self.name = name
        self.vowels = vowels
        self.consonants = consonants
        self.prefixes = prefixes
        self.suffixes = suffixes
        # A fresh set per instance: a shared default object would leak rules
        # added to one language into every other language built without rules.
        self.rules = set() if rules is None else rules
        self.syllables = syllables
        self.minimum_grapheme_count = minimum_grapheme_count
        self.validate_syllable_set()

        # Every bound method whose name starts with "get_", keyed by name;
        # add_grapheme() looks grapheme callbacks up here.
        self.handlers = {
            attr: method
            for attr, method in inspect.getmembers(self, inspect.ismethod)
            if attr.startswith("get_")
        }

    def validate(self, word: str) -> bool:
        """
        Returns true if the given word is possible in the current language.

        An empty word is never valid; otherwise every rule must accept it.
        """
        if not word:
            return False
        return all(rule(self, word) for rule in self.rules)

    def validate_syllable_set(self):
        """
        Raise ImprobableTemplateError if any syllable in the syllable set has
        fewer comma-separated graphemes than minimum_grapheme_count.
        """
        for syllable in self.syllables.members:
            if len(syllable.template.split(",")) < self.minimum_grapheme_count:
                raise ImprobableTemplateError(
                    f"Syllable {syllable} does not define enough graphemes ({self.minimum_grapheme_count} required)."
                )

    def validate_graphemes(self, graphemes: list) -> bool:
        """
        Returns true if the sequence of grapheme types is acceptable: it has
        at least minimum_grapheme_count entries and never repeats the same
        type three times in a row.
        """
        if len(graphemes) < self.minimum_grapheme_count:
            return False

        last = ""
        count = 0  # length of the current run of identical grapheme types
        for g in graphemes:
            if g == last:
                count += 1
                if count == 3:
                    return False
            else:
                count = 1
                last = g
        return True

    def _random_graphemes(self) -> list:
        """
        Sample the syllable set until a grapheme sequence passes
        validate_graphemes(), giving up after max_template_attempts draws.
        """
        for _ in range(self.max_template_attempts):
            graphemes = list(self.syllables.random())
            if self.validate_graphemes(graphemes):
                return graphemes
        raise ImprobableTemplateError(
            f"No acceptable grapheme sequence was found in {self.max_template_attempts} "
            "draws from the syllable set."
        )

    def word(self, count: int = 1) -> list:
        """
        Returns a list of count words composed of randomized graphemes built
        from random syllable templates.

        Raises:
            ImprobableTemplateError - if max_word_attempts successive
                candidates for one word are rejected by validate(), or no
                acceptable grapheme sequence can be drawn
        """
        words = []
        for _ in range(count):
            random_word = ""
            attempts = 0
            while not self.validate(random_word):
                if attempts == self.max_word_attempts:
                    raise ImprobableTemplateError(
                        f"Exhausted all attempts to create a valid word. Last attempt: {random_word}. "
                        "If you're getting this a lot, try enabling debugging to see what rules are failing."
                    )
                random_word = ""
                for grapheme in self._random_graphemes():
                    random_word = self.add_grapheme(random_word, grapheme)
                attempts += 1
            # Prefix and suffix are attached after validation, so the rules
            # only ever see the generated stem.
            if self.prefixes:
                random_word = self.get_grapheme_prefix() + random_word
            if self.suffixes:
                random_word = random_word + self.get_grapheme_suffix()
            words.append(random_word)
        return words

    def add_grapheme(self, word: str, template: str) -> str:
        """
        Returns word with a random grapheme of the given type appended. The
        class must support a method of the name:
            get_grapheme_{template}

        Raises:
            NotImplementedError - if no such method exists
        """
        template = template.lower()
        try:
            handler = self.handlers[f"get_grapheme_{template}"]
        except KeyError:
            raise NotImplementedError(
                f"No handler found for grapheme template '{template}'. "
                f"Do you need to define get_grapheme_{template}()?\n"
                "Supported handlers: " + ", ".join(self.handlers)
            ) from None
        # Called outside the try block so a KeyError raised inside a handler
        # is not mistaken for a missing handler.
        return word + handler()

    def get_grapheme_consonant(self) -> str:
        """Return a random member of the consonants set."""
        return self.consonants.random()

    def get_grapheme_vowel(self) -> str:
        """Return a random member of the vowels set."""
        return self.vowels.random()

    def get_grapheme_prefix(self) -> str:
        """Return a random member of the prefixes set."""
        return self.prefixes.random()

    def get_grapheme_suffix(self) -> str:
        """Return a random member of the suffixes set."""
        return self.suffixes.random()

    def text(self, count: int = 25) -> str:
        """
        Returns a paragraph of count random words, grouped into phrases of
        random length (at most 12 words) that are joined either with a comma
        or with sentence punctuation followed by a capitalized phrase.
        Returns an empty string when no words are generated.
        """
        phrases = []
        phrase = []
        for word in self.word(count):
            phrase.append(str(word))
            if len(phrase) >= random.randint(1, 12):
                phrases.append(" ".join(phrase))
                phrase = []
        if phrase:
            phrases.append(" ".join(phrase))

        # count == 0 produces no phrases at all; there is nothing to punctuate.
        if not phrases:
            return ""

        paragraph = phrases[0].capitalize()
        for phrase in phrases[1:]:
            # One time in three, end the sentence and start a new one.
            if random.choice([0, 0, 1]):
                paragraph = paragraph + random.choice("?!.") + " " + phrase.capitalize()
            else:
                paragraph = paragraph + ", " + phrase
        paragraph = paragraph + random.choice("?!.")
        return paragraph

    def copy(self):
        """
        Returns a new instance of the same class built from this language's
        attributes. The weighted sets, syllable set and rules are passed by
        reference, not duplicated.
        """
        return self.__class__(
            name=self.name,
            vowels=self.vowels,
            consonants=self.consonants,
            prefixes=self.prefixes,
            suffixes=self.suffixes,
            rules=self.rules,
            syllables=self.syllables,
            minimum_grapheme_count=self.minimum_grapheme_count,
        )

    def __str__(self) -> str:
        """Returns one freshly generated word."""
        return self.word()[0]
|
||
|
|
||
|
|
||
|
# NameSet is an alias of SyllableSet; NameGenerator annotates its `templates`
# argument with it.
NameSet = SyllableSet
|
||
|
|
||
|
|
||
|
class Name(defaultdict):
    """
    A generated name, stored as a mapping.

    NameGenerator.name() fills an instance with one list of strings per
    name part and stores the complete, space-joined name as a plain string
    under the key "fullname". str() returns that full name.
    """

    def __str__(self):
        # .get() rather than indexing: looking the key up must not insert a
        # default value into the mapping as a side effect of printing.
        fullname = self.get("fullname", "")
        if isinstance(fullname, str):
            # Indexing a string here would return only its first character.
            return fullname
        # Also accept a list of names, using its first entry if there is one.
        return fullname[0] if fullname else ""
|
||
|
|
||
|
|
||
|
class NameTemplate(Syllable):
    """A Syllable-style template that is exempt from the vowel check."""

    def validate(self):
        """Accept every template: nothing is checked."""
        return None
|
||
|
|
||
|
|
||
|
class NameGenerator:
    """
    Generates names by filling in name templates.

    A template is drawn from `templates`; each part it yields (for example
    "name" or "surname") is produced by the method called get_<part>. To
    support a new part, define a matching get_<part>() method on a subclass.
    """

    def __init__(
        self,
        language: Language,
        templates: NameSet,
        syllables: Union[SyllableSet, None] = None,
        names: Union[WeightedSet, None] = None,
        surnames: Union[WeightedSet, None] = None,
        nicknames: Union[WeightedSet, None] = None,
        adjectives: Union[WeightedSet, None] = None,
        titles: Union[WeightedSet, None] = None,
        counts: Union[WeightedSet, None] = None,
        affixes: Union[WeightedSet, None] = None,
        suffixes: Union[WeightedSet, None] = None,
    ):
        """
        Args:
            language - the language used to invent names and surnames; a copy
                is taken so the caller's instance is not modified
            templates - the set of name templates; its random() must yield
                the part names that make up one name
            syllables - if given, replaces the syllable set of the copied language
            names - if given, names are chosen from this set instead of generated
            surnames - if given, surnames are chosen from this set instead of generated
            nicknames - the set nicknames are chosen from
            adjectives - the set adjectives are chosen from
            titles - the set titles are chosen from
            counts - the set counts are chosen from
            affixes - the set affixes are chosen from
            suffixes - if given, a member is appended to every surname
        """
        self.language = language.copy()
        if syllables:
            self.language.syllables = syllables
        self.templates = templates
        self._names = names
        self._surnames = surnames
        self._nicknames = nicknames
        self._adjectives = adjectives
        self._titles = titles
        self._counts = counts
        self._suffixes = suffixes
        self._affixes = affixes

        # Every bound method whose name starts with "get_", keyed by name;
        # add_part() looks part handlers up here.
        self.handlers = {
            attr: method
            for attr, method in inspect.getmembers(self, inspect.ismethod)
            if attr.startswith("get_")
        }

    def name(self, count: int = 1) -> list:
        """
        Generate Name instances.

        Each Name maps a part name to the list of strings generated for it,
        and "fullname" to the space-joined result. Parts that come back
        empty are left out.
        """
        names = []
        for _ in range(count):
            generated = Name(list)
            fullname = []
            for part in self.templates.random():
                thisname = self.add_part(part).strip()
                if not thisname:
                    continue
                generated[part].append(thisname)
                fullname.append(thisname)
            generated["fullname"] = " ".join(fullname)
            names.append(generated)
        return names

    def add_part(self, template: str) -> str:
        """
        Returns the text for one name part by calling get_{template}().

        Raises:
            NotImplementedError - if no such method exists
        """
        template = template.lower()
        try:
            handler = self.handlers[f"get_{template}"]
        except KeyError:
            raise NotImplementedError(
                f"No handler found for name template '{template}' on class {self.__class__.__name__}. "
                f"Do you need to define get_{template}()?\nSupported Handlers: "
                + ",".join(n for n in dir(self) if n.startswith("get_"))
            ) from None
        # Called outside the try block so a KeyError raised inside a handler
        # is not mistaken for a missing handler.
        return handler()

    def _choose_or_generate(self, source: Union[WeightedSet, None]) -> str:
        """
        Returns a random member of source, or a newly generated word from
        the language when no source was configured.
        """
        if source:
            # random() already returns the chosen member; indexing it would
            # cut the name down to its first character.
            return source.random()
        return self.language.word()[0]

    def get_name(self) -> str:
        """Returns a title-cased given name."""
        return self._choose_or_generate(self._names).title()

    def get_surname(self) -> str:
        """
        Returns a title-cased surname, with a surname suffix appended when
        suffixes were configured. A one-character result gets a trailing
        period.
        """
        name = self._choose_or_generate(self._surnames)
        if self._suffixes:
            name = name + self._suffixes.random()
        if len(name) == 1:
            name = f"{name}."
        return name.title()

    def get_adjective(self) -> str:
        """Returns a title-cased adjective, or "" if none were configured."""
        return (self._adjectives.random() if self._adjectives else "").title()

    def get_affix(self) -> str:
        """Returns an affix, or "" if none were configured."""
        return self._affixes.random() if self._affixes else ""

    def get_title(self) -> str:
        """Returns a title-cased title, or "" if none were configured."""
        return (self._titles.random() if self._titles else "").title()

    def get_the(self) -> str:
        """Returns the literal word "the"."""
        return "the"

    def get_count(self) -> str:
        """Returns a count, or "" if none were configured."""
        return self._counts.random() if self._counts else ""

    def get_nickname(self) -> str:
        """
        Returns a title-cased nickname wrapped in double quotes, or "" if
        none were configured or the chosen nickname is empty.
        """
        name = (self._nicknames.random() if self._nicknames else "").title()
        if name:
            return '"' + name + '"'
        return ""

    def get_initial(self) -> str:
        """
        Placeholder: initials are not generated. Returns "" so that name()
        skips the part instead of failing on a missing value.
        """
        return ""

    def __str__(self) -> str:
        """Returns the full text of one freshly generated name."""
        return self.name()[0]["fullname"]
|
||
|
|
||
|
|
||
|
def equal_weights(terms: list, weight: float = 1.0, blank: bool = True) -> WeightedSet:
    """
    Build a WeightedSet in which every term has the same weight.

    Args:
        terms - the members of the set
        weight - the weight given to each term
        blank - when true, an empty-string member with weight 1.0 is placed
            ahead of the terms

    Returns:
        the new WeightedSet
    """
    pairs = [(term, weight) for term in terms]
    if blank:
        # Put the blank entry first and build the set in one step. Adding a
        # blank-only set to an empty one fails, because an empty WeightedSet
        # and a populated one hold different container types.
        pairs.insert(0, ("", 1.0))
    return WeightedSet(*pairs)
|