initial commit

This commit is contained in:
evilchili 2023-12-23 15:34:32 -08:00
parent 074c403538
commit 946aa5ea01
4 changed files with 242 additions and 0 deletions

43
pyproject.toml Normal file
View File

@ -0,0 +1,43 @@
# Package metadata read by Poetry.
[tool.poetry]
name = "random-sets"
version = "0.1.0"
description = "A small library of helper classes for dealing with random data using weighted distributions"
authors = ["evilchili <evilchili@gmail.com>"]
readme = "README.md"
packages = [
    { include = 'random_sets' },
]

# Runtime dependencies.
[tool.poetry.dependencies]
python = "^3.10"
dice = "^4.0.0"
# random_sets/datasources.py imports `yaml`, so PyYAML must be installed with the package.
pyyaml = "^6.0"

# Development-only tooling (tests, formatting, import sorting, unused-code removal).
[tool.poetry.group.dev.dependencies]
pytest = "^7.4.3"
black = "^23.3.0"
isort = "^5.12.0"
pyproject-autoflake = "^1.0.2"

[tool.black]
line-length = 120
target-version = ['py310']

# Keep isort's line length in step with black's.
[tool.isort]
multi_line_output = 3
line_length = 120
include_trailing_comma = true

[tool.autoflake]
check = false # return error code if changes are needed
in-place = true # make changes to files instead of printing diffs
recursive = true # drill down directories recursively
remove-all-unused-imports = true # remove all unused imports (not just those from the standard library)
ignore-init-module-imports = true # exclude __init__.py when removing unused imports
remove-duplicate-keys = true # remove all duplicate keys in objects
remove-unused-variables = true # remove unused variables

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

0
random_sets/__init__.py Normal file
View File

147
random_sets/datasources.py Normal file
View File

@ -0,0 +1,147 @@
import random
import yaml
from typing import IO
class DataSource:
    """
    Represents a yaml data source used to generate roll tables.

    Attributes:
        source      - the IO source (or YAML text) to parse
        frequency   - the name of the frequency distribution to apply
        headers     - an array of header strings
        frequencies - the selected distribution, mapping option name to weight
        data        - the parsed YAML data, with the 'metadata' key removed
        metadata    - the contents of the source's 'metadata' key, if any

    Methods:
        load_source - Read and parse the source, populating the attributes
    """

    def __init__(self, source: IO, frequency: str = 'default') -> None:
        """
        Initialize a DataSource instance and immediately load the source.

        Args:
            source    - an IO object (or a string of YAML) to read source from
            frequency - the name of the frequency distribution to use; must
                        be 'default' or be defined in the source file's metadata.

        Raises:
            KeyError - if the named frequency distribution does not exist.
        """
        self.source = source
        self.frequency = frequency
        self.headers = []
        self.frequencies = None
        self.data = None
        self.metadata = None
        self.load_source()

    def load_source(self) -> None:
        """
        Cache the yaml source and the parsed or generated metadata.

        Does nothing if data has already been loaded.
        """
        if self.data:
            return
        self.read_source()
        self.init_headers()
        self.init_frequencies()

    def read_source(self) -> None:
        """
        Parse the YAML source into self.data and split off self.metadata.
        """
        # safe_load() returns None for an empty document, and a bare "metadata:"
        # key parses to None; normalise both to empty dicts so later lookups work.
        self.data = yaml.safe_load(self.source) or {}
        self.metadata = self.data.pop('metadata', None) or {}

    def init_headers(self) -> None:
        """
        Copy the 'headers' list from the metadata, if one was provided.
        """
        if 'headers' in self.metadata:
            self.headers = self.metadata['headers']

    def init_frequencies(self) -> None:
        """
        Build the table of frequency distributions and select the requested one.

        A 'default' distribution giving every option the same weight is always
        available; distributions defined in the metadata are layered on top and
        may replace it.

        Raises:
            KeyError - if self.frequency names a distribution that does not exist.
        """
        # Every option gets the same default weight. random.choices() only uses the
        # relative size of the weights, so the absolute value does not affect selection.
        default_freq = len(self.data) / 100
        frequencies = {'default': {k: default_freq for k in self.data}}

        # Pass the mapping itself rather than **kwargs so that distribution names
        # that are not strings are accepted too.
        frequencies.update(self.metadata.get('frequencies') or {})

        try:
            self.frequencies = frequencies[self.frequency]
        except KeyError:
            raise KeyError(
                f"Unknown frequency distribution {self.frequency!r}; "
                f"available: {sorted(map(str, frequencies))}"
            ) from None

    def random_frequencies(self, count: int = 1) -> list:
        """
        Choose random option names from the frequency table.

        Args:
            count - how many option names to draw (with replacement).
        """
        options = list(self.frequencies)
        weights = list(self.frequencies.values())
        return random.choices(options, weights=weights, k=count)

    def random_values(self, count: int = 1) -> list:
        """
        Return a list of random values from the data set, as a list of lists.
        """
        return [self.get_entries(option, rand=True) for option in self.random_frequencies(count)]

    def as_dict(self) -> dict:
        """
        Return the contents of the data source as a dict.

        Each option name maps to a dict pairing self.headers, in order, with the
        flattened entries for that option. Surplus headers or entries are dropped.
        """
        # dict(zip(...)) keeps header order and, unlike a set of (key, value) pairs,
        # does not require the values to be hashable.
        return {name: dict(zip(self.headers, self.get_entries(name, rand=False))) for name in self.data}

    def get_entries(self, option, rand: bool = False) -> list:
        """
        For a random item or each item in the specified option in the data source,
        return a flattened list of the option, the selected item, and the item's value (if any).

        Args:
            option - the key in self.data to read.
            rand   - if True, use one randomly selected item; otherwise use every item.

        Raises:
            KeyError - if the option is not present in the data.
        """
        flattened = [option]

        # If there is no data for the specified option, stop now.
        value = self.data[option]
        if not value:
            return flattened

        if hasattr(value, 'keys'):
            # If the option is a dict, each key is prepended to its value to form one
            # choice. For example, given:
            #
            # >>> self.data[option] == {'One': ['bar', 'baz'], 'Two': ['qaz', 'qux']}
            #
            # a choice might then be: ['One', 'bar', 'baz']
            #
            items = list(value.items())
            if rand:
                items = [random.choice(items)]
            # Values are normally lists, but tolerate scalars the same way the
            # flattening loop below does.
            choices = [[k, *v] if isinstance(v, list) else [k, v] for k, v in items]
        elif isinstance(value, (list, tuple)):
            # A sequence of choices: pick one at random, or keep them all.
            choices = [random.choice(value)] if rand else value
        else:
            # A string or other scalar is a single choice in its own right; it
            # must not be iterated character by character.
            choices = [value]

        for choice in choices:
            if hasattr(choice, 'keys'):
                # A dict choice contributes each key followed by its value,
                # flattening the value if it is a list.
                for k, v in choice.items():
                    if isinstance(v, list):
                        flattened.extend([k, *v])
                    else:
                        flattened.extend([k, v])
            elif isinstance(choice, list):
                # A list choice is spliced in as-is.
                flattened.extend(choice)
            else:
                # Anything else is appended as a single value.
                flattened.append(choice)
        return flattened

52
random_sets/sets.py Normal file
View File

@ -0,0 +1,52 @@
import random
from pathlib import Path
from random_sets.datasources import DataSource
class WeightedSet:
    """
    A set in which members each have a weight, used for selecting at random.

    Attributes:
        members - the members of the set, in the order given
        weights - the weight of each member, in the same order

    Usage:
        >>> ws = WeightedSet(('foo', 1.0), ('bar', 0.5))
        >>> ws.random() in ('foo', 'bar')
        True
    """

    def __init__(self, *weighted_members: tuple):
        """
        Args:
            weighted_members - any number of (member, weight) pairs.
        """
        # Always store tuples, whether or not any members were given, so that two
        # sets can be concatenated in __add__ regardless of which one is empty.
        self.members = ()
        self.weights = ()
        if weighted_members:
            self.members, self.weights = zip(*weighted_members)

    def random(self) -> str:
        """
        Return one member chosen at random according to the weights.
        """
        return random.choices(self.members, self.weights)[0]

    def __add__(self, obj):
        """
        Return a new WeightedSet holding this set's members followed by obj's.
        """
        ws = WeightedSet()
        # Coerce both sides so that members/weights assigned as lists still combine.
        ws.members = tuple(self.members) + tuple(obj.members)
        ws.weights = tuple(self.weights) + tuple(obj.weights)
        return ws

    def __str__(self):
        return f"{self.members}\n{self.weights}"
class DataSourceSet(WeightedSet):
    """
    A WeightedSet whose members and weights come from a DataSource's frequency table.

    Attributes:
        source - the DataSource built from the text of the given path
    """

    def __init__(self, source: Path):
        """
        Args:
            source - path to the file whose text is handed to DataSource.
        """
        contents = source.read_text()
        self.source = DataSource(contents)
        # Each (option, weight) pair in the frequency table becomes a weighted member.
        weighted_pairs = list(self.source.frequencies.items())
        super().__init__(*weighted_pairs)

    def random(self):
        """
        Pick an option name by weight and return that option's entry from the
        data source's dict representation.
        """
        chosen = super().random()
        records = self.source.as_dict()
        return records[chosen]
def equal_weights(terms: list, weight: float = 1.0, blank: bool = True) -> WeightedSet:
    """
    Build a WeightedSet in which every term carries the same weight.

    Args:
        terms  - the members of the set.
        weight - the weight assigned to each term.
        blank  - if True, an empty-string member with weight 1.0 is placed
                 ahead of the terms.
    """
    pairs = [(term, weight) for term in terms]
    weighted = WeightedSet(*pairs)
    if not blank:
        return weighted
    return WeightedSet(("", 1.0)) + weighted