initial commit
This commit is contained in:
parent
074c403538
commit
946aa5ea01
43
pyproject.toml
Normal file
43
pyproject.toml
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
[tool.poetry]
|
||||||
|
name = "random-sets"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "A small library of helper classes for dealing with random data using weighted distributions"
|
||||||
|
authors = ["evilchili <evilchili@gmail.com>"]
|
||||||
|
readme = "README.md"
|
||||||
|
packages = [
|
||||||
|
{ include = 'random_sets' },
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.poetry.dependencies]
|
||||||
|
python = "^3.10"
|
||||||
|
dice = "^4.0.0"
|
||||||
|
|
||||||
|
[tool.poetry.group.dev.dependencies]
|
||||||
|
pytest = "^7.4.3"
|
||||||
|
black = "^23.3.0"
|
||||||
|
isort = "^5.12.0"
|
||||||
|
pyproject-autoflake = "^1.0.2"
|
||||||
|
|
||||||
|
[tool.black]
|
||||||
|
line-length = 120
|
||||||
|
target-version = ['py310']
|
||||||
|
|
||||||
|
[tool.isort]
|
||||||
|
multi_line_output = 3
|
||||||
|
line_length = 120
|
||||||
|
include_trailing_comma = true
|
||||||
|
|
||||||
|
[tool.autoflake]
|
||||||
|
check = false # return error code if changes are needed
|
||||||
|
in-place = true # make changes to files instead of printing diffs
|
||||||
|
recursive = true # drill down directories recursively
|
||||||
|
remove-all-unused-imports = true # remove all unused imports (not just those from the standard library)
|
||||||
|
ignore-init-module-imports = true # exclude __init__.py when removing unused imports
|
||||||
|
remove-duplicate-keys = true # remove all duplicate keys in objects
|
||||||
|
remove-unused-variables = true # remove unused variables
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["poetry-core"]
|
||||||
|
build-backend = "poetry.core.masonry.api"
|
0
random_sets/__init__.py
Normal file
0
random_sets/__init__.py
Normal file
147
random_sets/datasources.py
Normal file
147
random_sets/datasources.py
Normal file
|
@ -0,0 +1,147 @@
|
||||||
|
import random
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from typing import IO
|
||||||
|
|
||||||
|
|
||||||
|
class DataSource:
    """
    Represents a yaml data source used to generate roll tables.

    Attributes:

        source      - the IO source to parse
        frequency   - the name of the frequency distribution to apply
        headers     - an array of header strings
        frequencies - the selected frequency distribution (option name -> weight)
        data        - the parsed YAML data, without the 'metadata' key
        metadata    - the 'metadata' mapping popped from the source, or an empty dict

    Methods:

        load_source        - Read and parse the source, populating the attributes
        random_frequencies - Choose random option names using the frequency weights
        random_values      - Return flattened entries for randomly-chosen options
        as_dict            - Return every option as a dict keyed by the headers
        get_entries        - Return the flattened entries for a single option
    """

    def __init__(self, source: IO, frequency: str = 'default') -> None:
        """
        Initialize a DataSource instance.

        Args:
            source    - an IO object to read source from
            frequency - the name of the frequency distribution to use; must
                        be defined in the source file's metadata.
        """
        self.source = source
        self.frequency = frequency
        self.headers = []
        self.frequencies = None
        self.data = None
        self.metadata = None
        self.load_source()

    def load_source(self) -> None:
        """
        Cache the yaml source and the parsed or generated metadata.

        Does nothing if data has already been loaded.
        """
        if self.data:
            return
        self.read_source()
        self.init_headers()
        self.init_frequencies()

    def read_source(self) -> None:
        """
        Parse the source as YAML and split the 'metadata' key off from the data.
        """
        # safe_load returns None for an empty document; fall back to an empty
        # mapping so the rest of the loader still works.
        self.data = yaml.safe_load(self.source) or {}
        # A bare "metadata:" key parses as None; treat it like missing metadata.
        self.metadata = self.data.pop('metadata', {}) or {}

    def init_headers(self) -> None:
        """
        Use the headers defined in the metadata, if any.
        """
        if 'headers' in self.metadata:
            self.headers = self.metadata['headers']

    def init_frequencies(self) -> None:
        """
        Select the frequency distribution named by self.frequency.

        A 'default' distribution giving every option the same weight is always
        available; distributions defined in the metadata are added to it and
        may override it.

        Raises:
            KeyError - if the requested distribution is not defined.
        """
        # NOTE(review): every option gets the same default weight, so only the
        # ratio matters for selection; the num_keys / 100 value is preserved as-is.
        default_freq = len(self.data) / 100

        frequencies = {'default': dict.fromkeys(self.data, default_freq)}
        if 'frequencies' in self.metadata:
            frequencies.update(self.metadata['frequencies'])
        self.frequencies = frequencies[self.frequency]

    def random_frequencies(self, count: int = 1) -> list:
        """
        Choose random option names from the frequency table.
        """
        options = list(self.frequencies)
        weights = list(self.frequencies.values())
        return random.choices(options, weights=weights, k=count)

    def random_values(self, count: int = 1) -> list:
        """
        Return a list of random values from the data set, as a list of lists.
        """
        return [self.get_entries(option, rand=True) for option in self.random_frequencies(count)]

    def as_dict(self) -> dict:
        """
        Return the contents of the data source as a dict.

        Each option name maps to a dict pairing the headers, in order, with the
        option's flattened entries. Entries beyond the number of headers (and
        headers beyond the number of entries) are dropped by zip().
        """
        # Pair headers and entries directly: going through a set would fail on
        # unhashable values (nested lists/dicts) and scramble the column order.
        return {name: dict(zip(self.headers, self.get_entries(name, rand=False))) for name in self.data}

    @staticmethod
    def _as_list(value) -> list:
        """
        Coerce a mapping value to a list so it can be appended to its key.
        """
        if value is None:
            return []
        if isinstance(value, list):
            return value
        return [value]

    def get_entries(self, option, rand: bool = False) -> list:
        """
        For a random item or each item in the specified option in the data source,
        return a flattened list of the option, the selected item, and the item's value (if any).
        """
        flattened = [option]
        value = self.data[option]

        # If there is no data for the specified option, stop now.
        if not value:
            return flattened

        if hasattr(value, 'keys'):
            # If the option is a dict, each key is prepended to its value list. For example, given:
            #
            # >>> self.data[option] == {'One': ['bar', 'baz'], 'Two': ['qaz', 'qux']}
            #
            # a random choice might then be: ['One', 'bar', 'baz']
            #
            if rand:
                k, v = random.choice(list(value.items()))
                choices = [[k] + self._as_list(v)]
            else:
                choices = [[k] + self._as_list(v) for k, v in value.items()]
        elif isinstance(value, (str, bytes)) or not hasattr(value, '__iter__'):
            # A scalar is a single choice; iterating a string would split it into characters.
            choices = [value]
        else:
            # NOTE(review): a list option contributes all of its items even when rand is True.
            choices = value

        for choice in choices:
            if hasattr(choice, 'keys'):
                # A dict choice contributes each key followed by its value,
                # flattening the value if it is also a list.
                for k, v in choice.items():
                    flattened.append(k)
                    if isinstance(v, list):
                        flattened.extend(v)
                    else:
                        flattened.append(v)
            elif isinstance(choice, list):
                # A list choice is flattened into the result.
                flattened.extend(choice)
            else:
                # Anything else is appended as-is.
                flattened.append(choice)
        return flattened
|
52
random_sets/sets.py
Normal file
52
random_sets/sets.py
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
import random
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from random_sets.datasources import DataSource
|
||||||
|
|
||||||
|
|
||||||
|
class WeightedSet:
    """
    A set in which members each have a weight, used for selecting at random.

    Members and weights are stored as parallel tuples in self.members and
    self.weights. Two sets can be combined with the + operator.

    Usage:
        >>> ws = WeightedSet(('foo', 1.0), ('bar', 0.5))
        >>> ws.random()
        'foo'
    """

    def __init__(self, *weighted_members: tuple):
        """
        Args:
            weighted_members - zero or more (member, weight) pairs
        """
        # Always store tuples, even when empty, so that __add__ can concatenate
        # an empty set with a populated one.
        self.members = ()
        self.weights = ()
        if weighted_members:
            self.members, self.weights = zip(*weighted_members)

    def random(self) -> str:
        """
        Return one member chosen at random according to the weights.

        Only the member is returned, not its weight.
        """
        return random.choices(self.members, self.weights)[0]

    def __add__(self, obj):
        """
        Return a new WeightedSet holding the members and weights of both operands.
        """
        ws = WeightedSet()
        # Coerce to tuples in case either operand had its attributes reassigned as lists.
        ws.members = tuple(self.members) + tuple(obj.members)
        ws.weights = tuple(self.weights) + tuple(obj.weights)
        return ws

    def __str__(self):
        return f"{self.members}\n{self.weights}"
|
||||||
|
|
||||||
|
|
||||||
|
class DataSourceSet(WeightedSet):
    """
    A WeightedSet whose members and weights come from a DataSource.

    The members are the option names in the data source's selected frequency
    distribution, and the weights are their frequencies.
    """

    def __init__(self, source: Path):
        """
        Args:
            source - path to the file whose text is parsed by DataSource
        """
        self.source = DataSource(source.read_text())
        # Each (option, frequency) item is already a (member, weight) pair.
        super().__init__(*self.source.frequencies.items())

    def random(self):
        """
        Choose a random option name by weight and return its entry from the
        data source's as_dict() output.
        """
        chosen = super().random()
        rows = self.source.as_dict()
        return rows[chosen]
|
||||||
|
|
||||||
|
|
||||||
|
def equal_weights(terms: list, weight: float = 1.0, blank: bool = True) -> WeightedSet:
    """
    Build a WeightedSet in which every term has the same weight.

    Args:
        terms  - the members of the set
        weight - the weight given to each term
        blank  - if True, prepend an empty-string member to the set; its
                 weight is always 1.0, regardless of the weight argument
    """
    weighted = WeightedSet(*((term, weight) for term in terms))
    if not blank:
        return weighted
    return WeightedSet(("", 1.0)) + weighted
|
Loading…
Reference in New Issue
Block a user