initial commit
This commit is contained in:
parent
074c403538
commit
946aa5ea01
43
pyproject.toml
Normal file
43
pyproject.toml
Normal file
|
@ -0,0 +1,43 @@
|
|||
# Packaging metadata for the random-sets library (Poetry).
[tool.poetry]
name = "random-sets"
version = "0.1.0"
description = "A small library of helper classes for dealing with random data using weighted distributions"
authors = ["evilchili <evilchili@gmail.com>"]
readme = "README.md"
packages = [
    { include = 'random_sets' },
]

# Runtime dependencies.
[tool.poetry.dependencies]
python = "^3.10"
dice = "^4.0.0"
# random_sets/datasources.py does `import yaml`, which is provided by PyYAML;
# without this entry a fresh install fails at import time.
pyyaml = "^6.0.1"

# Development-only tooling (tests, formatting, import sorting, dead-code removal).
[tool.poetry.group.dev.dependencies]
pytest = "^7.4.3"
black = "^23.3.0"
isort = "^5.12.0"
pyproject-autoflake = "^1.0.2"

[tool.black]
line-length = 120
target-version = ['py310']

# Keep isort's wrapping and line length settings aligned with black's above.
[tool.isort]
multi_line_output = 3
line_length = 120
include_trailing_comma = true

[tool.autoflake]
check = false                        # return error code if changes are needed
in-place = true                      # make changes to files instead of printing diffs
recursive = true                     # drill down directories recursively
remove-all-unused-imports = true     # remove all unused imports (not just those from the standard library)
ignore-init-module-imports = true    # exclude __init__.py when removing unused imports
remove-duplicate-keys = true         # remove all duplicate keys in objects
remove-unused-variables = true       # remove unused variables

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
|
0
random_sets/__init__.py
Normal file
0
random_sets/__init__.py
Normal file
147
random_sets/datasources.py
Normal file
147
random_sets/datasources.py
Normal file
|
@ -0,0 +1,147 @@
|
|||
import random
|
||||
import yaml
|
||||
|
||||
from typing import IO
|
||||
|
||||
|
||||
class DataSource:
    """
    Represents a yaml data source used to generate roll tables.

    Attributes:

        source      - the IO source to parse
        frequency   - the name of the frequency distribution to apply
        headers     - an array of header strings
        frequencies - the selected distribution, mapping option names to weights
        data        - The parsed YAML data, without the 'metadata' key
        metadata    - the contents of the source's 'metadata' key (may be empty)

    Methods:

        load_source - Read and parse the source, populating the attributes

    """

    def __init__(self, source: IO, frequency: str = 'default') -> None:
        """
        Initialize a DataSource instance.

        Args:
            source    - an IO object (or anything else yaml.safe_load accepts) to read source from
            frequency - the name of the frequency distribution to use; must
                        be defined in the source file's metadata.

        Raises:
            KeyError - if the named frequency distribution is not 'default' and is
                       not defined in the source's metadata.
        """
        self.source = source
        self.frequency = frequency
        self.headers = []
        self.frequencies = None
        self.data = None
        self.metadata = None
        self.load_source()

    def load_source(self) -> None:
        """
        Cache the yaml source and the parsed or generated metadata.

        Does nothing if data has already been loaded.
        """
        if self.data:
            return
        self.read_source()
        self.init_headers()
        self.init_frequencies()

    def read_source(self) -> None:
        """
        Parse the YAML source and split the 'metadata' key off from the data.
        """
        # safe_load() returns None for an empty document; fall back to an empty
        # data set instead of failing on None.pop() below.
        self.data = yaml.safe_load(self.source) or {}
        # A bare "metadata:" key parses to None, which would break the membership
        # tests in init_headers() and init_frequencies(); normalize it to a dict.
        self.metadata = self.data.pop('metadata', None) or {}

    def init_headers(self) -> None:
        """
        Use the headers defined in the metadata, if there are any.
        """
        if 'headers' in self.metadata:
            self.headers = self.metadata['headers']

    def init_frequencies(self) -> None:
        """
        Select the frequency distribution named by self.frequency.

        A 'default' distribution giving every option the same weight is always
        available; the metadata may define more, or replace 'default'.

        Raises:
            KeyError - if self.frequency names an undefined distribution.
        """
        # random.choices() treats weights as relative, so only the fact that
        # every option gets the same value matters here.
        default_freq = len(self.data) / 100

        frequencies = {'default': {name: default_freq for name in self.data}}
        defined = self.metadata.get('frequencies')
        if defined:
            # Plain update() rather than update(**defined): keyword expansion
            # would reject distributions whose names are not strings.
            frequencies.update(defined)
        self.frequencies = frequencies[self.frequency]

    def random_frequencies(self, count: int = 1) -> list:
        """
        Choose random option names from the frequency table.

        Args:
            count - the number of option names to choose (with replacement)
        """
        options = list(self.frequencies)
        weights = list(self.frequencies.values())
        return random.choices(options, weights=weights, k=count)

    def random_values(self, count: int = 1) -> list:
        """
        Return a list of random values from the data set, as a list of lists.
        """
        return [self.get_entries(option, rand=True) for option in self.random_frequencies(count)]

    def as_dict(self) -> dict:
        """
        Return the contents of the data source as a dict.

        Each option name maps to a dict pairing the headers, in order, with the
        option's flattened entries. Entries beyond the number of headers are dropped.
        """
        # dict(zip(...)) keeps the header order and, unlike collecting the pairs
        # in a set first, also works when a value is unhashable (a list or dict).
        return {name: dict(zip(self.headers, self.get_entries(name, rand=False))) for name in self.data}

    def get_entries(self, option, rand: bool = False) -> list:
        """
        For a random item or each item in the specified option in the data source,
        return a flattened list of the option, the select item, and the item's value (if any).

        Args:
            option - the name of the option (a top-level key of the data) to read
            rand   - if True and the option is a dict, use one randomly-chosen item
                     instead of every item. Has no effect on other option types.

        Raises:
            KeyError - if the option is not present in the data.
        """
        flattened = [option]
        entry = self.data[option]

        # If there is no data for the specified option, stop now.
        if not entry:
            return flattened

        if hasattr(entry, 'keys'):
            # if the option is a dict, the values are normally lists; we prepend the key
            # to the value list. For example, given:
            #
            # >>> self.data[option] == {'One': ['bar', 'baz'], 'Two': ['qaz', 'qux']}
            #
            # choices might then be: [['One', 'bar', 'baz']]
            #
            pairs = list(entry.items())
            if rand:
                pairs = [random.choice(pairs)]
            # A non-list value is kept as a single item rather than failing on list concatenation.
            choices = [[k, *v] if isinstance(v, list) else [k, v] for k, v in pairs]
        elif isinstance(entry, (str, bytes)) or not hasattr(entry, '__iter__'):
            # A scalar is a single choice; iterating a string would split it into characters.
            choices = [entry]
        else:
            # If the option is a list, every member is used.
            choices = entry

        for choice in choices:
            if hasattr(choice, 'keys'):
                # If the choice is a dict, add each key and its value, flattening the
                # value if it is also a list.
                for k, v in choice.items():
                    if isinstance(v, list):
                        flattened.extend([k, *v])
                    else:
                        flattened.extend([k, v])
            elif isinstance(choice, list):
                # if the member is a list, add its items
                flattened.extend(choice)
            else:
                # otherwise, add the choice itself
                flattened.append(choice)
        return flattened
|
52
random_sets/sets.py
Normal file
52
random_sets/sets.py
Normal file
|
@ -0,0 +1,52 @@
|
|||
import random
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from random_sets.datasources import DataSource
|
||||
|
||||
|
||||
class WeightedSet:
    """
    A set in which members each have a weight, used for selecting at random.

    Attributes:
        members - a tuple of the set's members
        weights - a tuple of the members' weights, in the same order

    Usage:
        >>> ws = WeightedSet(('foo', 1.0), ('bar', 0.5))
        >>> ws.random()  # 'foo' twice as often as 'bar'
        'foo'
    """

    def __init__(self, *weighted_members: tuple):
        """
        Args:
            weighted_members - zero or more (member, weight) pairs
        """
        # Always tuples, so an empty set and a populated one hold the same
        # types and can be concatenated by __add__.
        self.members = ()
        self.weights = ()
        if weighted_members:
            self.members, self.weights = zip(*weighted_members)

    def random(self) -> str:
        """
        Return one member chosen at random according to the weights.

        Raises:
            IndexError - if the set is empty.
        """
        return random.choices(self.members, self.weights)[0]

    def __add__(self, obj):
        """
        Return a new WeightedSet holding this set's members followed by obj's.
        """
        ws = WeightedSet()
        # Coerce both sides so the sum also works if a caller assigned a list.
        ws.members = tuple(self.members) + tuple(obj.members)
        ws.weights = tuple(self.weights) + tuple(obj.weights)
        return ws

    def __str__(self):
        return f"{self.members}\n{self.weights}"
|
||||
|
||||
|
||||
class DataSourceSet(WeightedSet):
    """
    A WeightedSet whose members and weights come from a DataSource's frequency table.
    """

    def __init__(self, source: Path):
        """
        Args:
            source - the path of the data source file to read
        """
        self.source = DataSource(source.read_text())
        # Each (option name, weight) pair of the frequency table becomes a weighted member.
        super().__init__(*self.source.frequencies.items())

    def random(self):
        """
        Choose an option name at random and return that option's entry
        from the data source's dict representation.
        """
        chosen = super().random()
        table = self.source.as_dict()
        return table[chosen]
|
||||
|
||||
|
||||
def equal_weights(terms: list, weight: float = 1.0, blank: bool = True) -> WeightedSet:
    """
    Build a WeightedSet in which every term has the same weight.

    Args:
        terms  - the members of the set
        weight - the weight given to each term
        blank  - if True, put an empty-string member with a weight of 1.0
                 ahead of the terms
    """
    weighted_terms = WeightedSet(*((term, weight) for term in terms))
    if not blank:
        return weighted_terms
    return WeightedSet(("", 1.0)) + weighted_terms
|
Loading…
Reference in New Issue
Block a user