From 946aa5ea01a9046f78ce4d9b011c8d18eee2bd4a Mon Sep 17 00:00:00 2001
From: evilchili
Date: Sat, 23 Dec 2023 15:34:32 -0800
Subject: [PATCH] initial commit

---
 pyproject.toml             |  43 +++++++++++
 random_sets/__init__.py    |   0
 random_sets/datasources.py | 147 +++++++++++++++++++++++++++++++++++++
 random_sets/sets.py        |  52 +++++++++++++
 4 files changed, 242 insertions(+)
 create mode 100644 pyproject.toml
 create mode 100644 random_sets/__init__.py
 create mode 100644 random_sets/datasources.py
 create mode 100644 random_sets/sets.py

diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..74aff8b
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,43 @@
+[tool.poetry]
+name = "random-sets"
+version = "0.1.0"
+description = "A small library of helper classes for dealing with random data using weighted distributions"
+authors = ["evilchili "]
+readme = "README.md"
+packages = [
+    { include = 'random_sets' },
+]
+
+[tool.poetry.dependencies]
+python = "^3.10"
+dice = "^4.0.0"
+
+[tool.poetry.group.dev.dependencies]
+pytest = "^7.4.3"
+black = "^23.3.0"
+isort = "^5.12.0"
+pyproject-autoflake = "^1.0.2"
+
+[tool.black]
+line-length = 120
+target-version = ['py310']
+
+[tool.isort]
+multi_line_output = 3
+line_length = 120
+include_trailing_comma = true
+
+[tool.autoflake]
+check = false  # return error code if changes are needed
+in-place = true  # make changes to files instead of printing diffs
+recursive = true  # drill down directories recursively
+remove-all-unused-imports = true  # remove all unused imports (not just those from the standard library)
+ignore-init-module-imports = true  # exclude __init__.py when removing unused imports
+remove-duplicate-keys = true  # remove all duplicate keys in objects
+remove-unused-variables = true  # remove unused variables
+
+
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
diff --git a/random_sets/__init__.py b/random_sets/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/random_sets/datasources.py b/random_sets/datasources.py
new file mode 100644
index 0000000..16638b9
--- /dev/null
+++ b/random_sets/datasources.py
@@ -0,0 +1,147 @@
+import random
+import yaml
+
+from typing import IO
+
+
+class DataSource:
+    """
+    Represents a yaml data source used to generate roll tables.
+
+    Attributes:
+
+        source    - the IO source to parse
+        frequency - the frequency distribution to apply
+        headers   - an array of header strings
+        data      - the parsed YAML data
+
+    Methods:
+
+        load_source - Read and parse the source, populating the attributes
+
+    """
+    def __init__(self, source: IO, frequency: str = 'default') -> None:
+        """
+        Initialize a DataSource instance.
+
+        Args:
+            source    - an IO object to read source from
+            frequency - the name of the frequency distribution to use; must
+                        be defined in the source file's metadata.
+        """
+        self.source = source
+        self.frequency = frequency
+        self.headers = []
+        self.frequencies = None
+        self.data = None
+        self.metadata = None
+        self.load_source()
+
+    def load_source(self) -> None:
+        """
+        Cache the yaml source and the parsed or generated metadata.
+        """
+        if self.data:
+            return
+        self.read_source()
+        self.init_headers()
+        self.init_frequencies()
+
+    def read_source(self) -> None:
+        self.data = yaml.safe_load(self.source)
+        self.metadata = self.data.pop('metadata', {})
+
+    def init_headers(self) -> None:
+        if 'headers' in self.metadata:
+            self.headers = self.metadata['headers']
+
+    def init_frequencies(self) -> None:
+        num_keys = len(self.data.keys())
+        default_freq = num_keys / 100
+
+        frequencies = {
+            'default': dict([(k, default_freq) for k in self.data.keys()])
+        }
+        if 'frequencies' in self.metadata:
+            frequencies.update(**self.metadata['frequencies'])
+        self.frequencies = frequencies[self.frequency]
+
+    def random_frequencies(self, count: int = 1) -> list:
+        """
+        Choose random option names from the frequency table.
+        """
+        weights = []
+        options = []
+        for (option, weight) in self.frequencies.items():
+            weights.append(weight)
+            options.append(option)
+        return random.choices(options, weights=weights, k=count)
+
+    def random_values(self, count: int = 1) -> list:
+        """
+        Return a list of random values from the data set, as a list of lists.
+        """
+        return [
+            self.get_entries(option, rand=True) for option in self.random_frequencies(count)
+        ]
+
+    def as_dict(self) -> dict:
+        """
+        Return the contents of the data source as a dict.
+        """
+        data = dict()
+        for name in self.data.keys():
+            entries = self.get_entries(name, rand=False)
+            items = {(k, v) for k, v in zip(self.headers, entries)}
+            data[name] = dict(items)
+        return data
+
+    def get_entries(self, option, rand: bool = False) -> list:
+        """
+        For a random item (or every item) in the specified option of the data source,
+        return a flattened list of the option, the selected item, and the item's value (if any).
+        """
+
+        # If there is no data for the specified option, stop now.
+        flattened = [option]
+        if not self.data[option]:
+            return flattened
+
+        if hasattr(self.data[option], 'keys'):
+            # if the option is a dict, we assume the values are lists; we select a random item
+            # and prepend the key to the value list as our random selection. For example, given:
+            #
+            #  >>> self.data[option] == {'One': ['bar', 'baz'], 'Two': ['qaz', 'qux']}
+            #
+            # choices might then be: ['One', 'bar', 'baz']
+            #
+            if rand:
+                k, v = random.choice(list(self.data[option].items()))
+                choices = [[k] + v]
+            else:
+                choices = [
+                    [k] + v for k, v in list(self.data[option].items())
+                ]
+        else:
+            # If the option is either a list or a string, just select it.
+            choices = self.data[option]
+
+        for choice in choices:
+            # If the choice is itself a dict, add each key and its value to the flattened
+            # list, expanding the value if it is also a list, so the result stays flat.
+            if hasattr(choice, 'keys'):
+                for (k, v) in choice.items():
+                    if type(v) is list:
+                        flattened.extend([k, *v])
+                    else:
+                        flattened.extend([k, v])
+                continue
+
+            # if the choice is a list, extend the flattened list with its members
+            if type(choice) is list:
+                flattened.extend(choice)
+                continue
+
+            # otherwise, append the scalar choice to the flattened list
+            flattened.append(choice)
+        return flattened
diff --git a/random_sets/sets.py b/random_sets/sets.py
new file mode 100644
index 0000000..0dc2cea
--- /dev/null
+++ b/random_sets/sets.py
@@ -0,0 +1,52 @@
+import random
+
+from pathlib import Path
+
+from random_sets.datasources import DataSource
+
+
+class WeightedSet:
+    """
+    A set in which members each have a weight, used for selecting at random.
+
+    Usage:
+        >>> ws = WeightedSet(('foo', 1.0), ('bar', 0.5))
+        >>> ws.random()
+        'foo'
+    """
+
+    def __init__(self, *weighted_members: tuple):
+        self.members = []
+        self.weights = []
+        if weighted_members:
+            self.members, self.weights = list(zip(*weighted_members))
+
+    def random(self) -> str:
+        return random.choices(self.members, self.weights)[0]
+
+    def __add__(self, obj):
+        ws = WeightedSet()
+        ws.members = self.members + obj.members
+        ws.weights = self.weights + obj.weights
+        return ws
+
+    def __str__(self):
+        return f"{self.members}\n{self.weights}"
+
+
+class DataSourceSet(WeightedSet):
+
+    def __init__(self, source: Path):
+        self.source = DataSource(source.read_text())
+        super().__init__(*[(key, value) for key, value in self.source.frequencies.items()])
+
+    def random(self):
+        random_key = super().random()
+        return self.source.as_dict()[random_key]
+
+
+def equal_weights(terms: list, weight: float = 1.0, blank: bool = True) -> WeightedSet:
+    ws = WeightedSet(*[(term, weight) for term in terms])
+    if blank:
+        ws = WeightedSet(("", 1.0)) + ws
+    return ws
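
Usage sketch (not part of the patch above): DataSource expects a YAML document whose top-level keys are the options to roll on, with an optional metadata block supplying headers and named frequency distributions. That layout is inferred from read_source(), init_headers(), and init_frequencies(); the option names, values, and the 'treasure-heavy' distribution below are hypothetical.

import io

from random_sets.datasources import DataSource

# Hypothetical YAML source: two options plus a metadata block that defines
# headers and a named frequency distribution.
yaml_text = """
metadata:
  headers:
    - Category
    - Item
  frequencies:
    treasure-heavy:
      common: 0.3
      rare: 0.7
common:
  - torch
  - rope
rare:
  - gemstone
"""

source = DataSource(io.StringIO(yaml_text), frequency='treasure-heavy')
print(source.random_values(count=3))  # e.g. [['rare', 'gemstone'], ['common', 'torch', 'rope'], ...]
print(source.as_dict())               # {'common': {'Category': 'common', 'Item': 'torch'}, ...}

Omitting the frequency argument falls back to the generated 'default' distribution, which weights every option equally.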
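A second sketch, also not part of the patch, showing how get_entries() flattens a dict-valued option: with rand=True one key is chosen at random and prepended to its value list, while the default rand=False flattens every key and value into a single list. The 'coins' option is hypothetical, and the YAML is passed as a plain string, as DataSourceSet does.

from random_sets.datasources import DataSource

# Hypothetical option whose value is a dict mapping names to lists.
yaml_text = """
metadata:
  headers:
    - Category
    - Name
    - Detail
coins:
  Copper: ['dull', 'common']
  Platinum: ['shiny', 'rare']
"""

source = DataSource(yaml_text)
print(source.get_entries('coins', rand=True))  # e.g. ['coins', 'Platinum', 'shiny', 'rare']
print(source.get_entries('coins'))             # ['coins', 'Copper', 'dull', 'common', 'Platinum', 'shiny', 'rare']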
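Finally, a hypothetical sketch of the classes in random_sets/sets.py: WeightedSet draws members in proportion to their weights, equal_weights() builds one from a plain list, and DataSourceSet weights a DataSource's options by its frequency table. It assumes the YAML from the first sketch has been saved to items.yaml.

from pathlib import Path

from random_sets.sets import DataSourceSet, WeightedSet, equal_weights

ws = WeightedSet(('sword', 1.0), ('shield', 0.5))
print(ws.random())              # 'sword', roughly twice as often as 'shield'

# equal_weights() gives every term the same weight and, with blank=True,
# prepends an empty-string member so the combined set can also come up blank.
colours = equal_weights(['red', 'green', 'blue'])
print((ws + colours).random())  # drawn from the members and weights of both sets

# DataSourceSet weights option names by the source's 'default' frequencies
# and returns the chosen option as a dict keyed by the source's headers.
items = DataSourceSet(Path('items.yaml'))  # hypothetical file; see the first sketch
print(items.random())                      # e.g. {'Category': 'rare', 'Item': 'gemstone'}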