initial commit

2023-12-23 15:34:32 -08:00 · 2023-12-23 15:34:32 -08:00 · 946aa5ea01
commit 946aa5ea01
parent 074c403538
4 changed files with 242 additions and 0 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,43 @@
+[tool.poetry]
+name = "random-sets"
+version = "0.1.0"
+description = "A small library of helper classes for dealing with random data using weighted distributions"
+authors = ["evilchili <evilchili@gmail.com>"]
+readme = "README.md"
+packages = [
+    { include = 'random_sets' },
+]
+
+[tool.poetry.dependencies]
+python = "^3.10"
+dice = "^4.0.0"
+pyyaml = "^6.0"                      # random_sets/datasources.py imports yaml
+
+[tool.poetry.group.dev.dependencies]
+pytest = "^7.4.3"
+black = "^23.3.0"
+isort = "^5.12.0"
+pyproject-autoflake = "^1.0.2"
+
+[tool.black]
+line-length = 120
+target-version = ['py310']
+
+[tool.isort]
+multi_line_output = 3
+line_length = 120
+include_trailing_comma = true
+
+[tool.autoflake]
+check = false                        # return error code if changes are needed
+in-place = true                      # make changes to files instead of printing diffs
+recursive = true                     # drill down directories recursively
+remove-all-unused-imports = true     # remove all unused imports (not just those from the standard library)
+ignore-init-module-imports = true    # exclude __init__.py when removing unused imports
+remove-duplicate-keys = true         # remove all duplicate keys in objects
+remove-unused-variables = true       # remove unused variables
+
+
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
--- a/random_sets/__init__.py
+++ b/random_sets/__init__.py
--- a/random_sets/datasources.py
+++ b/random_sets/datasources.py
@ -0,0 +1,147 @@
+import random
+import yaml
+
+from typing import IO
+
+
+class DataSource:
+    """
+    Represents a yaml data source used to generate roll tables.
+
+    The source is parsed once, when the instance is created. Every top-level
+    key other than 'metadata' is an option that can be selected; 'metadata'
+    may supply 'headers' and named 'frequencies' distributions.
+
+    Attributes:
+
+        source      - the IO source to parse
+        frequency   - the name of the frequency distribution to apply
+        frequencies - the selected distribution: option names mapped to weights
+        headers     - an array of header strings
+        data        - The parsed YAML data, with the metadata key removed
+        metadata    - the contents of the source's 'metadata' key, if any
+
+    Methods:
+
+        load_source        - Read and parse the source, populating the attributes
+        random_frequencies - Choose option names at random, honouring the weights
+        random_values      - Return the flattened entries of randomly chosen options
+        as_dict            - Return every option as a dict keyed by header
+        get_entries        - Return the flattened entries of a single option
+
+    """
+    def __init__(self, source: IO, frequency: str = 'default') -> None:
+        """
+        Initialize a DataSource instance.
+
+        Args:
+            source      - an IO object to read source from
+            frequency   - the name of the frequency distribution to use; must
+                          be defined in the source file's metadata.
+
+        Raises:
+            KeyError    - if the named frequency distribution does not exist.
+        """
+        self.source = source
+        self.frequency = frequency
+        self.headers = []
+        self.frequencies = None
+        self.data = None
+        self.metadata = None
+        self.load_source()
+
+    def load_source(self) -> None:
+        """
+        Cache the yaml source and the parsed or generated metadata.
+
+        Does nothing if the source has already been parsed.
+        """
+        # Compare against None rather than relying on truthiness, so that a
+        # source which parsed to an empty table still counts as loaded.
+        if self.data is not None:
+            return
+        self.read_source()
+        self.init_headers()
+        self.init_frequencies()
+
+    def read_source(self) -> None:
+        """
+        Parse the source with yaml.safe_load and split off the 'metadata' key.
+        """
+        # safe_load returns None for an empty document, and a bare 'metadata:'
+        # key parses to None as well; fall back to empty dicts in both cases.
+        self.data = yaml.safe_load(self.source) or {}
+        self.metadata = self.data.pop('metadata', None) or {}
+
+    def init_headers(self) -> None:
+        """
+        Use the headers from the metadata, if it defines any.
+        """
+        if 'headers' in self.metadata:
+            self.headers = self.metadata['headers']
+
+    def init_frequencies(self) -> None:
+        """
+        Select the frequency distribution named by self.frequency.
+
+        A 'default' distribution giving every option the same weight is always
+        available; distributions defined in the metadata are added to it and
+        replace it if they use the same name.
+
+        Raises:
+            KeyError    - if self.frequency names no known distribution.
+        """
+        # NOTE(review): this weight grows with the number of options; an even
+        # share (1 / num_keys) was presumably intended. It is harmless because
+        # the weights are all equal and only their ratios are used for selection.
+        num_keys = len(self.data)
+        default_freq = num_keys / 100
+
+        frequencies = {'default': dict.fromkeys(self.data, default_freq)}
+        frequencies.update(self.metadata.get('frequencies') or {})
+
+        try:
+            self.frequencies = frequencies[self.frequency]
+        except KeyError:
+            known = ', '.join(map(str, frequencies))
+            raise KeyError(
+                f"Unknown frequency distribution '{self.frequency}'; available: {known}"
+            ) from None
+
+    def random_frequencies(self, count: int = 1) -> list:
+        """
+        Choose random option names from the frequency table.
+
+        Args:
+            count       - how many names to choose; names may repeat.
+        """
+        options = list(self.frequencies)
+        weights = list(self.frequencies.values())
+        return random.choices(options, weights=weights, k=count)
+
+    def random_values(self, count: int = 1) -> list:
+        """
+        Return a list of random values from the data set, as a list of lists.
+        """
+        return [
+            self.get_entries(option, rand=True) for option in self.random_frequencies(count)
+        ]
+
+    def as_dict(self) -> dict:
+        """
+        Return the contents of the data source as a dict.
+
+        Each option name maps to a dict pairing the headers, in order, with the
+        option's flattened entries. Pairing stops at the shorter of the two.
+        """
+        data = dict()
+        for name in self.data:
+            entries = self.get_entries(name, rand=False)
+            # Pair directly instead of going through a set of tuples: entries
+            # may hold unhashable values (nested lists or dicts), and a set
+            # would also make the resulting key order arbitrary.
+            data[name] = dict(zip(self.headers, entries))
+        return data
+
+    @staticmethod
+    def _listify(value) -> list:
+        """
+        Return value as a list: lists are copied, None becomes [], anything else is wrapped.
+        """
+        if value is None:
+            return []
+        if isinstance(value, list):
+            return list(value)
+        return [value]
+
+    def get_entries(self, option, rand: bool = False) -> list:
+        """
+        Return a flattened list starting with the option name, followed by the
+        option's items and their values (if any).
+
+        Args:
+            option      - the name of a top-level key in the data
+            rand        - when the option's data is a mapping, use one randomly
+                          chosen key instead of every key. Data that is a list
+                          or a single value is always returned in full.
+
+        Raises:
+            KeyError    - if the option is not present in the data.
+        """
+        flattened = [option]
+        entries = self.data[option]
+
+        # If there is no data for the specified option, stop now.
+        if not entries:
+            return flattened
+
+        if hasattr(entries, 'keys'):
+            # if the option is a dict, each key is prepended to its value list. For example, given:
+            #
+            #  >>> self.data[option] == {'One': ['bar', 'baz'], 'Two': ['qaz', 'qux']}
+            #
+            # a random choice might then be: ['One', 'bar', 'baz']
+            #
+            # Values that are not lists (a single string, or nothing at all) are
+            # wrapped rather than concatenated, so they no longer raise TypeError.
+            pairs = list(entries.items())
+            if rand:
+                pairs = [random.choice(pairs)]
+            choices = [[k, *self._listify(v)] for k, v in pairs]
+        elif isinstance(entries, list):
+            choices = entries
+        else:
+            # A single value: wrap it so the loop below sees one choice instead
+            # of iterating over the characters of a string.
+            choices = [entries]
+
+        for choice in choices:
+            # If the choice is a dict, append every key followed by its value,
+            # flattening the value if it is also a list.
+            if hasattr(choice, 'keys'):
+                for (k, v) in choice.items():
+                    if isinstance(v, list):
+                        flattened.extend([k, *v])
+                    else:
+                        flattened.extend([k, v])
+                continue
+
+            # if the member is a list, append its elements
+            if isinstance(choice, list):
+                flattened.extend(choice)
+                continue
+
+            # otherwise, append the choice itself
+            flattened.append(choice)
+        return flattened
--- a/random_sets/sets.py
+++ b/random_sets/sets.py
@ -0,0 +1,52 @@
+import random
+
+from pathlib import Path
+
+from random_sets.datasources import DataSource
+
+
+class WeightedSet:
+    """
+    A set in which members each have a weight, used for selecting at random.
+
+    Members and weights are stored as two parallel tuples, whether or not the
+    set is empty, so that any two sets can be added together.
+
+    Usage:
+        >>> ws = WeightedSet(('foo', 1.0), ('bar', 0.5))
+        >>> ws.random() in ('foo', 'bar')
+        True
+    """
+
+    def __init__(self, *weighted_members: tuple):
+        """
+        Args:
+            weighted_members - any number of (member, weight) pairs.
+        """
+        self.members = ()
+        self.weights = ()
+        if weighted_members:
+            # Transpose the (member, weight) pairs into two parallel tuples.
+            self.members, self.weights = zip(*weighted_members)
+
+    def random(self) -> str:
+        """
+        Return one member, chosen at random according to the weights.
+
+        Raises:
+            IndexError - if the set has no members.
+        """
+        return random.choices(self.members, self.weights)[0]
+
+    def __add__(self, obj):
+        """
+        Return a new WeightedSet holding this set's members followed by obj's.
+        """
+        ws = WeightedSet()
+        # Normalise both sides to tuples: concatenating a tuple with a list
+        # raises TypeError, which used to happen when one operand was empty.
+        ws.members = tuple(self.members) + tuple(obj.members)
+        ws.weights = tuple(self.weights) + tuple(obj.weights)
+        return ws
+
+    def __str__(self):
+        return f"{self.members}\n{self.weights}"
+
+
+class DataSourceSet(WeightedSet):
+    """
+    A WeightedSet whose members and weights are read from a DataSource.
+
+    The members are the option names of the data source's selected frequency
+    distribution, and the weights are the matching frequencies.
+    """
+
+    def __init__(self, source: Path):
+        """
+        Args:
+            source - path of the file to read; its text is handed to DataSource.
+        """
+        yaml_text = source.read_text()
+        self.source = DataSource(yaml_text)
+        super().__init__(*self.source.frequencies.items())
+
+    def random(self):
+        """
+        Pick an option name at random by weight and return that option's
+        entry from the data source's as_dict() output.
+        """
+        chosen = super().random()
+        table = self.source.as_dict()
+        return table[chosen]
+
+
+def equal_weights(terms: list, weight: float = 1.0, blank: bool = True) -> WeightedSet:
+    """
+    Build a WeightedSet in which every term has the same weight.
+
+    Args:
+        terms  - the members of the set
+        weight - the weight given to each term
+        blank  - if True, put an empty-string member with a weight of 1.0
+                 ahead of the terms
+    """
+    weighted = [(term, weight) for term in terms]
+    if blank:
+        # Build the set in a single constructor call, so that an empty list of
+        # terms still yields a valid set holding only the blank member.
+        weighted.insert(0, ("", 1.0))
+    return WeightedSet(*weighted)