initial commit

2023-12-23 15:34:32 -08:00 · 2023-12-23 15:34:32 -08:00 · 946aa5ea01
commit 946aa5ea01
parent 074c403538
4 changed files with 242 additions and 0 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,43 @@
 [tool.poetry]
 name = "random-sets"
 version = "0.1.0"
 description = "A small library of helper classes for dealing with random data using weighted distributions"
 authors = ["evilchili <evilchili@gmail.com>"]
 readme = "README.md"
 packages = [
    { include = 'random_sets' },
 ]
 [tool.poetry.dependencies]
 python = "^3.10"
 dice = "^4.0.0"
 pyyaml = "^6.0"                      # random_sets/datasources.py imports yaml
 [tool.poetry.group.dev.dependencies]
 pytest = "^7.4.3"
 black = "^23.3.0"
 isort = "^5.12.0"
 pyproject-autoflake = "^1.0.2"
 [tool.black]
 line-length = 120
 target-version = ['py310']
 [tool.isort]
 multi_line_output = 3
 line_length = 120
 include_trailing_comma = true
 [tool.autoflake]
 check = false                        # return error code if changes are needed
 in-place = true                      # make changes to files instead of printing diffs
 recursive = true                     # drill down directories recursively
 remove-all-unused-imports = true     # remove all unused imports (not just those from the standard library)
 ignore-init-module-imports = true    # exclude __init__.py when removing unused imports
 remove-duplicate-keys = true         # remove all duplicate keys in objects
 remove-unused-variables = true       # remove unused variables
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
--- a/random_sets/__init__.py
+++ b/random_sets/__init__.py
--- a/random_sets/datasources.py
+++ b/random_sets/datasources.py
@ -0,0 +1,147 @@
 import random
 import yaml
 from typing import IO
 class DataSource:
    """
    Represents a yaml data source used to generate roll tables.

    Attributes:
        source      - the IO source (or YAML text) to parse
        frequency   - the name of the frequency distribution to apply
        headers     - an array of header strings
        frequencies - the selected distribution, mapping option name to weight
        data        - The parsed YAML data, with the 'metadata' key removed
        metadata    - the contents of the source's 'metadata' key, if any

    Methods:
        load_source - Read and parse the source, populating the attributes
    """

    def __init__(self, source: IO, frequency: str = 'default') -> None:
        """
        Initialize a DataSource instance and load it immediately.

        Args:
            source      - an IO object (or a string of YAML) to read source from
            frequency   - the name of the frequency distribution to use; must
                          be 'default' or be defined in the source file's metadata.

        Raises:
            KeyError    - if the named frequency distribution does not exist.
        """
        self.source = source
        self.frequency = frequency
        self.headers = []
        self.frequencies = None
        self.data = None
        self.metadata = None
        self.load_source()

    def load_source(self) -> None:
        """
        Cache the yaml source and the parsed or generated metadata.

        Does nothing when data has already been loaded.
        """
        if self.data:
            return
        self.read_source()
        self.init_headers()
        self.init_frequencies()

    def read_source(self) -> None:
        """
        Parse the YAML source and split its 'metadata' key off from the data.
        """
        # safe_load() returns None for an empty document, and a bare 'metadata:'
        # key parses to None; treat both as empty instead of crashing later on.
        self.data = yaml.safe_load(self.source) or {}
        self.metadata = self.data.pop('metadata', None) or {}

    def init_headers(self) -> None:
        """
        Take the column headers from the metadata, when it defines any.
        """
        if 'headers' in self.metadata:
            self.headers = self.metadata['headers']

    def init_frequencies(self) -> None:
        """
        Select the frequency distribution named by self.frequency.

        A 'default' distribution giving every option the same weight is always
        available; distributions defined in the metadata are added to it (and
        may replace 'default').

        Raises:
            KeyError - if self.frequency names no known distribution.
        """
        # random.choices() only considers the relative size of the weights, so
        # any value shared by all options gives each of them the same chance.
        default_freq = len(self.data) / 100
        frequencies = {'default': {k: default_freq for k in self.data}}
        # A plain update() (rather than **kwargs) so that distribution names
        # are not required to be strings.
        frequencies.update(self.metadata.get('frequencies') or {})
        self.frequencies = frequencies[self.frequency]

    def random_frequencies(self, count: int = 1) -> list:
        """
        Choose random option names from the frequency table.

        Args:
            count - how many option names to draw (with replacement)
        """
        options = list(self.frequencies)
        weights = list(self.frequencies.values())
        return random.choices(options, weights=weights, k=count)

    def random_values(self, count: int = 1) -> list:
        """
        Return a list of random values from the data set, as a list of lists.
        """
        return [
            self.get_entries(option, rand=True) for option in self.random_frequencies(count)
        ]

    def as_dict(self) -> dict:
        """
        Return the contents of the data source as a dict.

        Each option name maps to a dict pairing the headers, in order, with the
        option's flattened entries; surplus headers or entries are dropped.
        """
        # Pair headers and entries directly: going through a set of tuples would
        # fail on unhashable entries (lists, dicts) and lose the column order.
        return {
            name: dict(zip(self.headers, self.get_entries(name, rand=False)))
            for name in self.data
        }

    @staticmethod
    def _as_row(key, value) -> list:
        """
        Return key followed by value, spreading value out if it is a list.
        """
        if isinstance(value, list):
            return [key, *value]
        return [key, value]

    def get_entries(self, option, rand: bool = False) -> list:
        """
        For a random item or each item in the specified option in the data source,
        return a flattened list of the option, the select item, and the item's value (if any).

        Args:
            option - the name of a top-level key in the data
            rand   - for dict-valued options, pick one random item instead of all

        Raises:
            KeyError - if option is not present in the data.
        """
        flattened = [option]
        value = self.data[option]

        # If there is no data for the specified option, stop now.
        if not value:
            return flattened

        if hasattr(value, 'keys'):
            # if the option is a dict, we select a random item (or every item) and
            # prepend the key to its value as our selection. For example, given:
            #
            #  >>> self.data[option] == {'One': ['bar', 'baz'], 'Two': ['qaz', 'qux']}
            #
            # choices might then be: [['One', 'bar', 'baz']]
            #
            items = list(value.items())
            if rand:
                items = [random.choice(items)]
            choices = [self._as_row(k, v) for k, v in items]
        elif isinstance(value, list):
            # NOTE(review): rand is ignored for list-valued options, so every
            # item is returned -- confirm that this is intended.
            choices = value
        else:
            # A scalar (e.g. a string) is a single choice; iterating it directly
            # would split a string into its individual characters.
            choices = [value]

        for choice in choices:
            if hasattr(choice, 'keys'):
                # a dict contributes each key followed by its (flattened) value
                for k, v in choice.items():
                    flattened.extend(self._as_row(k, v))
            elif isinstance(choice, list):
                # a list contributes its members
                flattened.extend(choice)
            else:
                # anything else is added as-is
                flattened.append(choice)
        return flattened
--- a/random_sets/sets.py
+++ b/random_sets/sets.py
@ -0,0 +1,52 @@
 import random
 from pathlib import Path
 from random_sets.datasources import DataSource
 class WeightedSet:
    """
    A set in which members each have a weight, used for selecting at random.

    Attributes:
        members - a tuple of the members, in the order they were given
        weights - a tuple of the matching weights, in the same order

    Usage:
        >>> ws = WeightedSet(('foo', 1.0), ('bar', 0.5))
        >>> ws.random()
        'foo'
    """

    def __init__(self, *weighted_members: tuple):
        """
        Args:
            weighted_members - any number of (member, weight) pairs
        """
        # Always tuples, even when empty, so that any two sets can be added
        # together (a list and a tuple cannot be concatenated).
        self.members = ()
        self.weights = ()
        if weighted_members:
            self.members, self.weights = zip(*weighted_members)

    def random(self) -> str:
        """
        Return one member, chosen at random according to the weights.

        Raises:
            IndexError - if the set is empty.
        """
        return random.choices(self.members, self.weights)[0]

    def __add__(self, obj):
        """
        Return a new WeightedSet holding the members of self followed by those of obj.
        """
        ws = WeightedSet()
        # Coerce both sides in case either attribute was reassigned to a list.
        ws.members = tuple(self.members) + tuple(obj.members)
        ws.weights = tuple(self.weights) + tuple(obj.weights)
        return ws

    def __str__(self):
        return f"{self.members}\n{self.weights}"
 class DataSourceSet(WeightedSet):
    """
    A WeightedSet whose members are the option names of a YAML data source,
    weighted by the data source's selected frequency distribution.
    """

    def __init__(self, source: Path):
        """
        Args:
            source - path of the YAML file to read the data source from
        """
        yaml_text = source.read_text()
        self.source = DataSource(yaml_text)
        # frequencies maps option name -> weight, i.e. (member, weight) pairs
        super().__init__(*self.source.frequencies.items())

    def random(self):
        """
        Pick a random option name and return that option's entry from the
        data source's dict representation.
        """
        chosen = super().random()
        rows = self.source.as_dict()
        return rows[chosen]
 def equal_weights(terms: list, weight: float = 1.0, blank: bool = True) -> WeightedSet:
    """
    Return a WeightedSet in which every term has the same weight.

    Args:
        terms  - the members of the set
        weight - the weight given to each term
        blank  - if True, put an empty-string member first; its weight is
                 always 1.0, whatever the value of weight
    """
    weighted = [(term, weight) for term in terms]
    if blank:
        # Build the set in a single call rather than adding two sets together,
        # so an empty list of terms still yields a set holding just the blank.
        weighted.insert(0, ("", 1.0))
    return WeightedSet(*weighted)