Source code for lightcurvelynx.math_nodes.scipy_random

"""Wrapper classes for some of scipy's sampling functions."""

import warnings
from os import urandom

import numpy as np
import scipy.stats
from scipy.stats.sampling import NumericalInversePolynomial, UNURANError

from lightcurvelynx.base_models import FunctionNode
from lightcurvelynx.graph_state import transpose_dict_of_list


def _safe_make_inv_polynomial(*args, **kwargs):
    """A wrapper function around the NumericalInversePolynomial constructor that:
    1) catches a potential error that can arise when the domain is too small.
    2) ignores a potential warning about the mean of the sampling distribution being moved.

    Parameters
    ----------
    *args
        The positional arguments to use for the NumericalInversePolynomial constructor.
    **kwargs
        The keyword arguments to use for the NumericalInversePolynomial constructor.

    Returns
    -------
    inv_poly : NumericalInversePolynomial
        The created NumericalInversePolynomial object.
    """
    with warnings.catch_warnings():
        # Catch a potential warning about the mean of the sampling distribution being moved.
        warnings.filterwarnings("ignore", category=RuntimeWarning, message=r".*moved.*")

        # Catch the potential error (internal scipy type is not public so we have to check the message)
        # That can result from the domain being too small for the sampling distribution to find.
        try:
            inv_poly = NumericalInversePolynomial(*args, **kwargs)
        except UNURANError as err:
            if "condition for method violated" in str(err):
                raise ValueError(
                    "Error creating the NumericalInversePolynomial object. This can arise when "
                    "the domain is too small for the sampling distribution to find. You can use the "
                    "'domain' parameter to explicitly set the bounds for the sampling distribution. "
                    f"Original error message: {str(err)}. "
                ) from err
            else:
                raise
    return inv_poly


[docs] class NumericalInversePolynomialFunc(FunctionNode): """A class for sampling from scipy's NumericalInversePolynomial given a distribution function, an object with a pdf function, or a class from which to create such an object. Note ---- If a class is provided, then the sampling function will create a new object (with the sampled parameters) for each sampling. This is very expensive. Parameters ---------- dist : object or class An object or class with either a pdf() or logpdf() method that defines the distribution from which to sample. domain : tuple, optional A tuple of (min, max) values to use as bounds for the sampling. If not provided, scipy will try to infer the domain. seed : int, optional The seed to use. """ def __init__(self, dist=None, *, domain=None, seed=None, **kwargs): # Check that the distribution object/class has a pdf or logpdf function # or that we have provided a function directly. if not hasattr(dist, "pdf") and not hasattr(dist, "logpdf"): raise ValueError("Distribution must have either pdf() or logpdf().") self._dist = dist # Save the domain. self._domain = domain # Classes show up as type="type". In this case we will need to create # a concrete object from the class and any given parameters. if isinstance(dist, type): self._inv_poly = None self._vect_sample = np.vectorize(self._create_and_sample) else: self._inv_poly = _safe_make_inv_polynomial(self._dist, domain=self._domain) self._vect_sample = None # Get a default random number generator for this object, using the # given seed if one is provided. if seed is None: seed = int.from_bytes(urandom(4), "big") self._rng = np.random.default_rng(seed=seed) # Set the function and add all the kwargs as parameters. super().__init__(self._rvs, **kwargs) def _rvs(self): """A place holder function to use for object naming.""" pass
[docs] def set_seed(self, new_seed): """Update the random number generator's seed to a given value. Parameters ---------- new_seed : int The given seed """ self._rng = np.random.default_rng(seed=new_seed)
def _create_and_sample(self, args, rng): """Create the distribution function and sample it. This is only needed if our distribution is in the form of a class that must be instantiated with different parameters each sampling run. Parameters ---------- args : dict A dictionary mapping argument name to individual values. rng : numpy.random._generator.Generator The random number generator to use. Returns ------- sample : float The result of sampling the function. """ dist = self._dist(**args) sample = _safe_make_inv_polynomial(dist, domain=self._domain).rvs(1, rng)[0] return sample
[docs] def compute(self, graph_state, rng_info=None, **kwargs): """Execute the wrapped function. The input arguments are taken from the current graph_state and the outputs are written to graph_state. Parameters ---------- graph_state : GraphState An object mapping graph parameters to their values. This object is modified in place as it is sampled. rng_info : numpy.random._generator.Generator, optional A given numpy random number generator to use for this computation. If not provided, the function uses the node's random number generator. **kwargs : dict, optional Additional function arguments. Returns ------- results : any The result of the computation. This return value is provided so that testing functions can easily access the results. """ rng = rng_info if rng_info is not None else self._rng if self._inv_poly is not None: # Batch sample all the results. num_samples = None if graph_state.num_samples == 1 else graph_state.num_samples results = self._inv_poly.rvs(num_samples, rng) else: # This is a class so we will need to create a new distribution object # for each sample (with a single instance of the input parameters). args = self._build_inputs(graph_state, **kwargs) if graph_state.num_samples == 1: dist = self._dist(**args) results = _safe_make_inv_polynomial(dist, domain=self._domain).rvs(1, rng)[0] else: # Transpose the dict of arrays to a list of dicts. arg_list = transpose_dict_of_list(args, graph_state.num_samples) results = self._vect_sample(arg_list, rng) # Save and return the results. self._save_results(results, graph_state) return results
[docs] class PDFFunctionWrapper: """A class that just wraps a given PDF function. Attributes ---------- _pdf : function The PDF function. """ def __init__(self, func):
[docs] self._pdf = func
[docs] self.pdf = self._pdf
[docs] class SamplePDF(NumericalInversePolynomialFunc): """A node for sampling from a given PDF function. Parameters ---------- dist : function, class, or object The pdf function from which to sample or a class/object with that function. domain : tuple, optional A tuple of (min, max) values to use as bounds for the sampling. If not provided, scipy will try to infer the domain. """ def __init__(self, dist, *, domain=None, **kwargs): if hasattr(dist, "pdf"): self.dist_obj = dist elif callable(dist): self.dist_obj = PDFFunctionWrapper(dist) else: raise ValueError("No pdf function detected.") super().__init__(self.dist_obj, domain=domain, **kwargs)
[docs] class LogPDFFunctionWrapper: """A class that just wraps a given Log PDF function. Attributes ---------- _log_pdf : function The log PDF function. """ def __init__(self, func):
[docs] self._log_pdf = func
[docs] self.logpdf = self._log_pdf
[docs] class SampleLogPDF(NumericalInversePolynomialFunc): """A node for sampling from a given Log PDF function. Parameters ---------- dist : function, class, or object The pdf function from which to sample or a class/object with that function. """ def __init__(self, dist, **kwargs): if hasattr(dist, "logpdf"): self.dist_obj = dist elif callable(dist): self.dist_obj = LogPDFFunctionWrapper(dist) else: raise ValueError("No logpdf function detected.") super().__init__(self.dist_obj, **kwargs)
[docs] class ScipyRandomDist(FunctionNode): """The base class sampling from scipy.stats distributions. Attributes ---------- dist_class : scipy.stats distribution object The distribution from which to sample. _rng : numpy.random._generator.Generator This object's random number generator. sample_size : tuple The shape of the array to generate for each sample. The actual returned value will be ``(num_samples, *size)``. If an empty tuple will generate a single value per sample. params_names : list of str The names of the parameters used to create the distribution object. Parameters ---------- dist_name : str The name of the distribution from which to sample. seed : int, optional The seed to use. node_label : str, optional An optional label for the node. If not provided, a default label will be created based on the function name and parameters. **kwargs : dict, optional The parameters to use to create the distribution object. """ def __init__(self, dist_name, seed=None, node_label=None, **kwargs): super().__init__(self._non_func, node_label=node_label) # Create the template of the distribution class and save the parameters. if not hasattr(scipy.stats, dist_name): raise ValueError(f"The distribution {dist_name} is not found in scipy.stats.")
[docs] self.dist_class = getattr(scipy.stats, dist_name)
[docs] self.params_names = []
for key, val in kwargs.items(): self.add_parameter(key, val) self.params_names.append(key) # Get a default random number generator for this object, using the # given seed if one is provided. if seed is None: seed = int.from_bytes(urandom(4), "big")
[docs] self._rng = np.random.default_rng(seed=seed)
[docs] def set_seed(self, new_seed): """Update the random number generator's seed to a given value. Parameters ---------- new_seed : int The given seed """ self._rng = np.random.default_rng(seed=new_seed)
[docs] def compute(self, graph_state, rng_info=None, **kwargs): """Execute the wrapped function. The input arguments are taken from the current graph_state and the outputs are written to graph_state. Parameters ---------- graph_state : GraphState An object mapping graph parameters to their values. This object is modified in place as it is sampled. rng_info : numpy.random._generator.Generator, optional A given numpy random number generator to use for this computation. If not provided, the function uses the node's random number generator. **kwargs : dict, optional Additional function arguments. Returns ------- results : any The result of the computation. This return value is provided so that testing functions can easily access the results. Raises ------ ValueError is func attribute is None. """ # If a random number generator is given use that. Otherwise use the default one. rng = rng_info if rng_info is not None else self._rng # Set the size according to the number of samples. size_param = graph_state.num_samples if graph_state.num_samples > 1 else None # Create the distribution object using the sampled parameters. param_values = {name: self.get_param(graph_state, name) for name in self.params_names} print(param_values) dist = self.dist_class(**param_values) # Generate the values. Then save and return the results. results = dist.rvs(size=size_param, random_state=rng) self._save_results(results, graph_state) return results