Source code for lightcurvelynx.math_nodes.given_sampler

"""Samplers used for testing that produces precomputed results. These
can be used in testing to produce known results or to use data previously
sampled from another method (such as pzflow).
"""

import warnings

import numpy as np
import pandas as pd
from astropy.table import Table

from lightcurvelynx.base_models import FunctionNode
from lightcurvelynx.math_nodes.np_random import NumpyRandomFunc


[docs] class BinarySampler(NumpyRandomFunc): """A FunctionNode that randomly returns True or False according to a given probability. This function is particularly useful in probabilistically applying effects or making decisions in the simulation. Attributes ---------- probability : float The probability of returning True. """ def __init__(self, probability, seed=None, **kwargs): if probability < 0 or probability > 1: raise ValueError(f"Probability must be between 0 and 1. Got {probability}.")
[docs] self.probability = probability
super().__init__("uniform", seed=seed, **kwargs)
[docs] def compute(self, graph_state, rng_info=None, **kwargs): """Return the given values. Parameters ---------- graph_state : GraphState An object mapping graph parameters to their values. This object is modified in place as it is sampled. rng_info : numpy.random._generator.Generator, optional A given numpy random number generator to use for this computation. If not provided, the function uses the node's random number generator. **kwargs : dict, optional Additional function arguments. Returns ------- results : any The result of the computation. This return value is provided so that testing functions can easily access the results. """ rng = rng_info if rng_info is not None else self._rng if graph_state.num_samples == 1: results = rng.random() < self.probability else: results = rng.random(graph_state.num_samples) < self.probability self._save_results(results, graph_state) return results
[docs] class GivenValueList(FunctionNode): """A FunctionNode that returns given results for a single parameter in the order in which they are provided. This node can be used as either stateful or stateless. If stateful, the node will keep track of the next index to return and will return the values in order. If stateless, the node will always return the first N values in the list. Note ---- The stateful version of this node does not support parallel sampling, because it keeps track of a single index for the next value to return. If you need to use this node in parallel sampling, you should set stateful=False, but be aware that this will cause the node to always return the first N values in the list, where N is the number of samples requested, instead of iterating through the list. Attributes ---------- values : list or numpy.ndarray The values to return. next_ind : int The index of the next value. stateful : bool Whether this node is stateful. If True, the node will keep track of the next index to return. If False, the node will always return the first N values in the list, where N is the number of samples requested. Default: True """ def __init__(self, values, *, stateful=True, **kwargs):
[docs] self.values = np.asarray(values)
if len(values) == 0: raise ValueError("No values provided for GivenValueList")
[docs] self.next_ind = 0
self._stateful = stateful super().__init__(self._non_func, **kwargs)
[docs] def __getstate__(self): if self._stateful: raise RuntimeError( "A stateful GivenValueList cannot be pickled. This node does not support parallel sampling." ) return super().__getstate__()
[docs] def reset(self): """Reset the next index to use.""" self.next_ind = 0
[docs] def compute(self, graph_state, rng_info=None, **kwargs): """Return the given values. Parameters ---------- graph_state : GraphState An object mapping graph parameters to their values. This object is modified in place as it is sampled. rng_info : numpy.random._generator.Generator, optional Unused in this function, but included to provide consistency with other compute functions. **kwargs : dict, optional Additional function arguments. Returns ------- results : any The result of the computation. This return value is provided so that testing functions can easily access the results. """ sample_ind = self.next_ind if graph_state.sample_offset is not None: sample_ind += graph_state.sample_offset if graph_state.num_samples == 1: if sample_ind >= len(self.values): raise IndexError( f"GivenValueList ran out of entries to sample. Index {sample_ind} out " f"of bounds for a list with {len(self.values)} entries." ) results = self.values[sample_ind] if self._stateful: self.next_ind += 1 else: end_ind = sample_ind + graph_state.num_samples if end_ind > len(self.values): raise IndexError( f"GivenValueList ran out of entries to sample. Index {sample_ind} out " f"of bounds for a list with {len(self.values)} entries." ) results = self.values[sample_ind:end_ind] if self._stateful: self.next_ind += graph_state.num_samples # Save and return the results. self._save_results(results, graph_state) return results
[docs] class GivenValueSampler(NumpyRandomFunc): """A FunctionNode that returns randomly selected items from a given list with replacement. Note that this node does not support parameterized (chained) inputs. If you need to select from a list of parameterized inputs, use the RandomChoiceNode. Attributes ---------- values : int, list, or numpy.ndarray The values to select from. If an integer is provided, it is treated as a range from 0 to that value - 1. _num_values : int The number of values that can be sampled. _weights : numpy.ndarray, optional The weights for each value, if provided. If None, all values are equally likely. """ def __init__(self, values, weights=None, seed=None, **kwargs): if isinstance(values, int): values = np.arange(values)
[docs] self.values = np.asarray(values)
[docs] self._num_values = len(values)
if self._num_values == 0: raise ValueError("No values provided for NumpySamplerNode") # Compute the normalized weights for each value. if weights is not None: self._weights = np.asarray(weights) if len(self._weights) != self._num_values: raise ValueError( f"Number of weights ({len(self._weights)}) must match the number " f"of values provided ({self._num_values})." ) if np.any(self._weights < 0) or not np.isfinite(self._weights).all(): raise ValueError("Weights must be non-negative and finite.") weight_sum = np.sum(self._weights) if weight_sum <= 0: raise ValueError("Weights must sum to a positive value.") self._weights /= weight_sum else: self._weights = None super().__init__("uniform", seed=seed, **kwargs)
[docs] def compute(self, graph_state, rng_info=None, **kwargs): """Return the given values. Parameters ---------- graph_state : GraphState An object mapping graph parameters to their values. This object is modified in place as it is sampled. rng_info : numpy.random._generator.Generator, optional A given numpy random number generator to use for this computation. If not provided, the function uses the node's random number generator. **kwargs : dict, optional Additional function arguments. Returns ------- results : any The result of the computation. This return value is provided so that testing functions can easily access the results. """ rng = rng_info if rng_info is not None else self._rng if graph_state.num_samples == 1: inds = rng.choice(self._num_values, p=self._weights) else: inds = rng.choice(self._num_values, size=graph_state.num_samples, p=self._weights) results = self.values[inds] self._save_results(results, graph_state) return results
[docs] class GivenValueSelector(FunctionNode): """A FunctionNode that selects a single value from a list of parameters. Parameters ---------- values : float, list, or numpy.ndarray The values that can be selected. index : parameter The parameter that selects which value to return. This should return an integer index corresponding to the position in `values`. **kwargs : dict, optional Any additional keyword arguments. """ def __init__(self, values, index, **kwargs): # The index parameter will automatically be added as input by the FunctionNode constructor. super().__init__(self._select, index=index, **kwargs)
[docs] self.values = np.asarray(values)
if len(values) == 0: raise ValueError("No values provided for GivenValueList") def _select(self, index): """Select the value at the given index.""" if np.any(index < 0) or np.any(index >= len(self.values)): raise IndexError(f"Index {index} out of bounds for values of length {len(self.values)}") return self.values[index]
[docs] class TableSampler(FunctionNode): """A FunctionNode that returns values from a table-like data, including a Pandas DataFrame or AstroPy Table. The results returned can be in-order (for testing) or randomly selected with replacement. Note ---- This is NOT a stateful node. When in_order=True the node will always return the first N rows of the table, where N is the number of samples requested. Parameters ---------- data : pandas.DataFrame, astropy.table.Table, or dict The object containing the data to sample. in_order : bool Return the given data in order of the rows (True). If False, performs random sampling with replacement. Default: False Attributes ---------- columns : list of str The names of the columns in the table. data : astropy.table.Table The object containing the data to sample. in_order : bool Return the given data in order of the rows (True). If False, performs random sampling with replacement. Default: False num_values : int The total number of items from which to draw the data. """ def __init__(self, data, in_order=False, **kwargs):
[docs] self.in_order = in_order
self._last_start_index = -1 if isinstance(data, dict): self.data = Table(data) elif isinstance(data, Table): self.data = data.copy() elif isinstance(data, pd.DataFrame): self.data = Table.from_pandas(data) else: raise TypeError("Unsupported data type for TableSampler.") # Check there are some rows. self._num_values = len(self.data) if self._num_values == 0: raise ValueError("No data provided to TableSampler.") # Save a list of the column names.
[docs] self.columns = [col for col in self.data.colnames]
# Initialize the FunctionNode with each column as an output. super().__init__(self._non_func, outputs=self.data.colnames, **kwargs) # If we are using random sampling, add a random index generator. if not self.in_order: self.add_parameter( "selected_table_index", NumpyRandomFunc("integers", low=0, high=self._num_values), "The index of the selected row in the table.", )
[docs] def __len__(self): """Return the number of items in the table.""" return self._num_values
[docs] def reset(self): """Reset the next index to use. Only used for in-order sampling.""" self.next_ind = 0
[docs] def compute(self, graph_state, rng_info=None, **kwargs): """Return the given values. Parameters ---------- graph_state : GraphState An object mapping graph parameters to their values. This object is modified in place as it is sampled. rng_info : numpy.random._generator.Generator, optional A given numpy random number generator to use for this computation. If not provided, the function uses the node's random number generator. **kwargs : dict, optional Additional function arguments. Returns ------- results : any The result of the computation. This return value is provided so that testing functions can easily access the results. """ # Compute the indices to sample. if self.in_order: start_ind = 0 if graph_state.sample_offset is not None: start_ind += graph_state.sample_offset if start_ind == self._last_start_index: warnings.warn( "TableSampler in_order sampling called multiple times with the same sample_offset. " "This may indicate unintended behavior, because the same parameter values are used " "multiple times instead of iterating over the table. Consider to set different " "sample_offset values for different objects or chunks." ) self._last_start_index = start_ind # Check that we have enough points left to sample. end_index = start_ind + graph_state.num_samples if end_index > len(self.data): raise IndexError( f"TableSampler ran out of entries to sample. Index {end_index} out " f"of bounds for a table with {len(self.data)} entries." ) sample_inds = np.arange(start_ind, end_index) else: sample_inds = self.get_param(graph_state, "selected_table_index") # Parse out each column into a separate parameter with the column name as its name. results = [] for attr_name in self.outputs: # If we only have a single sample, return it directly as a scalar. # Otherwise cast it to a numpy array. if graph_state.num_samples == 1: attr_values = self.data[attr_name][sample_inds.item()] else: attr_values = np.asarray(self.data[attr_name][sample_inds]) results.append(attr_values) # Save and return the results. self._save_results(results, graph_state) return results