Source code for pisces.math_utils.sampling

"""Module for sampling from various statistical distributions.

This module provides functions to sample particle positions from a probability density function (PDF)
and a cumulative distribution function (CDF) defined at discrete positions. It implements
inverse transform sampling using linear interpolation, allowing for efficient sampling from
discrete distributions.
"""

import numpy as np
from scipy.integrate import cumulative_trapezoid

from pisces.utilities.rng import __RNG__


[docs] def sample_from_pdf(x: np.ndarray, y: np.ndarray, num_particles: int) -> np.ndarray: """Sample particle positions from a probability density function (PDF) defined at discrete positions. This function first constructs a normalized cumulative distribution function (CDF) from the input PDF using the trapezoidal rule, and then uses inverse transform sampling to generate samples. Parameters ---------- x : np.ndarray A strictly increasing 1D array representing the domain of the PDF. y : np.ndarray A 1D array of the same length as `x`, containing the PDF values at each position. Values must be non-negative. num_particles : int The number of samples to draw from the distribution. Returns ------- np.ndarray A 1D array of shape `(num_particles,)` containing particle positions sampled from the input distribution. Raises ------ ValueError If inputs are not 1D arrays of the same length, if `x` is not strictly increasing, or if `y` contains negative values. Notes ----- This function uses inverse transform sampling: it constructs a CDF via cumulative trapezoidal integration, normalizes it to [0, 1], and maps uniform random values through the inverse CDF (using linear interpolation). """ # Validate inputs. We need to check the dimensionality and the lengths. Then we # ensure that `x` is increasing and `y` is non-negative. if x.ndim != 1 or y.ndim != 1: raise ValueError("x and y must be 1D arrays.") if len(x) != len(y): raise ValueError("x and y must have the same length.") if not np.all(np.diff(x) > 0): raise ValueError("x must be strictly increasing.") if np.any(y < 0): raise ValueError("PDF values must be non-negative.") # Compute the cumulative integral using the trapezoidal rule cumulative_integral = cumulative_trapezoid(y, x, initial=0) return sample_from_cdf(x, cumulative_integral, num_particles)
[docs] def sample_from_cdf(x: np.ndarray, cdf: np.ndarray, num_particles: int) -> np.ndarray: """Sample particle positions from a cumulative distribution function (CDF) defined at discrete positions. This function implements inverse transform sampling using linear interpolation. It assumes the input CDF is monotonically increasing and defined on a 1D grid `x`. Parameters ---------- x : np.ndarray A strictly increasing 1D array of positions where the CDF is defined. cdf : np.ndarray A 1D array of the same length as `x`, containing the CDF values at each position. Values must be non-decreasing and bounded in [0, 1] (after normalization). num_particles : int The number of samples to draw from the distribution. Returns ------- np.ndarray A 1D array of shape `(num_particles,)` containing particle positions sampled from the input distribution. Raises ------ ValueError If inputs are not 1D arrays of the same length, if `x` is not strictly increasing, or if `cdf` is not non-decreasing. Notes ----- This function uses inverse transform sampling: uniformly distributed random values on [0, 1] are mapped through the inverse CDF (using linear interpolation) to obtain samples from the target distribution. """ # Input validation if x.ndim != 1 or cdf.ndim != 1: raise ValueError("x and cdf must be 1D arrays.") if len(x) != len(cdf): raise ValueError("x and cdf must have the same length.") if not np.all(np.diff(x) > 0): raise ValueError("x must be strictly increasing.") if not np.all(np.diff(cdf) >= 0): raise ValueError("cdf must be non-decreasing.") # Normalize the CDF to [0, 1] if not already cdf_min = cdf[0] cdf_max = cdf[-1] if cdf_max == cdf_min: raise ValueError("CDF has zero dynamic range (all values are equal).") cdf = (cdf - cdf_min) / (cdf_max - cdf_min) # Draw uniform samples and map them through the inverse CDF uniform_samples = __RNG__.uniform(0.0, 1.0, num_particles) return np.interp(uniform_samples, cdf, x)