Source code for fast_bocpd.models

"""
Observation model parameter wrappers.

The actual implementations are in C, these are just Python wrappers for validation.
"""
import numpy as np
from typing import Optional, Union, List


[docs] class GaussianNIG: """ 1D Gaussian likelihood with Normal-Inverse-Gamma prior. Prior hyperparameters: mu0: Prior mean kappa0: Prior precision scaling (must be > 0) alpha0: Prior shape parameter (must be > 0) beta0: Prior scale parameter (must be > 0) """ def __init__(self, mu0: float, kappa0: float, alpha0: float, beta0: float): if kappa0 <= 0: raise ValueError("kappa0 must be > 0") if alpha0 <= 0: raise ValueError("alpha0 must be > 0") if beta0 <= 0: raise ValueError("beta0 must be > 0") self.mu0 = float(mu0) self.kappa0 = float(kappa0) self.alpha0 = float(alpha0) self.beta0 = float(beta0)
[docs] class StudentTNG: r""" Student-t observation model with Normal-Gamma prior. This model is more robust to outliers than Gaussian-NIG. Two modes are supported: * **Fixed :math:`\nu`** – pass a scalar ``nu`` (standard Student-t). * **Grid :math:`\nu`** – pass ``nu`` as a list/array and optionally ``nu_prior`` to place a discrete prior over different degrees of freedom. Parameters ---------- mu0 : float Prior mean. kappa0 : float Prior precision scaling (> 0). alpha0 : float Prior shape parameter (> 0). beta0 : float Prior rate parameter (> 0). nu : float or array-like, optional Degrees of freedom. When ``nu`` is an array we infer the best value online via a grid mixture. ``nu = 1`` behaves like Cauchy, ``nu`` in [3, 5] is often used for financial data, and :math:`\nu \to \infty` approaches Gaussian. nu_prior : array-like, optional Prior weights over ``nu`` (grid mode). Defaults to uniform weights. Examples -------- Fixed ν:: >>> model = StudentTNG(mu0=0, kappa0=1, alpha0=1, beta0=1, nu=3.0) Grid ν:: >>> model = StudentTNG(mu0=0, kappa0=1, alpha0=1, beta0=1, ... nu=[2, 3, 5, 10, 20]) Grid ν with custom prior:: >>> model = StudentTNG(mu0=0, kappa0=1, alpha0=1, beta0=1, ... nu=[2, 3, 5], nu_prior=[0.2, 0.5, 0.3]) """ def __init__( self, mu0: float, kappa0: float, alpha0: float, beta0: float, nu: Union[float, List[float], np.ndarray] = 3.0, nu_prior: Optional[Union[List[float], np.ndarray]] = None ): if kappa0 <= 0: raise ValueError("kappa0 must be > 0") if alpha0 <= 0: raise ValueError("alpha0 must be > 0") if beta0 <= 0: raise ValueError("beta0 must be > 0") self.mu0 = float(mu0) self.kappa0 = float(kappa0) self.alpha0 = float(alpha0) self.beta0 = float(beta0) # Handle both fixed ν and grid ν (support scalar, list, array, tuple) nu_arr = np.asarray(nu, dtype=np.float64) if nu_arr.ndim == 0: # Fixed ν mode (scalar) nu_val = float(nu_arr) if nu_val <= 0: raise ValueError("nu (degrees of freedom) must be > 0") self.nu = nu_val self.is_grid = False self.nu_grid = None self.nu_prior = None self.K = None else: # Grid mode (array-like) self.nu_grid = np.ascontiguousarray(nu_arr, dtype=np.float64) if len(self.nu_grid) == 0: raise ValueError("nu_grid cannot be empty") if np.any(self.nu_grid <= 0): raise ValueError("All nu values must be > 0") if not np.all(np.isfinite(self.nu_grid)): raise ValueError("All nu values must be finite") self.K = len(self.nu_grid) self.is_grid = True self.nu = None # Not used in grid mode # Handle prior if nu_prior is None: # Uniform prior self.nu_prior = np.ones(self.K, dtype=np.float64) self.nu_prior /= self.K else: prior_arr = np.asarray(nu_prior, dtype=np.float64) if prior_arr.ndim != 1: raise ValueError("nu_prior must be 1-D array-like") self.nu_prior = np.ascontiguousarray(prior_arr, dtype=np.float64) if len(self.nu_prior) != self.K: raise ValueError(f"nu_prior length ({len(self.nu_prior)}) must match nu_grid length ({self.K})") if np.any(self.nu_prior < 0): raise ValueError("All nu_prior values must be >= 0") if not np.all(np.isfinite(self.nu_prior)): raise ValueError("All nu_prior values must be finite") if np.sum(self.nu_prior) == 0: raise ValueError("nu_prior must have at least one positive value") # Normalize self.nu_prior = self.nu_prior / np.sum(self.nu_prior)
[docs] class PoissonGamma: """ Poisson likelihood with Gamma prior on rate parameter (count data). Conjugate Bayesian model for non-negative integer count data. Predictive distribution is Negative Binomial. Use this for: - Event counts (clicks, transactions, arrivals) - Discrete data with λ > 0 - Overdispersed counts (vs. fixed-rate Poisson) Prior hyperparameters: alpha0: Gamma prior shape (must be > 0) Controls prior belief about rate variability Larger values = more concentrated prior beta0: Gamma prior rate (must be > 0) Controls prior belief about expected rate Prior mean rate = alpha0 / beta0 Data requirements: - Must be non-negative integers (0, 1, 2, ...) - In Python, pass as int, float with .0, or integer array - Non-integer or negative values will be rejected (strict=True) Examples: # Event counts with vague prior >>> model = PoissonGamma(alpha0=1.0, beta0=1.0) # Prior belief: mean rate ≈ 5.0, concentrated >>> model = PoissonGamma(alpha0=50.0, beta0=10.0) # mean = 50/10 = 5 # Disable strict validation (use with caution) >>> model = PoissonGamma(alpha0=1.0, beta0=1.0, strict=False) """ def __init__( self, alpha0: float, beta0: float, *, strict: bool = True ): # Validate hyperparameters (always, even if strict=False) if not isinstance(alpha0, (int, float, np.number)): raise TypeError(f"alpha0 must be numeric, got {type(alpha0)}") if not isinstance(beta0, (int, float, np.number)): raise TypeError(f"beta0 must be numeric, got {type(beta0)}") alpha0 = float(alpha0) beta0 = float(beta0) if not np.isfinite(alpha0): raise ValueError("alpha0 must be finite") if not np.isfinite(beta0): raise ValueError("beta0 must be finite") if alpha0 <= 0: raise ValueError("alpha0 must be > 0") if beta0 <= 0: raise ValueError("beta0 must be > 0") self.alpha0 = alpha0 self.beta0 = beta0 self.strict = bool(strict)
[docs] def validate_data(self, x): """ Validate a single observation (used in update()). Args: x: Observation (should be non-negative integer) Raises: ValueError: If x is invalid and strict=True """ if not self.strict: return # Skip validation # Check finite if not np.isfinite(x): raise ValueError(f"Observation must be finite, got {x}") # Check non-negative if x < 0: raise ValueError(f"Poisson counts must be >= 0, got {x}") # Check integer-ness (tolerance for floating point) if abs(x - round(x)) > 1e-9: raise ValueError( f"Poisson counts must be integers, got {x}. " f"If your data is truly continuous, use GaussianNIG or StudentTNG instead." )
[docs] def validate_batch(self, data: np.ndarray) -> np.ndarray: """ Validate and convert batch data to contiguous float64 array. Args: data: Array-like of observations Returns: Validated, contiguous float64 array Raises: ValueError: If data is invalid and strict=True """ # Convert to numpy array if not already data = np.asarray(data) # Fast-path for integer dtypes (common case) if np.issubdtype(data.dtype, np.integer): if self.strict and np.any(data < 0): raise ValueError("Poisson counts must be >= 0") return np.ascontiguousarray(data, dtype=np.float64) # Float array validation (if strict) if self.strict: if not np.all(np.isfinite(data)): raise ValueError("All observations must be finite") if np.any(data < 0): raise ValueError("Poisson counts must be >= 0") # Check integer-ness if not np.allclose(data, np.round(data), atol=1e-9): raise ValueError( "Poisson counts must be integers. " "If your data is truly continuous, use GaussianNIG or StudentTNG instead." ) return np.ascontiguousarray(data, dtype=np.float64)
[docs] class BernoulliBeta: """ Bernoulli likelihood with Beta prior on success probability (binary data). Conjugate Bayesian model for binary outcomes (0/1, success/failure). Predictive distribution is Beta-Bernoulli. Use this for: - Binary classification (yes/no, pass/fail) - Conversion rates (click/no-click) - A/B testing outcomes - Any {0, 1} data Prior hyperparameters: alpha0: Beta prior successes (must be > 0) Controls prior belief about success probability Larger values = more weight on high success rates beta0: Beta prior failures (must be > 0) Controls prior belief about failure probability Larger values = more weight on low success rates Prior mean = alpha0 / (alpha0 + beta0) Data requirements: - Must be binary: 0 or 1 - In Python, pass as int, bool, or float with .0 - Non-binary values will be rejected (strict=True) Examples: # Vague prior (uniform over [0,1]) >>> model = BernoulliBeta(alpha0=1.0, beta0=1.0) # Prior belief: success rate ≈ 0.3, concentrated >>> model = BernoulliBeta(alpha0=30.0, beta0=70.0) # mean = 30/100 = 0.3 # Disable strict validation (use with caution) >>> model = BernoulliBeta(alpha0=1.0, beta0=1.0, strict=False) """ def __init__( self, alpha0: float, beta0: float, *, strict: bool = True ): # Validate hyperparameters (always, even if strict=False) if not isinstance(alpha0, (int, float, np.number)): raise TypeError(f"alpha0 must be numeric, got {type(alpha0)}") if not isinstance(beta0, (int, float, np.number)): raise TypeError(f"beta0 must be numeric, got {type(beta0)}") alpha0 = float(alpha0) beta0 = float(beta0) if not np.isfinite(alpha0): raise ValueError("alpha0 must be finite") if not np.isfinite(beta0): raise ValueError("beta0 must be finite") if alpha0 <= 0: raise ValueError("alpha0 must be > 0") if beta0 <= 0: raise ValueError("beta0 must be > 0") self.alpha0 = alpha0 self.beta0 = beta0 self.strict = bool(strict)
[docs] def validate_data(self, x): """ Validate a single observation (used in update()). Args: x: Observation (should be 0 or 1) Raises: ValueError: If x is invalid and strict=True """ if not self.strict: return # Skip validation # Check finite if not np.isfinite(x): raise ValueError(f"Observation must be finite, got {x}") # Check binary-ness (tolerance for floating point) x_rounded = round(x) if abs(x - x_rounded) > 1e-9: raise ValueError( f"Bernoulli data must be binary (0 or 1), got {x}. " f"If your data is continuous, use GaussianNIG or StudentTNG instead." ) # Check range if x_rounded not in (0, 1): raise ValueError( f"Bernoulli data must be 0 or 1, got {x}. " f"If your data is continuous, use GaussianNIG or StudentTNG instead." )
[docs] def validate_batch(self, data: np.ndarray) -> np.ndarray: """ Validate and convert batch data to contiguous float64 array. Args: data: Array-like of observations Returns: Validated, contiguous float64 array Raises: ValueError: If data is invalid and strict=True """ # Convert to numpy array if not already data = np.asarray(data) # Fast-path for boolean dtype if data.dtype == np.bool_: return np.ascontiguousarray(data, dtype=np.float64) # Fast-path for integer dtypes (common case) if np.issubdtype(data.dtype, np.integer): if self.strict: unique_vals = np.unique(data) if not np.all((unique_vals == 0) | (unique_vals == 1)): raise ValueError("Bernoulli data must be binary (0 or 1)") return np.ascontiguousarray(data, dtype=np.float64) # Float array validation (if strict) if self.strict: if not np.all(np.isfinite(data)): raise ValueError("All observations must be finite") # Check binary-ness if not np.allclose(data, np.round(data), atol=1e-9): raise ValueError( "Bernoulli data must be binary (0 or 1). " "If your data is continuous, use GaussianNIG or StudentTNG instead." ) # Check range rounded = np.round(data) if not np.all((rounded == 0) | (rounded == 1)): raise ValueError( "Bernoulli data must be 0 or 1. " "If your data is continuous, use GaussianNIG or StudentTNG instead." ) return np.ascontiguousarray(data, dtype=np.float64)
[docs] class BinomialBeta: """ Binomial likelihood with Beta prior on success probability (fixed-N count data). Conjugate Bayesian model for binomial outcomes with fixed number of trials. Predictive distribution is Beta-Binomial. Use this for: - Conversion rates per period (k successes in N trials) - A/B testing with fixed sample sizes - Aggregated binary outcomes - Any {0, 1, ..., N} count data Prior hyperparameters: alpha0: Beta prior successes (must be > 0) Controls prior belief about success probability Larger values = more weight on high success rates beta0: Beta prior failures (must be > 0) Controls prior belief about failure probability Larger values = more weight on low success rates Prior mean = alpha0 / (alpha0 + beta0) n_trials: Fixed number of trials per observation (must be >= 1) Each observation is k ∈ {0, 1, ..., n_trials} Data requirements: - Must be integers: 0, 1, 2, ..., n_trials - In Python, pass as int or float with .0 - Values > n_trials or non-integer will be rejected (strict=True) Special case: - n_trials=1 reduces to Bernoulli-Beta (identical predictive) Examples: # Conversion rate: 10 trials per period >>> model = BinomialBeta(alpha0=1.0, beta0=1.0, n_trials=10) # Prior belief: success rate ≈ 0.3, N=20 trials >>> model = BinomialBeta(alpha0=30.0, beta0=70.0, n_trials=20) # Disable strict validation (use with caution) >>> model = BinomialBeta(alpha0=1.0, beta0=1.0, n_trials=10, strict=False) """ def __init__( self, alpha0: float, beta0: float, n_trials: int, *, strict: bool = True ): # Validate hyperparameters (always, even if strict=False) if not isinstance(alpha0, (int, float, np.number)): raise TypeError(f"alpha0 must be numeric, got {type(alpha0)}") if not isinstance(beta0, (int, float, np.number)): raise TypeError(f"beta0 must be numeric, got {type(beta0)}") if not isinstance(n_trials, (int, np.integer)): raise TypeError(f"n_trials must be integer, got {type(n_trials)}") alpha0 = float(alpha0) beta0 = float(beta0) n_trials = int(n_trials) if not np.isfinite(alpha0): raise ValueError("alpha0 must be finite") if not np.isfinite(beta0): raise ValueError("beta0 must be finite") if alpha0 <= 0: raise ValueError("alpha0 must be > 0") if beta0 <= 0: raise ValueError("beta0 must be > 0") if n_trials < 1: raise ValueError("n_trials must be >= 1") self.alpha0 = alpha0 self.beta0 = beta0 self.n_trials = n_trials self.strict = bool(strict)
[docs] def validate_data(self, k): """ Validate a single observation (used in update()). Args: k: Observation (should be integer in 0..n_trials) Raises: ValueError: If k is invalid and strict=True """ if not self.strict: return # Skip validation # Check finite if not np.isfinite(k): raise ValueError(f"Observation must be finite, got {k}") # Check non-negative if k < 0: raise ValueError(f"Binomial counts must be >= 0, got {k}") # Check integer-ness (tolerance for floating point) if abs(k - round(k)) > 1e-9: raise ValueError( f"Binomial counts must be integers, got {k}. " f"If your data is truly continuous, use GaussianNIG or StudentTNG instead." ) # Check range if round(k) > self.n_trials: raise ValueError( f"Binomial count must be <= n_trials ({self.n_trials}), got {k}" )
[docs] def validate_batch(self, data: np.ndarray) -> np.ndarray: """ Validate and convert batch data to contiguous float64 array. Args: data: Array-like of observations Returns: Validated, contiguous float64 array Raises: ValueError: If data is invalid and strict=True """ # Convert to numpy array if not already data = np.asarray(data) # Fast-path for integer dtypes (common case) if np.issubdtype(data.dtype, np.integer): if self.strict: if np.any(data < 0): raise ValueError("Binomial counts must be >= 0") if np.any(data > self.n_trials): raise ValueError(f"Binomial counts must be <= n_trials ({self.n_trials})") return np.ascontiguousarray(data, dtype=np.float64) # Float array validation (if strict) if self.strict: if not np.all(np.isfinite(data)): raise ValueError("All observations must be finite") if np.any(data < 0): raise ValueError("Binomial counts must be >= 0") # Check integer-ness if not np.allclose(data, np.round(data), atol=1e-9): raise ValueError( "Binomial counts must be integers. " "If your data is truly continuous, use GaussianNIG or StudentTNG instead." ) # Check range on rounded values (to handle near-integer floats correctly) rounded = np.round(data) if np.any(rounded > self.n_trials): raise ValueError(f"Binomial counts must be <= n_trials ({self.n_trials})") return np.ascontiguousarray(data, dtype=np.float64)
[docs] class GammaGamma: """ Gamma likelihood with Gamma prior on rate parameter (fixed-shape scale data). Conjugate Bayesian model for positive continuous data with fixed shape. Predictive distribution is Beta-prime-like. Likelihood: x ~ Gamma(shape=k, rate=λ) [fixed k, unknown λ] Prior: λ ~ Gamma(alpha0, beta0) Use this for: - Positive continuous data with fixed shape - Exponential data (special case k=1) - Waiting times, survival data, reliability analysis - Data with constant coefficient of variation Prior hyperparameters: alpha0: Prior shape parameter on rate λ (must be > 0) Controls prior belief about the rate Larger values = stronger prior belief beta0: Prior rate parameter on rate λ (must be > 0) Controls scale of prior on rate Prior mean E[λ] = alpha0 / beta0 shape: Fixed shape parameter k of Gamma likelihood (default 1.0) Recommended: k >= 1 for well-behaved densities Special case: k=1 gives Exponential distribution Data requirements: - Must be positive: x > 0 - x=0 is rejected with -∞ log density (k > 1) or special case (k ≈ 1) - In strict mode: enforces shape >= 1 (recommended for stability) Parameterization note: Uses RATE parameterization (not scale): - Higher rate → smaller values - Mean of Gamma(k, λ) = k/λ - Variance = k/λ² Special case: - shape=1.0: Reduces to Exponential-Gamma (conjugate for exponential data) Examples: # Exponential waiting times (default) >>> model = GammaGamma(alpha0=1.0, beta0=1.0) # shape=1.0 # Prior: mean rate ≈ 2.0, shape=2.0 >>> model = GammaGamma(alpha0=10.0, beta0=5.0, shape=2.0) # Disable strict mode (allow shape < 1, use with caution) >>> model = GammaGamma(alpha0=1.0, beta0=1.0, shape=0.5, strict=False) """ def __init__( self, alpha0: float, beta0: float, shape: float = 1.0, *, strict: bool = True ): # Validate hyperparameters (always, even if strict=False) if not isinstance(alpha0, (int, float, np.number)): raise TypeError(f"alpha0 must be numeric, got {type(alpha0)}") if not isinstance(beta0, (int, float, np.number)): raise TypeError(f"beta0 must be numeric, got {type(beta0)}") if not isinstance(shape, (int, float, np.number)): raise TypeError(f"shape must be numeric, got {type(shape)}") alpha0 = float(alpha0) beta0 = float(beta0) shape = float(shape) if not np.isfinite(alpha0): raise ValueError("alpha0 must be finite") if not np.isfinite(beta0): raise ValueError("beta0 must be finite") if not np.isfinite(shape): raise ValueError("shape must be finite") if alpha0 <= 0: raise ValueError("alpha0 must be > 0") if beta0 <= 0: raise ValueError("beta0 must be > 0") if shape <= 0: raise ValueError("shape must be > 0") # Strict mode: enforce shape >= 1 recommendation if strict and shape < 1.0: raise ValueError( "shape must be >= 1.0 in strict mode (recommended for well-behaved densities). " "Set strict=False to allow shape < 1, but use with caution." ) self.alpha0 = alpha0 self.beta0 = beta0 self.shape = shape self.strict = bool(strict)
[docs] def validate_data(self, x): """ Validate a single observation (used in update()). Args: x: Observation (should be positive) Raises: ValueError: If x is invalid and strict=True """ if not self.strict: return # Skip validation # Check finite if not np.isfinite(x): raise ValueError(f"Observation must be finite, got {x}") # Check positive (x=0 is allowed but gives -∞ density in most cases) if x < 0: raise ValueError(f"Gamma data must be non-negative, got {x}")
[docs] def validate_batch(self, data: np.ndarray) -> np.ndarray: """ Validate and convert batch data to contiguous float64 array. Args: data: Array-like of observations Returns: Validated, contiguous float64 array Raises: ValueError: If data is invalid and strict=True """ # Convert to numpy array if not already data = np.asarray(data) # Validation (if strict) if self.strict: if not np.all(np.isfinite(data)): raise ValueError("All observations must be finite") if np.any(data < 0): raise ValueError("Gamma data must be non-negative") return np.ascontiguousarray(data, dtype=np.float64)