"""
Scattering effects configuration for synthetic NIRS data generation.
This module provides configuration classes for light scattering effects in NIR
spectra, including particle size effects and scattering coefficient generation.
Note:
For applying scattering effects to spectra, use the operators in
`nirs4all.operators.augmentation.scattering`:
- ParticleSizeAugmenter: Particle size-dependent scattering
- EMSCDistortionAugmenter: EMSC-style scatter distortions
Key Features:
- EMSC-style (Extended Multiplicative Scatter Correction) transformations
- Particle size-dependent scattering simulation
- Scattering coefficient generation for Kubelka-Munk
- Sample-to-sample scatter variation
- Wavelength-dependent scattering (Rayleigh-like)
Physics Background:
Light scattering in particulate samples is complex and depends on:
- Particle size relative to wavelength (Mie vs Rayleigh regimes)
- Particle shape and surface roughness
- Refractive index differences
- Packing density
Rather than implementing full Mie theory (computationally expensive and
may not match real data), this module uses empirical EMSC-style models
that approximate the distortions that chemometric preprocessing corrects.
References:
- Martens, H., Nielsen, J. P., & Engelsen, S. B. (2003). Light scattering
and light absorbance separated by extended multiplicative signal
correction. Application to near-infrared transmission analysis of
powder mixtures. Analytical Chemistry, 75(3), 394-404.
- Kubelka, P. (1948). New contributions to the optics of intensely
light-scattering materials. Part I. JOSA, 38(5), 448-457.
- Dahm, D. J., & Dahm, K. D. (2007). Interpreting Diffuse Reflectance
and Transmittance. NIR Publications.
- Burger, J., & Geladi, P. (2005). Hyperspectral NIR image regression
part I: calibration and correction. Journal of Chemometrics, 19(5‐7),
355-363.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from enum import Enum
from typing import Optional
import numpy as np
[docs]
class ScatteringModel(str, Enum):
"""Available scattering models."""
EMSC = "emsc" # Extended Multiplicative Scatter Correction style
RAYLEIGH = "rayleigh" # Rayleigh-like (λ⁻⁴ dependence)
MIE_APPROX = "mie_approx" # Simplified Mie approximation
KUBELKA_MUNK = "kubelka_munk" # K-M scattering coefficient model
POLYNOMIAL = "polynomial" # Polynomial baseline scattering
# ============================================================================
# Scattering Model Parameters
# ============================================================================
[docs]
@dataclass
class ParticleSizeDistribution:
"""
Particle size distribution parameters.
Models particle size as a log-normal distribution, which is common
for ground/milled samples in NIR analysis.
Attributes:
mean_size_um: Mean particle size in micrometers.
std_size_um: Standard deviation of particle size in micrometers.
min_size_um: Minimum particle size (lower truncation).
max_size_um: Maximum particle size (upper truncation).
distribution: Type of distribution ('lognormal', 'normal', 'uniform').
"""
mean_size_um: float = 50.0
std_size_um: float = 15.0
min_size_um: float = 5.0
max_size_um: float = 200.0
distribution: str = "lognormal"
[docs]
def sample(self, n_samples: int, rng: np.random.Generator) -> np.ndarray:
"""Sample particle sizes from the distribution."""
if self.distribution == "lognormal":
# Convert to log-space parameters
mu = np.log(self.mean_size_um)
sigma = self.std_size_um / self.mean_size_um
sizes = rng.lognormal(mu, sigma, n_samples)
elif self.distribution == "normal":
sizes = rng.normal(self.mean_size_um, self.std_size_um, n_samples)
elif self.distribution == "uniform":
sizes = rng.uniform(self.min_size_um, self.max_size_um, n_samples)
else:
sizes = rng.normal(self.mean_size_um, self.std_size_um, n_samples)
# Clip to valid range
return np.clip(sizes, self.min_size_um, self.max_size_um)
[docs]
@dataclass
class ParticleSizeConfig:
"""
Configuration for particle size effects.
Attributes:
distribution: Particle size distribution parameters.
reference_size_um: Reference particle size for baseline scattering.
size_effect_strength: How strongly size affects scattering (0-1).
wavelength_exponent: Exponent for wavelength dependence of scattering.
- 4.0 = Rayleigh (particles << wavelength)
- 0.0 = No wavelength dependence
- 1.0-2.0 = Typical for NIR powder samples
include_path_length_effect: Whether particle size affects optical path.
path_length_sensitivity: How strongly size affects path length.
"""
distribution: ParticleSizeDistribution = field(
default_factory=ParticleSizeDistribution
)
reference_size_um: float = 50.0
size_effect_strength: float = 1.0
wavelength_exponent: float = 1.5 # Empirical value for powder samples
include_path_length_effect: bool = True
path_length_sensitivity: float = 0.5
[docs]
@dataclass
class EMSCConfig:
"""
Configuration for EMSC-style scattering transformation.
EMSC models scattering distortion as:
x = a + b*x_ref + d*λ + e*λ² + ...
where a, b are multiplicative/additive scatter, and higher terms
model baseline curvature due to scattering.
Attributes:
polynomial_order: Order of polynomial for wavelength-dependent scatter.
multiplicative_scatter_std: Std dev of multiplicative scatter factor b.
additive_scatter_std: Std dev of additive scatter offset a.
include_wavelength_terms: Whether to include λ, λ² terms.
wavelength_coef_std: Std dev of wavelength coefficient.
reference_spectrum: Optional reference spectrum for EMSC.
"""
polynomial_order: int = 2
multiplicative_scatter_std: float = 0.15
additive_scatter_std: float = 0.05
include_wavelength_terms: bool = True
wavelength_coef_std: float = 0.02
reference_spectrum: Optional[np.ndarray] = None
[docs]
@dataclass
class ScatteringCoefficientConfig:
"""
Configuration for scattering coefficient (S) generation.
For Kubelka-Munk reflectance, we need both absorption (K) and
scattering (S) coefficients. This config controls S(λ) generation.
Attributes:
baseline_scattering: Base scattering coefficient value.
wavelength_exponent: Exponent for wavelength dependence.
S(λ) ∝ λ^(-exponent)
particle_size_factor: How strongly particle size affects S.
sample_variation: Sample-to-sample variation in S.
wavelength_reference_nm: Reference wavelength for normalization.
"""
baseline_scattering: float = 1.0
wavelength_exponent: float = 1.0
particle_size_factor: float = 0.5
sample_variation: float = 0.15
wavelength_reference_nm: float = 1500.0
[docs]
@dataclass
class ScatteringEffectsConfig:
"""
Combined configuration for all scattering effects.
Attributes:
model: Which scattering model to use.
particle_size: Particle size effect configuration.
emsc: EMSC-style transformation configuration.
scattering_coefficient: Scattering coefficient generation config.
enable_particle_size: Whether to apply particle size effects.
enable_emsc: Whether to apply EMSC-style transformation.
"""
model: ScatteringModel = ScatteringModel.EMSC
particle_size: ParticleSizeConfig = field(default_factory=ParticleSizeConfig)
emsc: EMSCConfig = field(default_factory=EMSCConfig)
scattering_coefficient: ScatteringCoefficientConfig = field(
default_factory=ScatteringCoefficientConfig
)
enable_particle_size: bool = True
enable_emsc: bool = True
# ============================================================================
# Module-level exports
# ============================================================================
__all__ = [
# Enums
"ScatteringModel",
# Dataclasses
"ParticleSizeDistribution",
"ParticleSizeConfig",
"EMSCConfig",
"ScatteringCoefficientConfig",
"ScatteringEffectsConfig",
]