# Source code for nirs4all.data.synthetic.components

"""
Spectral components for synthetic NIRS spectra generation.

This module provides the core building blocks for defining NIR absorption bands
and spectral components based on physical spectroscopy principles.

Classes:
    NIRBand: Represents a single NIR absorption band with Voigt profile.
    SpectralComponent: A chemical compound or functional group with multiple bands.
    ComponentLibrary: Collection of spectral components for generation.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple

import numpy as np
from scipy.special import voigt_profile


@dataclass
class NIRBand:
    """
    Represents a single NIR absorption band.

    The band shape is a Voigt profile, i.e. the convolution of a Gaussian
    (thermal broadening) and a Lorentzian (pressure broadening) line shape.
    With ``gamma <= 0`` the shape reduces to a plain Gaussian.

    Attributes:
        center: Central wavelength in nm.
        sigma: Gaussian width (standard deviation) in nm.
        gamma: Lorentzian width (HWHM) in nm. Use 0 for pure Gaussian.
        amplitude: Scale factor in absorbance units. It equals the peak
            height for a pure Gaussian band; for ``gamma > 0`` the actual
            peak is somewhat lower (see :meth:`compute`).
        name: Descriptive name of the band (e.g., "O-H 1st overtone").

    Example:
        >>> band = NIRBand(center=1450, sigma=25, gamma=3, amplitude=0.8)
        >>> wavelengths = np.arange(1400, 1500, 1)
        >>> spectrum = band.compute(wavelengths)
    """

    center: float
    sigma: float
    gamma: float = 0.0
    amplitude: float = 1.0
    name: str = ""

    def compute(self, wavelengths: np.ndarray) -> np.ndarray:
        """
        Evaluate the band profile at the given wavelengths.

        Args:
            wavelengths: Wavelengths in nm at which to evaluate the band.
                Any array-like (ndarray, list, tuple) is accepted.

        Returns:
            Array of absorbance values, one per input wavelength.

        Note:
            When ``gamma <= 0`` a closed-form Gaussian with peak height
            ``amplitude`` is returned. Otherwise the unit-area Voigt profile
            is multiplied by ``sigma * sqrt(2 * pi)``, the factor that gives
            a pure Gaussian unit peak height. Because Lorentzian broadening
            lowers the Voigt maximum, the resulting peak is below
            ``amplitude`` whenever ``gamma > 0``.
        """
        # Coerce once so plain Python sequences work as well as ndarrays.
        offsets = np.asarray(wavelengths) - self.center

        if self.gamma <= 0:
            # Pure Gaussian: cheaper than the general Voigt evaluation.
            return self.amplitude * np.exp(-0.5 * (offsets / self.sigma) ** 2)

        # General case: Voigt profile (Gaussian convolved with Lorentzian),
        # rescaled with the Gaussian peak normalisation.
        return self.amplitude * voigt_profile(
            offsets, self.sigma, self.gamma
        ) * self.sigma * np.sqrt(2 * np.pi)
@dataclass
class SpectralComponent:
    """
    A spectral component representing a chemical compound or functional group.

    A component is a named collection of absorption bands; its spectrum is
    the sum of the individual band profiles.

    Attributes:
        name: Component name (e.g., "water", "protein", "lipid").
        bands: List of NIRBand objects defining the spectral signature.
        correlation_group: Optional group ID for components that should have
            correlated concentrations (e.g., protein and nitrogen compounds).

    Example:
        >>> water = SpectralComponent(
        ...     name="water",
        ...     bands=[
        ...         NIRBand(center=1450, sigma=25, gamma=3, amplitude=0.8),
        ...         NIRBand(center=1940, sigma=30, gamma=4, amplitude=1.0),
        ...     ],
        ...     correlation_group=1
        ... )
        >>> wavelengths = np.arange(1000, 2500, 2)
        >>> spectrum = water.compute(wavelengths)
    """

    name: str
    bands: List[NIRBand] = field(default_factory=list)
    correlation_group: Optional[int] = None

    def compute(self, wavelengths: np.ndarray) -> np.ndarray:
        """
        Compute the full component spectrum by summing all bands.

        Args:
            wavelengths: Wavelengths in nm at which to evaluate. Any
                array-like (ndarray, list, tuple) is accepted.

        Returns:
            float64 array with the same shape as ``wavelengths`` holding the
            summed absorbance of every band. All zeros when the component
            has no bands.
        """
        # Convert up front so every band receives a real ndarray, even when
        # the caller passed a plain Python sequence.
        grid = np.asarray(wavelengths)

        # Force float64: an integer wavelength grid must not produce an
        # integer accumulator for the in-place additions below.
        spectrum = np.zeros_like(grid, dtype=np.float64)
        for band in self.bands:
            spectrum += band.compute(grid)
        return spectrum
class ComponentLibrary:
    """
    Library of spectral components for synthetic NIRS generation.

    Supports both predefined components (loaded by name) and
    programmatically generated random components. Components are stored by
    name in insertion order; adding a component under an existing name
    replaces the previous one.

    Attributes:
        rng: NumPy random generator used for all random draws.

    Example:
        >>> # Create from predefined components
        >>> library = ComponentLibrary.from_predefined(
        ...     ["water", "protein", "lipid"],
        ...     random_state=42
        ... )
        >>>
        >>> # Or generate random components
        >>> library = ComponentLibrary(random_state=42)
        >>> library.generate_random_library(n_components=5)
        >>>
        >>> # Compute all component spectra
        >>> wavelengths = np.arange(1000, 2500, 2)
        >>> E = library.compute_all(wavelengths)  # shape: (n_components, n_wavelengths)
    """

    def __init__(self, random_state: Optional[int] = None) -> None:
        """
        Initialize an empty component library.

        Args:
            random_state: Random seed for reproducibility.
        """
        self.rng = np.random.default_rng(random_state)
        self._components: Dict[str, SpectralComponent] = {}

    @classmethod
    def from_predefined(
        cls,
        component_names: Optional[List[str]] = None,
        random_state: Optional[int] = None,
    ) -> ComponentLibrary:
        """
        Create a library from predefined spectral components.

        Args:
            component_names: List of component names to include.
                If None, includes all predefined components.
            random_state: Random seed for reproducibility.

        Returns:
            ComponentLibrary instance populated with predefined components.

        Raises:
            ValueError: If an unknown component name is specified.

        Example:
            >>> library = ComponentLibrary.from_predefined(
            ...     ["water", "protein", "lipid"]
            ... )
        """
        from ._constants import get_predefined_components

        library = cls(random_state=random_state)
        predefined = get_predefined_components()

        if component_names is None:
            component_names = list(predefined.keys())

        for name in component_names:
            if name not in predefined:
                available = list(predefined.keys())
                raise ValueError(
                    f"Unknown predefined component: '{name}'. "
                    f"Available components: {available}"
                )
            library._components[name] = predefined[name]

        return library

    def add_component(self, component: SpectralComponent) -> ComponentLibrary:
        """
        Add a spectral component to the library.

        The component is stored under ``component.name``; an existing entry
        with the same name is overwritten.

        Args:
            component: SpectralComponent to add.

        Returns:
            Self for method chaining.
        """
        self._components[component.name] = component
        return self

    def add_random_component(
        self,
        name: str,
        n_bands: int = 3,
        wavelength_range: Tuple[float, float] = (1000, 2500),
        zones: Optional[List[Tuple[float, float]]] = None,
    ) -> SpectralComponent:
        """
        Generate and add a random spectral component.

        For each band a zone is picked uniformly at random, then the band
        center is drawn uniformly inside that zone. Sigma is uniform in
        [10, 30), gamma uniform in [0, 5) and the amplitude is log-normal
        (mean=-0.5, sigma=0.5).

        Args:
            name: Component name.
            n_bands: Number of absorption bands to generate.
            wavelength_range: Overall wavelength range for band placement.
                NOTE(review): this value is currently not applied; band
                centers are constrained only by ``zones``.
            zones: Optional list of (min, max) wavelength zones for band
                centers. If None, ``DEFAULT_NIR_ZONES`` is used.

        Returns:
            The generated SpectralComponent (also stored in the library
            under ``name``).

        Raises:
            ValueError: If bands are requested but ``zones`` is empty.

        Example:
            >>> library = ComponentLibrary(random_state=42)
            >>> component = library.add_random_component(
            ...     "random_compound",
            ...     n_bands=4,
            ...     wavelength_range=(1000, 2500)
            ... )
        """
        if zones is None:
            # Imported lazily: only needed when the caller supplies no zones.
            from ._constants import DEFAULT_NIR_ZONES

            zones = DEFAULT_NIR_ZONES

        if n_bands > 0 and len(zones) == 0:
            # Without this guard the RNG raises an opaque "low >= high" error.
            raise ValueError("zones must contain at least one (min, max) interval")

        bands = []
        for i in range(n_bands):
            # The draw order (zone, center, sigma, gamma, amplitude) is kept
            # fixed so that seeded runs stay reproducible.
            zone = zones[self.rng.integers(0, len(zones))]
            center = self.rng.uniform(*zone)
            sigma = self.rng.uniform(10, 30)
            gamma = self.rng.uniform(0, 5)
            amplitude = self.rng.lognormal(mean=-0.5, sigma=0.5)
            bands.append(
                NIRBand(
                    center=center,
                    sigma=sigma,
                    gamma=gamma,
                    amplitude=amplitude,
                    name=f"band_{i}",
                )
            )

        component = SpectralComponent(name=name, bands=bands)
        self._components[name] = component
        return component

    def generate_random_library(
        self,
        n_components: int = 5,
        n_bands_range: Tuple[int, int] = (2, 6),
    ) -> ComponentLibrary:
        """
        Generate a library of random spectral components.

        Components are named ``component_0``, ``component_1``, ... and are
        created with :meth:`add_random_component` using its default zones.

        Args:
            n_components: Number of components to generate.
            n_bands_range: Range (min, max) for number of bands per
                component. The upper bound is exclusive, so the default
                (2, 6) yields 2 to 5 bands. If min equals max, every
                component gets exactly that many bands.

        Returns:
            Self for method chaining.

        Example:
            >>> library = ComponentLibrary(random_state=42)
            >>> library.generate_random_library(n_components=5, n_bands_range=(2, 5))
        """
        low, high = n_bands_range
        for i in range(n_components):
            if low == high:
                # Degenerate range: rng.integers(low, high) would raise, so
                # treat it as a fixed band count.
                n_bands = low
            else:
                n_bands = int(self.rng.integers(low, high))
            self.add_random_component(f"component_{i}", n_bands=n_bands)
        return self

    @property
    def components(self) -> Dict[str, SpectralComponent]:
        """Get all components in the library (the internal name -> component dict)."""
        return self._components

    @property
    def n_components(self) -> int:
        """Number of components in the library."""
        return len(self._components)

    @property
    def component_names(self) -> List[str]:
        """Get list of component names in insertion order."""
        return list(self._components.keys())

    def compute_all(self, wavelengths: np.ndarray) -> np.ndarray:
        """
        Compute spectra for all components at given wavelengths.

        Args:
            wavelengths: Wavelengths in nm (any array-like).

        Returns:
            Array of shape (n_components, n_wavelengths) containing the
            spectrum of each component, in insertion order. An empty
            library yields an array of shape (0, n_wavelengths).

        Example:
            >>> library = ComponentLibrary.from_predefined(["water", "protein"])
            >>> wavelengths = np.arange(1000, 2500, 2)
            >>> E = library.compute_all(wavelengths)
            >>> print(E.shape)
            (2, 750)
        """
        grid = np.asarray(wavelengths)
        if not self._components:
            # np.array([]) would collapse to shape (0,); keep the documented
            # 2-D layout so downstream matrix code does not need a special case.
            return np.empty((0, *grid.shape), dtype=np.float64)
        return np.array([comp.compute(grid) for comp in self._components.values()])

    def __len__(self) -> int:
        """Return number of components."""
        return self.n_components

    def __iter__(self):
        """Iterate over components (values, not names) in insertion order."""
        return iter(self._components.values())

    def __getitem__(self, name: str) -> SpectralComponent:
        """Get component by name; raises KeyError if it is not present."""
        return self._components[name]

    def __contains__(self, name: str) -> bool:
        """Check if component exists by name."""
        return name in self._components