"""
Instrument archetype simulation for synthetic NIRS data generation.
This module provides realistic simulation of different NIR instrument types,
including their optical characteristics, noise models, and measurement
configurations. It also supports multi-sensor systems that stitch together
signal chunks from different wavelength ranges, and multi-scan averaging.
Key Features:
- 20+ instrument archetypes covering benchtop, handheld, process, and embedded
- Multi-sensor stitching simulation (combining multiple detector ranges)
- Multi-scan averaging with realistic noise reduction
- Detector-specific noise models (shot, thermal, 1/f)
- Wavelength calibration effects
- Stray light and etalon interference
References:
- Workman Jr, J., & Weyer, L. (2012). Practical Guide and Spectral Atlas
for Interpretive Near-Infrared Spectroscopy. CRC Press.
- Siesler, H. W., Ozaki, Y., Kawata, S., & Heise, H. M. (2002). Near-Infrared
Spectroscopy: Principles, Instruments, Applications. Wiley-VCH.
- ASTM E1944-98(2017): Standard Practice for Describing and Measuring
Performance of NIR Instruments.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple, Union
import numpy as np
from scipy.ndimage import gaussian_filter1d
from .wavenumber import wavenumber_to_wavelength, wavelength_to_wavenumber
[docs]
class InstrumentCategory(str, Enum):
    """Broad families of NIR instruments that an archetype can belong to.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string value.
    """

    # High-end laboratory instruments
    BENCHTOP = "benchtop"
    # Portable/mobile instruments
    HANDHELD = "handheld"
    # Industrial inline/atline analyzers
    PROCESS = "process"
    # MEMS-based compact modules
    EMBEDDED = "embedded"
    # Fourier-Transform NIR
    FT_NIR = "ft_nir"
    # Discrete filter instruments
    FILTER = "filter"
    # Diode array detectors
    DIODE_ARRAY = "diode_array"
[docs]
class DetectorType(str, Enum):
    """Detector technologies used by the simulated NIR instruments.

    The wavelength spans in the comments are nominal sensitivity ranges.
    """

    # Silicon (400-1100 nm)
    SI = "si"
    # InGaAs (900-1700 nm)
    INGAAS = "ingaas"
    # Extended InGaAs (900-2500 nm)
    INGAAS_EXTENDED = "ingaas_ext"
    # Lead sulfide (1000-3000 nm)
    PBS = "pbs"
    # Lead selenide (1500-5000 nm)
    PBSE = "pbse"
    # MEMS-based spectrometers
    MEMS = "mems"
    # Mercury cadmium telluride (cooled)
    MCT = "mct"
[docs]
class MonochromatorType(str, Enum):
    """Wavelength-selection mechanisms available to an instrument archetype."""

    # Diffraction grating
    GRATING = "grating"
    # Interferometer (FTIR)
    FT = "fourier_transform"
    # Discrete filters
    FILTER_WHEEL = "filter_wheel"
    # Acousto-optic tunable filter
    AOTF = "aotf"
    # Linear variable filter
    LVF = "lvf"
    # Digital micromirror device
    DMD = "dmd"
    # MEMS Fabry-Perot
    FABRY_PEROT = "fabry_perot"
[docs]
@dataclass
class SensorConfig:
    """
    Settings for one detector of a multi-sensor instrument.

    Extended-range instruments combine several detectors, each covering its
    own wavelength window (e.g. Si + InGaAs for 400-2500 nm), and stitch the
    partial signals into one spectrum.

    Attributes:
        detector_type: Detector technology of this sensor.
        wavelength_range: (start, end) of the covered window, in nm.
        spectral_resolution: Resolution (FWHM) in nm.
        noise_level: Relative noise level; 1.0 is the standard level.
        gain: Multiplier applied to the detector signal.
        overlap_range: Overlap with the neighbouring sensor used for
            stitching, in nm.
    """

    # Required fields (no defaults) come first so they can be passed positionally.
    detector_type: DetectorType
    wavelength_range: Tuple[float, float]

    # Optional tuning parameters.
    spectral_resolution: float = 8.0
    noise_level: float = 1.0
    gain: float = 1.0
    overlap_range: float = 20.0  # nm shared with the adjacent sensor
[docs]
@dataclass
class MultiSensorConfig:
    """
    Settings controlling how signals from several sensors are stitched.

    Wide-range NIR instruments frequently rely on more than one detector;
    this configuration describes the detectors and how their signals are
    merged into a single spectrum.

    Attributes:
        enabled: Turns multi-sensor simulation on or off.
        sensors: One SensorConfig per detector.
        stitch_method: How overlapping regions are combined. Options:
            'weighted', 'average', 'first', 'last', 'optimal'.
        stitch_smoothing: Width (nm) of the smoothing window at stitch
            boundaries.
        add_stitch_artifacts: Whether stitching artifacts are simulated.
        artifact_intensity: Strength of the stitching artifacts (0-1).
    """

    enabled: bool = False
    # default_factory gives every instance its own list.
    sensors: List[SensorConfig] = field(default_factory=list)
    stitch_method: str = "weighted"  # weighted, average, first, last, optimal
    stitch_smoothing: float = 10.0  # nm
    add_stitch_artifacts: bool = True
    artifact_intensity: float = 0.02
[docs]
@dataclass
class MultiScanConfig:
    """
    Settings for simulating multi-scan averaging/accumulation.

    Instruments commonly record several scans of the same sample and combine
    them to raise the signal-to-noise ratio; these fields parameterize that
    process.

    Attributes:
        enabled: Turns multi-scan simulation on or off.
        n_scans: How many scans are simulated and combined.
        averaging_method: Combination rule. Options: 'mean', 'median',
            'weighted', 'savgol'.
        scan_to_scan_noise: Extra noise between scans (simulates drift).
        wavelength_jitter: Random wavelength shift between scans, in nm.
        discard_outliers: Whether outlier scans are dropped.
        outlier_threshold: Z-score threshold used for outlier detection.
    """

    enabled: bool = False
    n_scans: int = 16
    averaging_method: str = "mean"  # mean, median, weighted, savgol
    scan_to_scan_noise: float = 0.001  # extra noise between scans
    wavelength_jitter: float = 0.05  # nm shift between scans
    discard_outliers: bool = False
    outlier_threshold: float = 3.0  # Z-score
[docs]
@dataclass
class EdgeArtifactsConfig:
    """
    Settings for edge artifacts added to synthetic NIRS spectra.

    Edge artifacts appear frequently in NIR spectra. Typical sources are:
        - Detector sensitivity roll-off at the spectral extremes
        - Stray light contamination
        - Absorption peaks truncated by the measurement boundaries
        - Baseline curvature/bending near the spectrum edges

    Literature describing these effects:
        - Workman Jr, J., & Weyer, L. (2012). Practical Guide and Spectral Atlas
          for Interpretive Near-Infrared Spectroscopy. CRC Press. Chapters 4-5.
        - Burns, D. A., & Ciurczak, E. W. (2007). Handbook of Near-Infrared
          Analysis. CRC Press. Chapters on instrumentation.
        - ASTM E1944-98(2017): Standard Practice for Describing and Measuring
          Performance of NIR Instruments.

    Attributes:
        enable_detector_rolloff: Enable detector sensitivity roll-off.
        enable_stray_light: Enable stray light effects.
        enable_truncated_peaks: Enable truncated absorption peaks.
        enable_edge_curvature: Enable baseline curvature at edges.
        detector_model: Detector model for roll-off ('generic_nir', 'ingaas',
            'pbs', 'silicon_ccd'). Defaults to 'generic_nir'.
        rolloff_severity: Severity of detector roll-off (0.0-1.0).
        stray_fraction: Stray light fraction (0.0-0.02 typical).
        stray_wavelength_dependent: Whether stray light varies with wavelength.
        left_peak_amplitude: Amplitude of the truncated peak at the low
            wavelength edge.
        right_peak_amplitude: Amplitude of the truncated peak at the high
            wavelength edge.
        curvature_type: Edge curvature shape ('concave', 'convex',
            'asymmetric').
        left_curvature_severity: Severity of left edge curvature (0.0-1.0).
        right_curvature_severity: Severity of right edge curvature (0.0-1.0).
    """

    # --- Master switches: every effect is off by default ---
    enable_detector_rolloff: bool = False
    enable_stray_light: bool = False
    enable_truncated_peaks: bool = False
    enable_edge_curvature: bool = False

    # --- Detector roll-off ---
    detector_model: str = "generic_nir"
    rolloff_severity: float = 0.3

    # --- Stray light ---
    stray_fraction: float = 0.001
    stray_wavelength_dependent: bool = True

    # --- Truncated peaks ---
    left_peak_amplitude: float = 0.0
    right_peak_amplitude: float = 0.0

    # --- Edge curvature ---
    curvature_type: str = "concave"  # concave, convex, asymmetric
    left_curvature_severity: float = 0.0
    right_curvature_severity: float = 0.0
[docs]
@dataclass
class InstrumentArchetype:
    """
    Parameter set describing one simulated NIR instrument.

    Bundles the optical, electronic, and measurement characteristics of an
    instrument so that synthetic spectra can be generated to resemble that
    instrument type.

    Attributes:
        name: Instrument archetype name.
        category: Instrument category (benchtop, handheld, etc.).
        detector_type: Primary detector type.
        monochromator_type: Wavelength selection mechanism.
        wavelength_range: Nominal wavelength range (nm).
        spectral_resolution: Spectral resolution (FWHM in nm).
        wavelength_accuracy: Wavelength accuracy (nm).
        photometric_noise: Photometric noise level (AU).
        photometric_range: Photometric range (min, max AU).
        snr: Signal-to-noise ratio at 1 AU.
        stray_light: Stray light level (fraction).
        warm_up_drift: Intensity drift during warm-up (%/hour).
        temperature_sensitivity: Wavelength shift per °C.
        scan_speed: Scans per second.
        integration_time_ms: Integration time in milliseconds.
        optical_path: Optical path type ('transmission', 'reflection', etc.).
        multi_sensor: Multi-sensor configuration.
        multi_scan: Multi-scan averaging configuration.
        description: Human-readable description.
    """

    # --- Identity and hardware (required) ---
    name: str
    category: InstrumentCategory
    detector_type: DetectorType
    monochromator_type: MonochromatorType
    wavelength_range: Tuple[float, float]

    # --- Spectral and photometric performance ---
    spectral_resolution: float = 8.0
    wavelength_accuracy: float = 0.5
    photometric_noise: float = 0.0001  # AU
    photometric_range: Tuple[float, float] = (0.0, 3.0)
    snr: float = 10000.0
    stray_light: float = 0.0001

    # --- Stability and acquisition ---
    warm_up_drift: float = 0.1
    temperature_sensitivity: float = 0.01  # nm/°C
    scan_speed: float = 1.0
    integration_time_ms: float = 100.0
    optical_path: str = "transmission"

    # --- Optional sub-configurations (a fresh instance per archetype) ---
    multi_sensor: MultiSensorConfig = field(default_factory=MultiSensorConfig)
    multi_scan: MultiScanConfig = field(default_factory=MultiScanConfig)
    description: str = ""

    def get_noise_model_params(self) -> Dict[str, float]:
        """
        Return the noise scaling factors associated with ``detector_type``.

        Returns:
            Dict with the keys ``shot_noise_factor``, ``thermal_noise_factor``,
            ``read_noise_factor`` and ``flicker_noise_factor`` (1/f noise).
            Detector types without an entry in the override table below get
            the baseline values unchanged.
        """
        factors = {
            "shot_noise_factor": 1.0,
            "thermal_noise_factor": 1.0,
            "read_noise_factor": 1.0,
            "flicker_noise_factor": 0.0,  # 1/f noise
        }
        # Ordered (detector, overrides) pairs. Matching uses ``==`` rather
        # than a dict lookup so that a plain string equal to the enum value
        # is still recognised.
        detector_overrides = (
            (
                DetectorType.SI,
                {"shot_noise_factor": 0.8, "thermal_noise_factor": 0.5},
            ),
            (
                DetectorType.INGAAS,
                {"shot_noise_factor": 1.0, "thermal_noise_factor": 0.8},
            ),
            (
                DetectorType.INGAAS_EXTENDED,
                {"shot_noise_factor": 1.2, "thermal_noise_factor": 1.2},
            ),
            (
                DetectorType.PBS,
                {
                    "shot_noise_factor": 1.5,
                    "thermal_noise_factor": 1.8,
                    # PbS has significant 1/f noise
                    "flicker_noise_factor": 0.3,
                },
            ),
            (
                DetectorType.MEMS,
                {
                    "shot_noise_factor": 1.5,
                    "thermal_noise_factor": 1.0,
                    "read_noise_factor": 1.5,
                },
            ),
        )
        for detector, overrides in detector_overrides:
            if self.detector_type == detector:
                factors.update(overrides)
                break
        return factors
# ============================================================================
# Predefined Instrument Archetypes
# ============================================================================
def _create_benchtop_foss_xds() -> InstrumentArchetype:
    """Build the FOSS XDS-style benchtop dispersive NIR archetype."""
    # Two detectors share the range, meeting at 1100 nm.
    detectors = [
        SensorConfig(
            detector_type=DetectorType.SI,
            wavelength_range=(400, 1100),
            spectral_resolution=0.5,
            noise_level=0.8,
        ),
        SensorConfig(
            detector_type=DetectorType.PBS,
            wavelength_range=(1100, 2500),
            spectral_resolution=0.5,
            noise_level=1.2,
        ),
    ]
    stitching = MultiSensorConfig(
        enabled=True,
        sensors=detectors,
        stitch_method="weighted",
        add_stitch_artifacts=True,
        artifact_intensity=0.01,
    )
    averaging = MultiScanConfig(
        enabled=True,
        n_scans=32,
        averaging_method="mean",
    )
    return InstrumentArchetype(
        name="foss_xds",
        category=InstrumentCategory.BENCHTOP,
        detector_type=DetectorType.SI,
        monochromator_type=MonochromatorType.GRATING,
        wavelength_range=(400, 2500),
        spectral_resolution=0.5,
        wavelength_accuracy=0.05,
        photometric_noise=0.00005,
        snr=50000,
        stray_light=0.00005,
        scan_speed=2.0,
        multi_sensor=stitching,
        multi_scan=averaging,
        description="High-end benchtop dispersive NIR with Si+PbS dual detector",
    )
def _create_benchtop_bruker_mpa() -> InstrumentArchetype:
    """Build the Bruker MPA-style FT-NIR benchtop archetype."""
    averaging = MultiScanConfig(
        enabled=True,
        n_scans=64,
        averaging_method="mean",
    )
    return InstrumentArchetype(
        name="bruker_mpa",
        category=InstrumentCategory.FT_NIR,
        detector_type=DetectorType.INGAAS_EXTENDED,
        monochromator_type=MonochromatorType.FT,
        # Equivalent to 12500-3600 cm⁻¹.
        wavelength_range=(800, 2778),
        # cm⁻¹ resolution.
        # NOTE(review): the field is documented as FWHM in nm, yet this value
        # is annotated as cm⁻¹ - confirm the intended unit.
        spectral_resolution=2.0,
        wavelength_accuracy=0.01,
        photometric_noise=0.00003,
        snr=80000,
        stray_light=0.00001,
        scan_speed=10.0,
        multi_scan=averaging,
        description="Research-grade FT-NIR with extended InGaAs detector",
    )
def _create_benchtop_perkin_spectrum() -> InstrumentArchetype:
    """Build the PerkinElmer Spectrum Two-style FTIR/NIR archetype."""
    averaging = MultiScanConfig(enabled=True, n_scans=32)
    return InstrumentArchetype(
        name="perkin_spectrum_two",
        category=InstrumentCategory.FT_NIR,
        detector_type=DetectorType.INGAAS_EXTENDED,
        monochromator_type=MonochromatorType.FT,
        wavelength_range=(780, 2500),
        spectral_resolution=4.0,
        wavelength_accuracy=0.02,
        photometric_noise=0.00005,
        snr=40000,
        stray_light=0.0001,
        scan_speed=4.0,
        multi_scan=averaging,
        description="General-purpose benchtop FT-NIR",
    )
def _create_handheld_viavi_micronir() -> InstrumentArchetype:
    """Build the VIAVI MicroNIR-style handheld dispersive archetype."""
    averaging = MultiScanConfig(
        enabled=True,
        n_scans=50,
        averaging_method="mean",
        scan_to_scan_noise=0.003,
    )
    return InstrumentArchetype(
        name="viavi_micronir",
        category=InstrumentCategory.HANDHELD,
        detector_type=DetectorType.INGAAS,
        monochromator_type=MonochromatorType.LVF,
        wavelength_range=(908, 1676),
        spectral_resolution=12.0,
        wavelength_accuracy=1.0,
        photometric_noise=0.0005,
        snr=5000,
        stray_light=0.001,
        scan_speed=100.0,  # very fast
        integration_time_ms=10.0,
        multi_scan=averaging,
        description="Compact handheld NIR with LVF technology",
    )
def _create_handheld_scio() -> InstrumentArchetype:
    """Build the SCiO-style consumer handheld archetype."""
    # The only archetype here that combines its scans with the median.
    averaging = MultiScanConfig(
        enabled=True,
        n_scans=100,
        averaging_method="median",
        scan_to_scan_noise=0.005,
    )
    return InstrumentArchetype(
        name="scio",
        category=InstrumentCategory.HANDHELD,
        detector_type=DetectorType.MEMS,
        monochromator_type=MonochromatorType.FABRY_PEROT,
        wavelength_range=(740, 1070),
        spectral_resolution=15.0,
        wavelength_accuracy=2.0,
        photometric_noise=0.002,
        snr=1000,
        stray_light=0.005,
        scan_speed=200.0,
        integration_time_ms=5.0,
        multi_scan=averaging,
        description="Consumer-grade MEMS-based miniature NIR",
    )
def _create_handheld_tellspec() -> InstrumentArchetype:
    """Build the TellSpec-style food scanner archetype."""
    averaging = MultiScanConfig(enabled=True, n_scans=30)
    return InstrumentArchetype(
        name="tellspec",
        category=InstrumentCategory.HANDHELD,
        detector_type=DetectorType.INGAAS,
        monochromator_type=MonochromatorType.GRATING,
        wavelength_range=(900, 1700),
        spectral_resolution=10.0,
        wavelength_accuracy=1.5,
        photometric_noise=0.001,
        snr=3000,
        stray_light=0.002,
        scan_speed=50.0,
        multi_scan=averaging,
        description="Handheld food analysis NIR scanner",
    )
def _create_handheld_linkam() -> InstrumentArchetype:
    """Build the LinkSquare-style portable NIR archetype."""
    averaging = MultiScanConfig(enabled=True, n_scans=50)
    return InstrumentArchetype(
        name="linksquare",
        category=InstrumentCategory.HANDHELD,
        # NOTE(review): InGaAs is annotated as 900-1700 nm in DetectorType,
        # while this range is 750-1050 nm - confirm the detector choice.
        detector_type=DetectorType.INGAAS,
        monochromator_type=MonochromatorType.GRATING,
        wavelength_range=(750, 1050),
        spectral_resolution=10.0,
        wavelength_accuracy=1.0,
        photometric_noise=0.0015,
        snr=2000,
        stray_light=0.003,
        scan_speed=100.0,
        multi_scan=averaging,
        description="Compact portable NIR for material identification",
    )
def _create_process_niro() -> InstrumentArchetype:
    """Build the NIR-O-style process NIR probe archetype."""
    # Larger wavelength jitter than the MultiScanConfig default (0.05 nm).
    averaging = MultiScanConfig(
        enabled=True,
        n_scans=16,
        averaging_method="mean",
        wavelength_jitter=0.1,
    )
    return InstrumentArchetype(
        name="nir_o_process",
        category=InstrumentCategory.PROCESS,
        detector_type=DetectorType.INGAAS_EXTENDED,
        monochromator_type=MonochromatorType.GRATING,
        wavelength_range=(1000, 2200),
        spectral_resolution=6.0,
        wavelength_accuracy=0.5,
        photometric_noise=0.0002,
        snr=15000,
        stray_light=0.0005,
        temperature_sensitivity=0.02,
        scan_speed=5.0,
        optical_path="reflection",
        multi_scan=averaging,
        description="Robust process NIR with fiber-coupled probe",
    )
def _create_process_asd_fieldspec() -> InstrumentArchetype:
    """
    Build the ASD FieldSpec-style portable/process spectrometer archetype.

    The instrument is modelled with three stitched detectors covering
    350-1000 nm, 1000-1830 nm and 1830-2500 nm. No multi-scan configuration
    is supplied, so the ``MultiScanConfig`` default (disabled) applies.
    """
    detectors = [
        SensorConfig(
            detector_type=DetectorType.SI,
            wavelength_range=(350, 1000),
            spectral_resolution=3.0,
            noise_level=0.6,
        ),
        SensorConfig(
            detector_type=DetectorType.INGAAS,
            wavelength_range=(1000, 1830),
            spectral_resolution=8.0,
            noise_level=1.0,
        ),
        # The 1830-2500 nm window lies beyond the 900-1700 nm span annotated
        # for standard InGaAs, so the long-wave detector is the extended
        # variant (consistent with the archetype's primary detector_type).
        SensorConfig(
            detector_type=DetectorType.INGAAS_EXTENDED,
            wavelength_range=(1830, 2500),
            spectral_resolution=8.0,
            noise_level=1.2,
        ),
    ]
    stitching = MultiSensorConfig(
        enabled=True,
        sensors=detectors,
        stitch_method="weighted",
        add_stitch_artifacts=True,
        artifact_intensity=0.015,
    )
    return InstrumentArchetype(
        name="asd_fieldspec",
        category=InstrumentCategory.PROCESS,
        detector_type=DetectorType.INGAAS_EXTENDED,
        monochromator_type=MonochromatorType.GRATING,
        wavelength_range=(350, 2500),
        spectral_resolution=3.0,
        wavelength_accuracy=0.5,
        photometric_noise=0.0001,
        snr=25000,
        stray_light=0.0002,
        scan_speed=10.0,
        multi_sensor=stitching,
        description="Field portable full-range spectrometer with 3 detectors",
    )
def _create_embedded_neospectra() -> InstrumentArchetype:
    """Build the NeoSpectra Micro-style MEMS FT-NIR module archetype."""
    averaging = MultiScanConfig(
        enabled=True,
        n_scans=20,
        averaging_method="mean",
    )
    return InstrumentArchetype(
        name="neospectra_micro",
        category=InstrumentCategory.EMBEDDED,
        detector_type=DetectorType.MEMS,
        monochromator_type=MonochromatorType.FT,
        wavelength_range=(1350, 2500),
        spectral_resolution=16.0,
        wavelength_accuracy=1.0,
        photometric_noise=0.001,
        snr=5000,
        stray_light=0.002,
        scan_speed=20.0,
        integration_time_ms=50.0,
        multi_scan=averaging,
        description="Ultra-compact MEMS FT-NIR chip module",
    )
def _create_embedded_innospectra() -> InstrumentArchetype:
    """Build the InnoSpectra-style compact MEMS spectrometer archetype."""
    averaging = MultiScanConfig(enabled=True, n_scans=25)
    return InstrumentArchetype(
        name="innospectra",
        category=InstrumentCategory.EMBEDDED,
        detector_type=DetectorType.MEMS,
        monochromator_type=MonochromatorType.FABRY_PEROT,
        wavelength_range=(900, 1700),
        spectral_resolution=10.0,
        wavelength_accuracy=1.5,
        photometric_noise=0.0008,
        snr=4000,
        stray_light=0.003,
        scan_speed=50.0,
        multi_scan=averaging,
        description="Compact MEMS NIR for embedded applications",
    )
def _create_ft_thermo_antaris() -> InstrumentArchetype:
    """Build the Thermo Antaris-style research FT-NIR archetype."""
    averaging = MultiScanConfig(
        enabled=True,
        n_scans=64,
        averaging_method="mean",
    )
    return InstrumentArchetype(
        name="thermo_antaris",
        category=InstrumentCategory.FT_NIR,
        detector_type=DetectorType.INGAAS_EXTENDED,
        monochromator_type=MonochromatorType.FT,
        wavelength_range=(800, 2500),
        spectral_resolution=1.0,  # very high resolution
        wavelength_accuracy=0.01,
        photometric_noise=0.00002,
        snr=100000,
        stray_light=0.00001,
        scan_speed=30.0,
        multi_scan=averaging,
        description="High-resolution research-grade FT-NIR",
    )
def _create_ft_abb_mb3600() -> InstrumentArchetype:
    """Build the ABB MB3600-style FT-NIR analyzer archetype."""
    averaging = MultiScanConfig(enabled=True, n_scans=32)
    return InstrumentArchetype(
        name="abb_mb3600",
        category=InstrumentCategory.FT_NIR,
        detector_type=DetectorType.INGAAS_EXTENDED,
        monochromator_type=MonochromatorType.FT,
        # Equivalent to 12000-3800 cm⁻¹.
        wavelength_range=(833, 2632),
        spectral_resolution=4.0,
        wavelength_accuracy=0.02,
        photometric_noise=0.00004,
        snr=70000,
        stray_light=0.00002,
        scan_speed=20.0,
        multi_scan=averaging,
        description="QC/QA laboratory FT-NIR analyzer",
    )
def _create_filter_foss_infratec() -> InstrumentArchetype:
    """Build the FOSS Infratec-style discrete filter instrument archetype."""
    averaging = MultiScanConfig(enabled=True, n_scans=5)
    return InstrumentArchetype(
        name="foss_infratec",
        category=InstrumentCategory.FILTER,
        detector_type=DetectorType.PBS,
        monochromator_type=MonochromatorType.FILTER_WHEEL,
        wavelength_range=(850, 1050),  # limited by filter selection
        spectral_resolution=15.0,  # broad filter bandwidth
        wavelength_accuracy=2.0,
        photometric_noise=0.0005,
        snr=10000,
        stray_light=0.001,
        scan_speed=3.0,  # time per filter
        multi_scan=averaging,
        description="Discrete filter grain analyzer",
    )
def _create_filter_perten_da7200() -> InstrumentArchetype:
    """
    Build the Perten DA7200-style diode array archetype.

    The detector is InGaAs: the 950-1650 nm range lies almost entirely
    beyond the 400-1100 nm span annotated for silicon in ``DetectorType``,
    and matches the 900-1700 nm span annotated for InGaAs. The detector type
    selects the factors returned by
    ``InstrumentArchetype.get_noise_model_params``.
    """
    averaging = MultiScanConfig(
        enabled=True,
        n_scans=10,
        averaging_method="mean",
    )
    return InstrumentArchetype(
        name="perten_da7200",
        category=InstrumentCategory.DIODE_ARRAY,
        detector_type=DetectorType.INGAAS,
        monochromator_type=MonochromatorType.GRATING,
        wavelength_range=(950, 1650),
        spectral_resolution=5.0,
        wavelength_accuracy=0.3,
        photometric_noise=0.0002,
        snr=20000,
        stray_light=0.0003,
        scan_speed=40.0,
        multi_scan=averaging,
        description="Diode array NIR for grain/food analysis",
    )
def _create_benchtop_unity() -> InstrumentArchetype:
    """Build the Unity Scientific SpectraStar-style benchtop archetype."""
    averaging = MultiScanConfig(enabled=True, n_scans=16)
    return InstrumentArchetype(
        name="unity_spectrastar",
        category=InstrumentCategory.BENCHTOP,
        detector_type=DetectorType.INGAAS_EXTENDED,
        monochromator_type=MonochromatorType.GRATING,
        wavelength_range=(680, 2500),
        spectral_resolution=5.0,
        wavelength_accuracy=0.2,
        photometric_noise=0.0001,
        snr=35000,
        stray_light=0.0002,
        scan_speed=3.0,
        multi_scan=averaging,
        description="Post-dispersive benchtop NIR analyzer",
    )
def _create_handheld_si_ware() -> InstrumentArchetype:
    """Build the Si-Ware NeoSpectra Scanner-style handheld archetype."""
    averaging = MultiScanConfig(
        enabled=True,
        n_scans=20,
        averaging_method="mean",
        scan_to_scan_noise=0.004,
    )
    return InstrumentArchetype(
        name="siware_neoscanner",
        category=InstrumentCategory.HANDHELD,
        detector_type=DetectorType.MEMS,
        monochromator_type=MonochromatorType.FT,
        wavelength_range=(1350, 2500),
        spectral_resolution=16.0,
        wavelength_accuracy=1.5,
        photometric_noise=0.001,
        snr=4000,
        stray_light=0.003,
        scan_speed=10.0,
        multi_scan=averaging,
        description="MEMS FT-NIR handheld scanner",
    )
def _create_process_buchi() -> InstrumentArchetype:
    """Build the BUCHI NIRMaster-style process NIR archetype."""
    averaging = MultiScanConfig(enabled=True, n_scans=16)
    return InstrumentArchetype(
        name="buchi_nirmaster",
        category=InstrumentCategory.PROCESS,
        detector_type=DetectorType.INGAAS,
        monochromator_type=MonochromatorType.GRATING,
        wavelength_range=(1000, 2500),
        spectral_resolution=4.0,
        wavelength_accuracy=0.3,
        photometric_noise=0.0002,
        snr=25000,
        stray_light=0.0003,
        temperature_sensitivity=0.01,
        scan_speed=5.0,
        multi_scan=averaging,
        description="Industrial process NIR analyzer",
    )
def _create_benchtop_metrohm() -> InstrumentArchetype:
    """Build the Metrohm NIRS DS2500-style benchtop archetype."""
    # Two detectors share the range, meeting at 1100 nm.
    detectors = [
        SensorConfig(
            detector_type=DetectorType.SI,
            wavelength_range=(400, 1100),
            spectral_resolution=0.5,
            noise_level=0.7,
        ),
        SensorConfig(
            detector_type=DetectorType.PBS,
            wavelength_range=(1100, 2500),
            spectral_resolution=0.5,
            noise_level=1.0,
        ),
    ]
    # Artifact settings are left at the MultiSensorConfig defaults.
    stitching = MultiSensorConfig(
        enabled=True,
        sensors=detectors,
        stitch_method="weighted",
    )
    averaging = MultiScanConfig(enabled=True, n_scans=32)
    return InstrumentArchetype(
        name="metrohm_ds2500",
        category=InstrumentCategory.BENCHTOP,
        detector_type=DetectorType.PBS,
        monochromator_type=MonochromatorType.GRATING,
        wavelength_range=(400, 2500),
        spectral_resolution=0.5,
        wavelength_accuracy=0.05,
        photometric_noise=0.00005,
        snr=50000,
        stray_light=0.00005,
        scan_speed=2.0,
        multi_sensor=stitching,
        multi_scan=averaging,
        description="Vis-NIR benchtop with dual detector",
    )
# Registry of all predefined instrument archetypes, keyed by archetype name.
INSTRUMENT_ARCHETYPES: Dict[str, InstrumentArchetype] = {}


def _register_archetypes() -> None:
    """Fill ``INSTRUMENT_ARCHETYPES`` with every predefined archetype.

    Each factory is called once and its result is stored under the
    archetype's ``name``; insertion follows the factory order below.
    """
    factories = (
        _create_benchtop_foss_xds,
        _create_benchtop_bruker_mpa,
        _create_benchtop_perkin_spectrum,
        _create_handheld_viavi_micronir,
        _create_handheld_scio,
        _create_handheld_tellspec,
        _create_handheld_linkam,
        _create_process_niro,
        _create_process_asd_fieldspec,
        _create_embedded_neospectra,
        _create_embedded_innospectra,
        _create_ft_thermo_antaris,
        _create_ft_abb_mb3600,
        _create_filter_foss_infratec,
        _create_filter_perten_da7200,
        _create_benchtop_unity,
        _create_handheld_si_ware,
        _create_process_buchi,
        _create_benchtop_metrohm,
    )
    # The module-level dict is mutated in place, so no ``global`` is needed.
    for build in factories:
        instrument = build()
        INSTRUMENT_ARCHETYPES[instrument.name] = instrument


# Register archetypes on module load
_register_archetypes()
[docs]
def get_instrument_archetype(name: str) -> InstrumentArchetype:
    """
    Get a predefined instrument archetype by name.

    An exact match is tried first. If that fails, the name is normalized the
    same way ``get_instrument_wavelengths`` does (lowercased, with hyphens
    and spaces replaced by underscores) and looked up again, so that e.g.
    ``"FOSS-XDS"`` resolves to ``"foss_xds"``.

    Args:
        name: Instrument archetype name.

    Returns:
        The InstrumentArchetype instance stored in the registry. It is the
        shared object, not a copy.

    Raises:
        KeyError: If archetype name not found.

    Example:
        >>> archetype = get_instrument_archetype("foss_xds")
        >>> print(archetype.wavelength_range)
        (400, 2500)
    """
    if name in INSTRUMENT_ARCHETYPES:
        return INSTRUMENT_ARCHETYPES[name]

    # Fallback: tolerate differences in case and separators.
    if isinstance(name, str):
        normalized = name.lower().replace("-", "_").replace(" ", "_")
        if normalized in INSTRUMENT_ARCHETYPES:
            return INSTRUMENT_ARCHETYPES[normalized]

    available = list(INSTRUMENT_ARCHETYPES.keys())
    raise KeyError(
        f"Unknown instrument archetype: '{name}'. "
        f"Available: {available}"
    )
[docs]
def list_instrument_archetypes(
    category: Optional[InstrumentCategory] = None
) -> List[str]:
    """
    Return the names of the registered instrument archetypes.

    Args:
        category: When given, only archetypes of this category are listed.

    Returns:
        List of archetype names, in registry order.

    Example:
        >>> list_instrument_archetypes(InstrumentCategory.HANDHELD)
        ['viavi_micronir', 'scio', 'tellspec', 'linksquare', 'siware_neoscanner']
    """
    if category is not None:
        selected = {
            key: archetype
            for key, archetype in INSTRUMENT_ARCHETYPES.items()
            if archetype.category == category
        }
    else:
        selected = INSTRUMENT_ARCHETYPES
    return list(selected)
[docs]
def get_instruments_by_category() -> Dict[str, List[str]]:
    """
    Group the registered instrument names by their category.

    Returns:
        Dictionary mapping each category value (a string such as
        ``"benchtop"``) to the list of instrument names in that category.
    """
    grouped: Dict[str, List[str]] = {}
    for instrument_name, archetype in INSTRUMENT_ARCHETYPES.items():
        # setdefault creates the bucket the first time a category is seen.
        grouped.setdefault(archetype.category.value, []).append(instrument_name)
    return grouped
# ============================================================================
# Phase 6: Instrument Wavelength Grids
# ============================================================================
# Predefined wavelength grids (nm) for common NIR instruments.
# These allow generating synthetic data that exactly matches real instrument wavelengths.
# NOTE: np.arange excludes its stop value, so each stop below is one step past
# the last wavelength that should appear in the grid. Every grid is float64.
INSTRUMENT_WAVELENGTHS: Dict[str, np.ndarray] = {
    # Handheld/portable instruments
    "micronir_onsite": np.linspace(908, 1676, 125),  # VIAVI MicroNIR OnSite
    "scio": np.linspace(740, 1070, 331),  # Consumer Scio scanner
    "neospectra_micro": np.linspace(1350, 2500, 228),  # Si-Ware NeoSpectra Micro
    "linksquare": np.linspace(750, 1050, 301),  # LinkSquare portable
    # Benchtop dispersive
    # FOSS XDS II: 400-2498 nm inclusive at a 2 nm step (1050 points)
    "foss_xds": np.arange(400, 2500, 2, dtype=float),
    # FOSS NIRS DS2500: 400-2499.5 nm at a 0.5 nm step
    "foss_nirs_ds2500": np.arange(400, 2500, 0.5),
    # FT-NIR benchtop (approximated here by a uniform 4 nm wavelength grid)
    "bruker_mpa": np.arange(800, 2778, 4, dtype=float),  # Bruker MPA FT-NIR
    # High-resolution field portable
    # ASD FieldSpec: 350-2500 nm inclusive at a 1 nm step (2151 points)
    "asd_fieldspec": np.arange(350, 2501, 1, dtype=float),
    # Process NIR
    "abb_ftpa2000": np.arange(1000, 2500, 1, dtype=float),  # ABB FT-NIR process analyzer
    # Embedded/MEMS
    "texas_dlp_nirscan": np.linspace(900, 1700, 228),  # TI DLP NIRscan Nano
    "hamamatsu_c14384ma": np.linspace(1350, 2150, 256),  # Hamamatsu micro spectrometer
    # Specialty instruments
    "buchi_nirflex": np.arange(1000, 2500, 4, dtype=float),  # BUCHI NIRFlex FT-NIR
    "thermo_antaris": np.arange(833, 2500, 1, dtype=float),  # Thermo Antaris II
}
[docs]
def get_instrument_wavelengths(instrument: str) -> np.ndarray:
    """
    Return a copy of the predefined wavelength grid of a known instrument.

    The identifier is normalized before lookup: it is lowercased and hyphens
    and spaces are replaced by underscores. Because a copy is returned, the
    caller can modify it without affecting the stored grid.

    Args:
        instrument: Instrument identifier (e.g., "micronir_onsite", "foss_xds").

    Returns:
        NumPy array of wavelengths in nm.

    Raises:
        ValueError: If the instrument is not recognized.

    Example:
        >>> wl = get_instrument_wavelengths("micronir_onsite")
        >>> print(f"MicroNIR: {len(wl)} wavelengths from {wl[0]:.0f} to {wl[-1]:.0f} nm")
        MicroNIR: 125 wavelengths from 908 to 1676 nm
        >>> # Use with SyntheticNIRSGenerator
        >>> from nirs4all.data.synthetic import SyntheticNIRSGenerator
        >>> gen = SyntheticNIRSGenerator(wavelengths=wl)
    """
    key = instrument.lower().replace("-", "_").replace(" ", "_")
    grid = INSTRUMENT_WAVELENGTHS.get(key)
    if grid is None:
        available = list(INSTRUMENT_WAVELENGTHS.keys())
        # The message reports the normalized identifier.
        raise ValueError(
            f"Unknown instrument: '{key}'. "
            f"Available instruments: {available}"
        )
    return grid.copy()
[docs]
def list_instrument_wavelength_grids() -> List[str]:
    """
    Return the identifiers of all predefined instrument wavelength grids.

    Returns:
        List of instrument identifiers, in definition order.

    Example:
        >>> grids = list_instrument_wavelength_grids()
        >>> print(grids[:3])
        ['micronir_onsite', 'scio', 'neospectra_micro']
    """
    # Iterating a dict yields its keys.
    return [*INSTRUMENT_WAVELENGTHS]
[docs]
def get_instrument_wavelength_info() -> Dict[str, Dict[str, Any]]:
    """
    Summarize every predefined instrument wavelength grid.

    Returns:
        Dictionary mapping instrument names to info dicts containing:
            - n_wavelengths: Number of wavelength points
            - wavelength_start: Start wavelength (nm)
            - wavelength_end: End wavelength (nm)
            - mean_step: Mean wavelength step (nm); 0.0 for a one-point grid

    Example:
        >>> info = get_instrument_wavelength_info()
        >>> print(info["micronir_onsite"])
        {'n_wavelengths': 125, 'wavelength_start': 908.0, ...}
    """

    def summarize(grid):
        """Build the info dict for one wavelength grid."""
        points = np.asarray(grid)
        count = len(points)
        if count > 1:
            average_step = float(np.mean(np.diff(points)))
        else:
            average_step = 0.0
        return {
            "n_wavelengths": count,
            "wavelength_start": float(points[0]),
            "wavelength_end": float(points[-1]),
            "mean_step": average_step,
        }

    return {
        instrument: summarize(grid)
        for instrument, grid in INSTRUMENT_WAVELENGTHS.items()
    }
# ============================================================================
# Instrument Simulation
# ============================================================================
[docs]
class InstrumentSimulator:
"""
Apply instrument-specific effects to synthetic spectra.
Simulates the complete instrument response including:
- Spectral resolution (instrumental broadening)
- Multi-sensor stitching
- Multi-scan averaging
- Detector noise (shot, thermal, 1/f)
- Wavelength calibration errors
- Stray light effects
- Etalon/fringing interference
Attributes:
archetype: The instrument archetype being simulated.
rng: Random number generator for reproducibility.
Example:
>>> archetype = get_instrument_archetype("viavi_micronir")
>>> simulator = InstrumentSimulator(archetype, random_state=42)
>>> spectra_out, wavelengths_out = simulator.apply(spectra, wavelengths)
"""
def __init__(
self,
archetype: InstrumentArchetype,
random_state: Optional[int] = None
) -> None:
"""
Initialize the instrument simulator.
Args:
archetype: Instrument archetype to simulate.
random_state: Random seed for reproducibility.
"""
self.archetype = archetype
self.rng = np.random.default_rng(random_state)
self._random_state = random_state
[docs]
def apply(
    self,
    spectra: np.ndarray,
    wavelengths: np.ndarray,
    temperature_offset: float = 0.0,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Run the full chain of instrument effects on a set of spectra.

    The stages run in this order: resampling to the instrument range,
    multi-sensor effects (if enabled), instrumental broadening, wavelength
    effects, stray light, multi-scan averaging (if enabled) or single-scan
    detector noise otherwise, and finally photometric range limiting.

    Args:
        spectra: Input spectra array (n_samples, n_wavelengths).
        wavelengths: Wavelength array in nm.
        temperature_offset: Temperature deviation from calibration (°C).

    Returns:
        Tuple of (modified_spectra, output_wavelengths).
        Output wavelengths may differ if resampled to instrument grid.
    """
    # 1. Work on a copy, resampled to the instrument wavelength range
    processed, grid = self._resample_to_instrument_range(
        spectra.copy(), wavelengths
    )

    # 2. Multi-sensor stitching effects
    if self.archetype.multi_sensor.enabled:
        processed = self._apply_multi_sensor_effects(processed, grid)

    # 3. Instrumental broadening (spectral resolution)
    processed = self._apply_instrumental_broadening(processed, grid)

    # 4. Wavelength calibration effects
    processed = self._apply_wavelength_effects(
        processed, grid, temperature_offset
    )

    # 5. Stray light
    processed = self._apply_stray_light(processed)

    # 6. Multi-scan simulation when enabled, otherwise single-scan detector noise
    noise_stage = (
        self._apply_multi_scan_averaging
        if self.archetype.multi_scan.enabled
        else self._apply_detector_noise
    )
    processed = noise_stage(processed, grid)

    # 7. Photometric range limiting
    processed = self._apply_photometric_range(processed)
    return processed, grid
def _resample_to_instrument_range(
self,
spectra: np.ndarray,
wavelengths: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
"""Resample spectra to instrument wavelength range."""
wl_min, wl_max = self.archetype.wavelength_range
# Determine output wavelength grid
step = np.median(np.diff(wavelengths))
grid_start = max(wl_min, wavelengths.min())
grid_end = min(wl_max, wavelengths.max())
output_wl = np.arange(grid_start, grid_end + step / 2, step)
# Ensure we don't exceed instrument range
output_wl = output_wl[output_wl <= wl_max]
# Interpolate if dimensions don't match or wavelengths differ
if len(wavelengths) != len(output_wl) or not np.allclose(wavelengths, output_wl):
result = np.zeros((spectra.shape[0], len(output_wl)))
for i in range(spectra.shape[0]):
result[i] = np.interp(output_wl, wavelengths, spectra[i])
return result, output_wl
# Mask to instrument range
mask = (wavelengths >= wl_min) & (wavelengths <= wl_max)
return spectra[:, mask], wavelengths[mask]
def _apply_instrumental_broadening(
self,
spectra: np.ndarray,
wavelengths: np.ndarray
) -> np.ndarray:
"""Apply spectral resolution broadening via Gaussian convolution."""
fwhm = self.archetype.spectral_resolution
step = np.median(np.diff(wavelengths))
# Convert FWHM to sigma in pixel units
sigma_pts = (fwhm / 2.355) / step
if sigma_pts < 0.5:
return spectra # No significant broadening needed
result = np.zeros_like(spectra)
for i in range(spectra.shape[0]):
result[i] = gaussian_filter1d(spectra[i], sigma_pts)
return result
def _apply_multi_sensor_effects(
self,
spectra: np.ndarray,
wavelengths: np.ndarray
) -> np.ndarray:
"""Apply multi-sensor stitching simulation."""
config = self.archetype.multi_sensor
n_samples, n_wl = spectra.shape
result = spectra.copy()
for sensor in config.sensors:
# Get mask for this sensor's range
wl_min, wl_max = sensor.wavelength_range
mask = (wavelengths >= wl_min) & (wavelengths <= wl_max)
if not np.any(mask):
continue
# Apply sensor-specific gain variation
gain_variation = self.rng.normal(sensor.gain, sensor.gain * 0.01, n_samples)
result[:, mask] *= gain_variation[:, np.newaxis]
# Apply sensor-specific noise level
noise_scale = sensor.noise_level * self.archetype.photometric_noise
result[:, mask] += self.rng.normal(0, noise_scale, (n_samples, mask.sum()))
# Add stitching artifacts at sensor boundaries
if config.add_stitch_artifacts:
result = self._add_stitch_artifacts(
result, wavelengths, config
)
return result
def _add_stitch_artifacts(
self,
spectra: np.ndarray,
wavelengths: np.ndarray,
config: MultiSensorConfig
) -> np.ndarray:
"""Add artifacts at sensor stitch boundaries."""
result = spectra.copy()
step = np.median(np.diff(wavelengths))
for i, sensor in enumerate(config.sensors[:-1]):
next_sensor = config.sensors[i + 1]
# Find stitch point (end of current sensor range)
stitch_wl = sensor.wavelength_range[1]
stitch_idx = np.argmin(np.abs(wavelengths - stitch_wl))
# Define transition region
half_width = int(config.stitch_smoothing / step / 2)
start_idx = max(0, stitch_idx - half_width)
end_idx = min(len(wavelengths), stitch_idx + half_width)
# Add small offset artifact
artifact_offset = self.rng.normal(
0, config.artifact_intensity, spectra.shape[0]
)
# Create smooth transition for artifact
x = np.linspace(0, 1, end_idx - start_idx)
transition = 0.5 * (1 - np.cos(np.pi * x)) # Smooth S-curve
for sample_idx in range(spectra.shape[0]):
result[sample_idx, start_idx:end_idx] += (
artifact_offset[sample_idx] * transition
)
return result
def _apply_wavelength_effects(
self,
spectra: np.ndarray,
wavelengths: np.ndarray,
temperature_offset: float
) -> np.ndarray:
"""Apply wavelength calibration effects."""
n_samples = spectra.shape[0]
result = np.zeros_like(spectra)
for i in range(n_samples):
# Random wavelength shift within accuracy specification
shift = self.rng.normal(0, self.archetype.wavelength_accuracy)
# Add temperature-induced shift
shift += temperature_offset * self.archetype.temperature_sensitivity
# Random stretch (ppm-level)
stretch = self.rng.normal(1.0, 0.0001)
# Apply shift and stretch via interpolation
wl_shifted = stretch * wavelengths + shift
result[i] = np.interp(wavelengths, wl_shifted, spectra[i])
return result
def _apply_stray_light(self, spectra: np.ndarray) -> np.ndarray:
"""Apply stray light offset."""
stray_offset = self.archetype.stray_light
# Stray light appears as a constant offset that varies slightly
offset = self.rng.normal(
stray_offset,
stray_offset * 0.2,
spectra.shape[0]
)
return spectra + offset[:, np.newaxis]
def _apply_multi_scan_averaging(
self,
spectra: np.ndarray,
wavelengths: np.ndarray
) -> np.ndarray:
"""Simulate multi-scan acquisition and averaging."""
config = self.archetype.multi_scan
n_samples, n_wl = spectra.shape
result = np.zeros_like(spectra)
for sample_idx in range(n_samples):
# Generate multiple scans
scans = np.zeros((config.n_scans, n_wl))
for scan_idx in range(config.n_scans):
# Start with base spectrum
scan = spectra[sample_idx].copy()
# Add scan-to-scan noise
scan += self.rng.normal(0, config.scan_to_scan_noise, n_wl)
# Add wavelength jitter
if config.wavelength_jitter > 0:
jitter = self.rng.normal(0, config.wavelength_jitter)
scan = np.interp(
wavelengths,
wavelengths + jitter,
scan
)
# Add detector noise for this scan
scan = self._apply_detector_noise_single(scan, wavelengths)
scans[scan_idx] = scan
# Discard outlier scans if configured
if config.discard_outliers:
# Z-score based outlier detection
mean_scan = np.mean(scans, axis=0)
std_scan = np.std(scans, axis=0)
valid_mask = np.ones(config.n_scans, dtype=bool)
for scan_idx in range(config.n_scans):
z_scores = np.abs((scans[scan_idx] - mean_scan) / (std_scan + 1e-10))
if np.mean(z_scores) > config.outlier_threshold:
valid_mask[scan_idx] = False
scans = scans[valid_mask]
# Average scans
if config.averaging_method == "mean":
result[sample_idx] = np.mean(scans, axis=0)
elif config.averaging_method == "median":
result[sample_idx] = np.median(scans, axis=0)
elif config.averaging_method == "weighted":
# Weight by inverse variance
weights = 1.0 / (np.var(scans, axis=1) + 1e-10)
weights /= weights.sum()
result[sample_idx] = np.average(scans, axis=0, weights=weights)
else:
result[sample_idx] = np.mean(scans, axis=0)
return result
def _apply_detector_noise(
self,
spectra: np.ndarray,
wavelengths: np.ndarray
) -> np.ndarray:
"""Apply detector noise to all spectra."""
result = np.zeros_like(spectra)
for i in range(spectra.shape[0]):
result[i] = self._apply_detector_noise_single(spectra[i], wavelengths)
return result
def _apply_detector_noise_single(
self,
spectrum: np.ndarray,
wavelengths: np.ndarray
) -> np.ndarray:
"""Apply detector noise to a single spectrum."""
noise_params = self.archetype.get_noise_model_params()
base_noise = self.archetype.photometric_noise
n_wl = len(wavelengths)
total_noise = np.zeros(n_wl)
# Shot noise (signal-dependent)
shot = noise_params["shot_noise_factor"] * base_noise
total_noise += self.rng.normal(0, shot * np.sqrt(np.abs(spectrum) + 0.01))
# Thermal noise (constant)
thermal = noise_params["thermal_noise_factor"] * base_noise
total_noise += self.rng.normal(0, thermal, n_wl)
# Read noise (constant)
read = noise_params["read_noise_factor"] * base_noise * 0.5
total_noise += self.rng.normal(0, read, n_wl)
# 1/f (flicker) noise - correlated
if noise_params["flicker_noise_factor"] > 0:
flicker = noise_params["flicker_noise_factor"] * base_noise
# Generate correlated noise with 1/f spectrum
pink_noise = self._generate_pink_noise(n_wl, flicker)
total_noise += pink_noise
return spectrum + total_noise
def _generate_pink_noise(self, n_points: int, amplitude: float) -> np.ndarray:
"""Generate 1/f (pink) noise."""
white = self.rng.normal(0, 1, n_points)
# Create 1/f filter in frequency domain
freqs = np.fft.fftfreq(n_points)
freqs[0] = 1e-10 # Avoid division by zero
fft_filter = 1.0 / np.sqrt(np.abs(freqs))
fft_filter[0] = 0 # Remove DC component
# Apply filter
pink_fft = np.fft.fft(white) * fft_filter
pink = np.real(np.fft.ifft(pink_fft))
return pink * amplitude
def _apply_photometric_range(self, spectra: np.ndarray) -> np.ndarray:
"""Clip spectra to instrument photometric range."""
pmin, pmax = self.archetype.photometric_range
return np.clip(spectra, pmin, pmax)
# ============================================================================
# Module-level exports
# ============================================================================
# ``__all__`` lists the names exported by a star-import of this module.
# Entries are grouped by role: enums, configuration dataclasses, registry
# helpers, instrument wavelength grids, and the simulator.
__all__ = [
# Enums
"InstrumentCategory",
"DetectorType",
"MonochromatorType",
# Configuration dataclasses
"SensorConfig",
"MultiSensorConfig",
"MultiScanConfig",
"EdgeArtifactsConfig",
"InstrumentArchetype",
# Registry
"INSTRUMENT_ARCHETYPES",
"get_instrument_archetype",
"list_instrument_archetypes",
"get_instruments_by_category",
# Phase 6: Instrument wavelength grids
"INSTRUMENT_WAVELENGTHS",
"get_instrument_wavelengths",
"list_instrument_wavelength_grids",
"get_instrument_wavelength_info",
# Simulator
"InstrumentSimulator",
]