Source code for nirs4all.data.signal_type

"""
Signal type management for spectroscopy data.

This module provides:
- SignalType enum for absorbance, reflectance, transmittance
- Autodetection heuristics based on value ranges and band directions
- Conversion utilities between signal types
"""

from enum import Enum
from typing import Optional, Union, Tuple, List
import numpy as np


class SignalType(str, Enum):
    """
    Spectral signal types for NIRS/spectroscopy data.

    Defines the measurement type of spectral data. Members are also ``str``
    instances, so their string values stay backward compatible with
    config files.
    """

    # Core types
    ABSORBANCE = "absorbance"  # A, typically [0, 3+], log(1/R) or log(1/T)
    REFLECTANCE = "reflectance"  # R, fractional [0, 1]
    REFLECTANCE_PERCENT = "reflectance%"  # %R, percentage [0, 100]
    TRANSMITTANCE = "transmittance"  # T, fractional [0, 1]
    TRANSMITTANCE_PERCENT = "transmittance%"  # %T, percentage [0, 100]

    # Special types
    KUBELKA_MUNK = "kubelka_munk"  # F(R) = (1-R)²/(2R)
    LOG_1_R = "log_1_r"  # log(1/R) - pseudo-absorbance from reflectance
    LOG_1_T = "log_1_t"  # log(1/T) - absorbance from transmittance

    # Detection states
    AUTO = "auto"  # Auto-detect on first use
    UNKNOWN = "unknown"  # Cannot be determined (preprocessed data)
    PREPROCESSED = "preprocessed"  # Data has been preprocessed (derivative, SNV, etc.)

    @property
    def is_percent(self) -> bool:
        """Check if this is a percentage-based signal type."""
        return self in (
            SignalType.REFLECTANCE_PERCENT,
            SignalType.TRANSMITTANCE_PERCENT,
        )

    @property
    def is_fraction(self) -> bool:
        """Check if this is a fractional [0, 1] signal type."""
        return self in (SignalType.REFLECTANCE, SignalType.TRANSMITTANCE)

    @property
    def is_absorbance_like(self) -> bool:
        """Check if this is absorbance or pseudo-absorbance."""
        return self in (
            SignalType.ABSORBANCE,
            SignalType.LOG_1_R,
            SignalType.LOG_1_T,
            SignalType.KUBELKA_MUNK,
        )

    @property
    def is_reflectance_based(self) -> bool:
        """Check if this is any reflectance-based signal."""
        return self in (SignalType.REFLECTANCE, SignalType.REFLECTANCE_PERCENT)

    @property
    def is_transmittance_based(self) -> bool:
        """Check if this is any transmittance-based signal."""
        return self in (
            SignalType.TRANSMITTANCE,
            SignalType.TRANSMITTANCE_PERCENT,
        )

    @property
    def is_determinable(self) -> bool:
        """Check if this is a known, determinable signal type."""
        return self not in (
            SignalType.AUTO,
            SignalType.UNKNOWN,
            SignalType.PREPROCESSED,
        )

    @classmethod
    def from_string(cls, value: str) -> "SignalType":
        """
        Parse signal type from various string representations.

        Matching is case-insensitive and ignores surrounding whitespace.

        Args:
            value: String representation (e.g., "A", "R", "%R", "absorbance",
                etc.). An existing SignalType member is returned unchanged.

        Returns:
            SignalType enum value

        Raises:
            ValueError: If the string matches neither a known alias nor an
                enum value.
        """
        if isinstance(value, SignalType):
            return value

        value_lower = value.lower().strip()

        # Common abbreviations and variations.
        # Kept local: a dict assigned in an Enum class body would be turned
        # into an enum member.
        mappings = {
            # Absorbance
            "a": cls.ABSORBANCE,
            "abs": cls.ABSORBANCE,
            "absorbance": cls.ABSORBANCE,
            "absorption": cls.ABSORBANCE,
            # Reflectance
            "r": cls.REFLECTANCE,
            "ref": cls.REFLECTANCE,
            "refl": cls.REFLECTANCE,
            "reflectance": cls.REFLECTANCE,
            # Reflectance percent
            "%r": cls.REFLECTANCE_PERCENT,
            "r%": cls.REFLECTANCE_PERCENT,
            "reflectance%": cls.REFLECTANCE_PERCENT,
            "percent_reflectance": cls.REFLECTANCE_PERCENT,
            "reflectance_percent": cls.REFLECTANCE_PERCENT,
            # Transmittance
            "t": cls.TRANSMITTANCE,
            "trans": cls.TRANSMITTANCE,
            "transmittance": cls.TRANSMITTANCE,
            "transmission": cls.TRANSMITTANCE,
            # Transmittance percent
            "%t": cls.TRANSMITTANCE_PERCENT,
            "t%": cls.TRANSMITTANCE_PERCENT,
            "transmittance%": cls.TRANSMITTANCE_PERCENT,
            "percent_transmittance": cls.TRANSMITTANCE_PERCENT,
            "transmittance_percent": cls.TRANSMITTANCE_PERCENT,
            # Kubelka-Munk
            "km": cls.KUBELKA_MUNK,
            "kubelka_munk": cls.KUBELKA_MUNK,
            "kubelka-munk": cls.KUBELKA_MUNK,
            "f(r)": cls.KUBELKA_MUNK,
            # Log transforms
            "log(1/r)": cls.LOG_1_R,
            "log_1_r": cls.LOG_1_R,
            "-log(r)": cls.LOG_1_R,
            "-log10(r)": cls.LOG_1_R,
            "log(1/t)": cls.LOG_1_T,
            "log_1_t": cls.LOG_1_T,
            "-log(t)": cls.LOG_1_T,
            "-log10(t)": cls.LOG_1_T,
            # Special
            "auto": cls.AUTO,
            "unknown": cls.UNKNOWN,
            "preprocessed": cls.PREPROCESSED,
        }

        if value_lower in mappings:
            return mappings[value_lower]

        # Try direct enum value match (covers any member that has no explicit
        # alias entry above).
        try:
            return cls(value_lower)
        except ValueError:
            # `from None` drops the enum's own "is not a valid SignalType"
            # error from the traceback; the message below already says it.
            raise ValueError(
                f"Unknown signal type '{value}'. Valid options: "
                f"{[e.value for e in cls]}"
            ) from None
# Type alias for input flexibility SignalTypeInput = Union[str, SignalType]
def normalize_signal_type(signal_type: SignalTypeInput) -> SignalType:
    """
    Coerce a signal type given as string or enum into a SignalType member.

    Args:
        signal_type: String or SignalType enum

    Returns:
        SignalType enum value; an enum input is handed back as-is, a string
        is parsed with SignalType.from_string.
    """
    already_enum = isinstance(signal_type, SignalType)
    return signal_type if already_enum else SignalType.from_string(signal_type)
class SignalTypeDetector:
    """
    Heuristic detector for spectral signal types.

    Uses value ranges and optionally wavelength information to determine
    whether data is absorbance, reflectance, or transmittance.
    """

    # NIR water absorption bands (nm) - strong absorbers
    WATER_BANDS_NM = [1450, 1940, 2500]  # O-H stretching
    # Corresponding wavenumbers (cm-1)
    WATER_BANDS_CM1 = [6897, 5155, 4000]  # 10^7 / nm

    def __init__(
        self,
        wavelengths: Optional[np.ndarray] = None,
        wavelength_unit: str = "nm"
    ):
        """
        Initialize the detector.

        Args:
            wavelengths: Array of wavelength/wavenumber values for band analysis
            wavelength_unit: Unit of wavelengths ("nm" or "cm-1")
        """
        self.wavelengths = wavelengths
        self.wavelength_unit = wavelength_unit

    def detect(
        self,
        spectra: np.ndarray,
        confidence_threshold: float = 0.7
    ) -> Tuple[SignalType, float, str]:
        """
        Detect the signal type of spectral data.

        Args:
            spectra: Spectral data array of shape (n_samples, n_features)
            confidence_threshold: Minimum confidence to return a definite type

        Returns:
            Tuple of (SignalType, confidence, reason_string). The type is
            SignalType.UNKNOWN for empty or all-NaN data and whenever the
            best candidate's confidence is below ``confidence_threshold``;
            it is SignalType.PREPROCESSED (confidence 0.9) when the global
            statistics look centered/normalized.
        """
        data = np.asarray(spectra)

        if data.size == 0:
            return SignalType.UNKNOWN, 0.0, "Empty data"

        # Without this guard the nan-aware reductions below would only emit
        # RuntimeWarnings and produce NaN statistics.
        if data.dtype.kind in "fc" and np.isnan(data).all():
            return SignalType.UNKNOWN, 0.0, "Data contains only NaN values"

        # Global statistics - cast to float for type safety
        min_val = float(np.nanmin(data))
        max_val = float(np.nanmax(data))
        mean_val = float(np.nanmean(data))
        std_val = float(np.nanstd(data))

        # Check for preprocessing indicators
        if self._is_preprocessed(min_val, max_val, mean_val, std_val):
            return (
                SignalType.PREPROCESSED,
                0.9,
                "Data appears preprocessed (centered/normalized)",
            )

        # Score each signal type (insertion order decides ties in max()).
        scores = {
            # Reflectance fraction: values in [0, 1]
            SignalType.REFLECTANCE: self._score_reflectance_fraction(
                min_val, max_val, mean_val
            ),
            # Reflectance percent: values in [0, 100]
            SignalType.REFLECTANCE_PERCENT: self._score_reflectance_percent(
                min_val, max_val, mean_val
            ),
            # Transmittance fraction: values in [0, 1]
            SignalType.TRANSMITTANCE: self._score_transmittance_fraction(
                min_val, max_val, mean_val
            ),
            # Transmittance percent: values in [0, 100]
            SignalType.TRANSMITTANCE_PERCENT: self._score_transmittance_percent(
                min_val, max_val, mean_val
            ),
            # Absorbance: typically [0, 3+], can be slightly negative
            SignalType.ABSORBANCE: self._score_absorbance(
                min_val, max_val, mean_val
            ),
        }

        # Use wavelength information as tiebreaker if available
        if self.wavelengths is not None and data.ndim == 2:
            band_hints = self._analyze_water_bands(data)
            for signal_type, hint_score in band_hints.items():
                if signal_type in scores:
                    # Weight band hints; clamp at zero so a negative hint
                    # cannot make a score negative and distort the
                    # best/total confidence ratio below.
                    scores[signal_type] = max(
                        0.0, scores[signal_type] + hint_score * 0.2
                    )

        # Find best match
        best_type = max(scores, key=scores.get)
        best_score = scores[best_type]

        # Normalize confidence: share of the best score in the total
        total_score = sum(scores.values())
        confidence = best_score / total_score if total_score > 0 else 0.0

        # Build reason string
        reason = self._build_reason(
            min_val, max_val, mean_val, best_type, confidence
        )

        if confidence < confidence_threshold:
            return SignalType.UNKNOWN, confidence, reason

        return best_type, confidence, reason

    def _is_preprocessed(
        self,
        min_val: float,
        max_val: float,
        mean_val: float,
        std_val: float
    ) -> bool:
        """Check if data shows signs of preprocessing."""
        # Mean-centered data has mean close to 0
        if abs(mean_val) < 0.01 and std_val > 0.1:
            return True

        # SNV/standardized data has std close to 1
        if abs(std_val - 1.0) < 0.1 and abs(mean_val) < 0.1:
            return True

        # Derivative data often has negative values with mean near 0
        if min_val < -0.5 and max_val < 0.5 and abs(mean_val) < 0.01:
            return True

        return False

    def _score_reflectance_fraction(
        self,
        min_val: float,
        max_val: float,
        mean_val: float
    ) -> float:
        """Score likelihood of reflectance in [0, 1]."""
        score = 0.0

        # Values should be in [0, 1]
        if 0 <= min_val and max_val <= 1.2:
            score += 0.5

        # Typical reflectance range
        if 0.1 <= mean_val <= 0.8:
            score += 0.3

        # Very strong match if max is close to 1
        if max_val <= 1.0:
            score += 0.2

        return score

    def _score_reflectance_percent(
        self,
        min_val: float,
        max_val: float,
        mean_val: float
    ) -> float:
        """Score likelihood of reflectance in [0, 100]."""
        score = 0.0

        # Values should be in [0, 100]
        if 0 <= min_val and 1.5 < max_val <= 120:
            score += 0.5

        # Typical percent reflectance range
        if 10 <= mean_val <= 80:
            score += 0.3

        # Very strong match if max is close to 100
        if max_val <= 100:
            score += 0.2

        return score

    def _score_transmittance_fraction(
        self,
        min_val: float,
        max_val: float,
        mean_val: float
    ) -> float:
        """Score likelihood of transmittance in [0, 1]."""
        score = 0.0

        # Very similar to reflectance fraction
        # Without band direction info, hard to distinguish
        if 0 <= min_val and max_val <= 1.2:
            score += 0.4

        # Transmittance often has lower values than reflectance
        if 0.05 <= mean_val <= 0.5:
            score += 0.2

        return score

    def _score_transmittance_percent(
        self,
        min_val: float,
        max_val: float,
        mean_val: float
    ) -> float:
        """Score likelihood of transmittance in [0, 100]."""
        score = 0.0

        # Similar to reflectance percent
        if 0 <= min_val and 1.5 < max_val <= 120:
            score += 0.4

        # Transmittance percent
        if 5 <= mean_val <= 50:
            score += 0.2

        return score

    def _score_absorbance(
        self,
        min_val: float,
        max_val: float,
        mean_val: float
    ) -> float:
        """Score likelihood of absorbance."""
        score = 0.0

        # Absorbance typically [0, 3+], can be slightly negative
        if -0.5 <= min_val and 0.5 <= max_val <= 5.0:
            score += 0.4

        # Typical absorbance range
        if 0.2 <= mean_val <= 2.0:
            score += 0.3

        # Absorbance peaks are positive
        if min_val >= -0.2:
            score += 0.2

        # High max suggests absorbance peaks
        if max_val >= 1.0:
            score += 0.1

        return score

    def _analyze_water_bands(
        self,
        spectra: np.ndarray
    ) -> dict:
        """
        Analyze water band directions to distinguish R/T from A.

        In reflectance/transmittance, water bands show as DIPS (lower values).
        In absorbance, water bands show as PEAKS (higher values).

        Args:
            spectra: 2D array; its second axis must have one entry per
                stored wavelength, otherwise no hints are produced.

        Returns:
            Dict mapping SignalType to score adjustment (empty when no
            wavelengths are available, the lengths do not match, or peaks
            and dips are tied).
        """
        hints = {}

        if self.wavelengths is None:
            return hints

        # Accept plain sequences as well as arrays for the wavelength axis.
        wavelengths = np.asarray(self.wavelengths, dtype=float)
        if (
            wavelengths.ndim != 1
            or wavelengths.size == 0
            or wavelengths.shape[0] != spectra.shape[1]
        ):
            return hints

        # Get wavelengths in nm for comparison
        if self.wavelength_unit == "cm-1":
            wl_nm = 1e7 / wavelengths  # Convert cm-1 to nm
        else:
            wl_nm = wavelengths

        # Loop-invariant values
        wl_low, wl_high = wl_nm.min(), wl_nm.max()
        mean_spectrum = np.nanmean(spectra, axis=0)
        n_points = mean_spectrum.shape[0]
        window = 10

        peak_count = 0
        dip_count = 0

        for band_nm in self.WATER_BANDS_NM:
            # Only bands covered by the measured range can be inspected
            if not (wl_low <= band_nm <= wl_high):
                continue

            # Find closest wavelength index
            idx = int(np.argmin(np.abs(wl_nm - band_nm)))

            # Compare to local neighborhood; `+ 1` because the slice end is
            # exclusive, so the window is symmetric around idx.
            start = max(0, idx - window)
            end = min(n_points, idx + window + 1)
            local_mean = np.nanmean(mean_spectrum[start:end])
            band_value = mean_spectrum[idx]

            # 5% relative margin, taken on |local_mean| so the peak/dip
            # direction is not inverted when the local mean is negative.
            margin = 0.05 * abs(local_mean)
            if band_value > local_mean + margin:
                peak_count += 1  # Peak at water band -> absorbance
            elif band_value < local_mean - margin:
                dip_count += 1  # Dip at water band -> R or T

        if peak_count > dip_count:
            hints[SignalType.ABSORBANCE] = 0.3
            hints[SignalType.REFLECTANCE] = -0.1
            hints[SignalType.TRANSMITTANCE] = -0.1
        elif dip_count > peak_count:
            hints[SignalType.ABSORBANCE] = -0.1
            hints[SignalType.REFLECTANCE] = 0.2
            hints[SignalType.TRANSMITTANCE] = 0.2

        return hints

    def _build_reason(
        self,
        min_val: float,
        max_val: float,
        mean_val: float,
        detected_type: SignalType,
        confidence: float
    ) -> str:
        """Build human-readable detection reason."""
        parts = [
            f"Range: [{min_val:.3f}, {max_val:.3f}]",
            f"Mean: {mean_val:.3f}",
            f"Detected: {detected_type.value}",
            f"Confidence: {confidence:.1%}"
        ]
        return " | ".join(parts)
def detect_signal_type(
    spectra: np.ndarray,
    wavelengths: Optional[np.ndarray] = None,
    wavelength_unit: str = "nm",
    confidence_threshold: float = 0.7
) -> Tuple[SignalType, float, str]:
    """
    Convenience function to detect signal type.

    Builds a SignalTypeDetector from the wavelength arguments and runs its
    ``detect`` method on ``spectra``.

    Args:
        spectra: Spectral data array (n_samples, n_features)
        wavelengths: Optional wavelength values for band analysis
        wavelength_unit: Unit of wavelengths ("nm" or "cm-1")
        confidence_threshold: Minimum confidence to return a definite type;
            forwarded to SignalTypeDetector.detect. The default matches
            that method's default.

    Returns:
        Tuple of (SignalType, confidence, reason)

    Example:
        >>> spectra = np.random.rand(100, 500) * 0.8  # Values in [0, 0.8]
        >>> signal_type, confidence, reason = detect_signal_type(spectra)
        >>> print(f"Detected: {signal_type.value} ({confidence:.0%})")
    """
    detector = SignalTypeDetector(wavelengths, wavelength_unit)
    return detector.detect(spectra, confidence_threshold=confidence_threshold)