Source code for nirs4all.operators.models.sklearn.oplsda

"""Orthogonal PLS Discriminant Analysis (OPLS-DA) classifier for nirs4all.

See pls.py for full documentation and usage examples.
"""
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.preprocessing import LabelEncoder

from .plsda import PLSDA

def _check_pyopls_available():
    """Check if pyopls package is available."""
    try:
        import pyopls
        return True
    except ImportError:
        return False

class OPLSDA(BaseEstimator, ClassifierMixin):
    """Orthogonal PLS Discriminant Analysis (OPLS-DA) classifier.

    OPLS-DA combines OPLS filtering with PLS-DA classification. It removes
    Y-orthogonal variation from X before applying PLS-DA, improving class
    separation and model interpretability.

    Parameters
    ----------
    n_components : int, default=1
        Number of orthogonal components to remove.
    pls_components : int, default=5
        Number of PLS components for the discriminant model.
    scale : bool, default=True
        Whether to scale X before fitting.

    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,)
        Unique class labels.
    n_features_in_ : int
        Number of features seen during fit.
    opls_ : pyopls.OPLS
        Fitted OPLS transformer.
    plsda_ : PLSDA
        Fitted PLS-DA model on filtered data.

    Examples
    --------
    >>> from nirs4all.operators.models.sklearn.oplsda import OPLSDA
    >>> from sklearn.datasets import make_classification
    >>> X, y = make_classification(n_samples=100, n_features=50, n_classes=2,
    ...                            n_informative=10, random_state=42)
    >>> model = OPLSDA(n_components=1, pls_components=5)
    >>> _ = model.fit(X, y)
    >>> predictions = model.predict(X)

    Notes
    -----
    Requires the `pyopls` package: ``pip install pyopls``

    See Also
    --------
    PLSDA : Standard PLS-DA without orthogonal filtering.
    OPLS : OPLS for regression tasks.

    References
    ----------
    - Bylesjö, M., et al. (2006). OPLS discriminant analysis: combining the
      strengths of PLS-DA and SIMCA classification. Journal of Chemometrics,
      20(8-10), 341-351.
    """

    # Explicitly declare estimator type for sklearn compatibility
    # (e.g., StackingClassifier). This must live in the class body, not in
    # the docstring, for the attribute to actually be defined here.
    _estimator_type = "classifier"

    def __init__(
        self,
        n_components: int = 1,
        pls_components: int = 5,
        scale: bool = True,
    ):
        """Initialize OPLSDA classifier.

        Parameters
        ----------
        n_components : int, default=1
            Number of orthogonal components to remove.
        pls_components : int, default=5
            Number of PLS components for the discriminant model.
        scale : bool, default=True
            Whether to scale X before fitting.
        """
        self.n_components = n_components
        self.pls_components = pls_components
        self.scale = scale

    def fit(self, X, y):
        """Fit the OPLS-DA model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.
        y : array-like of shape (n_samples,)
            Target class labels.

        Returns
        -------
        self : OPLSDA
            Fitted estimator.

        Raises
        ------
        ImportError
            If pyopls package is not installed.
        """
        if not _check_pyopls_available():
            raise ImportError(
                "pyopls package is required for OPLSDA. "
                "Install it with: pip install pyopls"
            )

        # Imported lazily so the module can be imported without pyopls.
        from pyopls import OPLS as PyOPLS

        X = np.asarray(X)
        y = np.asarray(y).ravel()

        self.n_features_in_ = X.shape[1]
        self.classes_ = np.unique(y)

        # Encode y for OPLS fitting (use numeric encoding).
        self._label_encoder = LabelEncoder()
        y_encoded = self._label_encoder.fit_transform(y)

        # Cap the requested orthogonal components by n_features - 1 and
        # n_samples - 2, but always remove at least one component.
        max_ortho = min(self.n_components, X.shape[1] - 1, X.shape[0] - 2)
        n_ortho = max(1, max_ortho)

        # Fit the OPLS transformer and filter the training data.
        self.opls_ = PyOPLS(n_components=n_ortho, scale=self.scale)
        X_filtered = self.opls_.fit_transform(X, y_encoded)

        # Fit PLS-DA on the filtered data, using the original labels.
        self.plsda_ = PLSDA(n_components=self.pls_components)
        self.plsda_.fit(X_filtered, y)

        return self

    def _remove_orthogonal(self, X):
        """Convert X to an array and apply the fitted OPLS filter.

        Raises ``sklearn.exceptions.NotFittedError`` (a subclass of both
        ``ValueError`` and ``AttributeError``) when called before ``fit``.
        """
        from sklearn.utils.validation import check_is_fitted

        check_is_fitted(self, ("opls_", "plsda_"))
        return self.opls_.transform(np.asarray(X))

    def predict(self, X):
        """Predict class labels for samples in X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to predict.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Predicted class labels.
        """
        # Remove orthogonal variation, then classify with PLS-DA.
        return self.plsda_.predict(self._remove_orthogonal(X))

    def predict_proba(self, X):
        """Return pseudo-probabilities (PLS responses).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples.

        Returns
        -------
        proba : ndarray of shape (n_samples, n_classes)
            Pseudo-probability estimates.
        """
        # Remove orthogonal variation, then query PLS-DA.
        return self.plsda_.predict_proba(self._remove_orthogonal(X))

    def transform(self, X):
        """Transform X by removing orthogonal variation.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to transform.

        Returns
        -------
        X_filtered : ndarray of shape (n_samples, n_features)
            Transformed samples with orthogonal variation removed.
        """
        return self._remove_orthogonal(X)

    def get_params(self, deep=True):
        """Get parameters for this estimator.

        Parameters
        ----------
        deep : bool, default=True
            Accepted for scikit-learn API compatibility. It has no effect
            here because none of the parameters is itself an estimator.

        Returns
        -------
        params : dict
            Parameter names mapped to their values.
        """
        return {
            'n_components': self.n_components,
            'pls_components': self.pls_components,
            'scale': self.scale,
        }

    def set_params(self, **params):
        """Set the parameters of this estimator.

        Parameters
        ----------
        **params : dict
            Estimator parameters. Each key is assigned as an attribute
            without validation against the constructor signature.

        Returns
        -------
        self : OPLSDA
            Estimator instance.
        """
        for key, value in params.items():
            setattr(self, key, value)
        return self