Source code for nirs4all.operators.augmentation.splines

import numpy as np
import scipy.interpolate as interpolate

from .abc_augmenter import Augmenter


[docs] def segment_length(x1, y1, x2, y2): """ Compute the length of a line segment given its coordinates. Parameters ---------- x1 : float x-coordinate of the first point. y1 : float y-coordinate of the first point. x2 : float x-coordinate of the second point. y2 : float y-coordinate of the second point. Returns ------- float Length of the line segment. """ return np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
[docs] def X_length(x, y): """ Compute the total length, segment lengths, and cumulative segment lengths of a curve. Vectorized implementation without np.vectorize. Parameters ---------- x : ndarray Array of x-coordinates of the curve. y : ndarray Array of y-coordinates of the curve. Returns ------- tuple A tuple containing the total length, segment lengths, and cumulative segment lengths. """ x1 = x[:-1] y1 = y[:-1] x2 = x[1:] y2 = y[1:] # Vectorized segment length computation (no np.vectorize needed) SpecLen_seg = np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2) SpecLen = np.sum(SpecLen_seg) SpecLen_seg_cumsum = np.cumsum(SpecLen_seg) return SpecLen, SpecLen_seg, SpecLen_seg_cumsum
[docs] def segment_pt_coord(x1, y1, x2, y2, fracL, L): """ Compute the coordinates of a point on a line segment given the fraction of its length. Parameters ---------- x1 : float x-coordinate of the first point of the line segment. y1 : float y-coordinate of the first point of the line segment. x2 : float x-coordinate of the second point of the line segment. y2 : float y-coordinate of the second point of the line segment. fracL : float Fraction of the length of the line segment. L : float Length of the line segment. Returns ------- tuple A tuple containing the x and y coordinates of the point on the line segment. """ propL = fracL / L xp = x1 + propL * (x2 - x1) yp = y1 + propL * (y2 - y1) return xp, yp
[docs] def interval_selection(n_l, CumVect): """ Select the interval indices that bound a given value in an array. Parameters ---------- n_l : float Value to be bounded. CumVect : ndarray Cumulative array of values. Returns ------- tuple A tuple containing the minimum and maximum indices of the bounding interval. """ i1 = np.where(n_l <= CumVect) i2 = np.where(n_l >= CumVect) return np.min(i1), np.max(i2)
[docs] class Spline_Smoothing(Augmenter): """ Class to apply a smoothing spline to a 1D signal. Parameters ---------- X : ndarray Input data. apply_on : str, optional Apply augmentation on "samples" or "global" (default: "samples"). """
[docs] def augment(self, X, apply_on="samples"): """ Apply a smoothing spline to the data. Optimized implementation with pre-allocated output array. Parameters ---------- X : ndarray Input data. apply_on : str, optional Apply augmentation on "samples" or "global" (default: "samples"). Returns ------- ndarray Augmented data. """ n_samples, n_features = X.shape x_abs = np.arange(n_features) result = np.empty_like(X) s_param = 1 / n_features for i in range(n_samples): spl = interpolate.UnivariateSpline(x_abs, X[i], s=s_param) result[i] = spl(x_abs) return result
[docs] class Spline_X_Perturbations(Augmenter): """ Class to apply a perturbation to a 1D signal using B-spline interpolation. Optimized implementation with pre-generated random parameters. Parameters ---------- X : ndarray Input data. apply_on : str, optional Apply augmentation on "samples" or "global" (default: "samples"). spline_degree : int, optional Degree of the spline. Default is 3 (cubic). perturbation_density : float, optional Density of perturbation points relative to data size. Default is 0.05. perturbation_range : tuple, optional Range of perturbation values (min, max). Default is (-10, 10). """ def __init__(self, apply_on="samples", random_state=None, *, copy=True, spline_degree=3, perturbation_density=0.05, perturbation_range=(-10, 10)): self.spline_degree = spline_degree self.perturbation_density = perturbation_density self.perturbation_range = perturbation_range super().__init__(apply_on, random_state, copy=copy)
[docs] def augment(self, X, apply_on="samples"): """ Augment the data with a perturbation using B-spline interpolation. Optimized with pre-allocated arrays and batch random generation. Parameters ---------- X : ndarray Input data to be augmented. apply_on : str, optional Apply augmentation on "samples" or "global" data. Default is "samples". Returns ------- ndarray Augmented data. """ if not 0 <= self.perturbation_density <= 1: raise ValueError("Perturbation density must be between 0 and 1") n_samples, n_features = X.shape x_range = np.arange(n_features) result = np.empty_like(X) # Get spline representation for first sample to determine perturbation size t, c, k = interpolate.splrep(x_range, X[0], s=0, k=self.spline_degree) delta_x_size = max(int(np.around(len(t) * self.perturbation_density)), 2) delta_x = np.linspace(np.min(x_range), np.max(x_range), delta_x_size) if apply_on == "global": # Single perturbation for all samples delta_y = self.random_gen.uniform( self.perturbation_range[0], self.perturbation_range[1], delta_x_size ) delta = np.interp(t, delta_x, delta_y) t_perturbed = t + delta for i in range(n_samples): t_i, c_i, _ = interpolate.splrep(x_range, X[i], s=0, k=self.spline_degree) perturbed_spline = interpolate.BSpline(t_perturbed, c_i, k, extrapolate=True) result[i] = perturbed_spline(x_range) else: # Pre-generate all random perturbations at once all_delta_y = self.random_gen.uniform( self.perturbation_range[0], self.perturbation_range[1], size=(n_samples, delta_x_size) ) for i in range(n_samples): t_i, c_i, k_i = interpolate.splrep(x_range, X[i], s=0, k=self.spline_degree) delta = np.interp(t_i, delta_x, all_delta_y[i]) t_perturbed = t_i + delta perturbed_spline = interpolate.BSpline(t_perturbed, c_i, k_i, extrapolate=True) result[i] = perturbed_spline(x_range) return result
[docs] class Spline_Y_Perturbations(Augmenter): """ Augment the data with a perturbation on the y-axis using B-spline interpolation. Optimized implementation with pre-generated random parameters. Parameters ---------- X : ndarray Input data. apply_on : str, optional Apply augmentation on "samples" or "global" (default: "samples"). spline_points : int, optional Number of spline points. Default is None (uses sample length / 2). perturbation_intensity : float, optional Intensity of perturbation relative to max value. Default is 0.005. """ def __init__(self, apply_on="samples", random_state=None, *, copy=True, spline_points=None, perturbation_intensity=0.005): self.spline_points = spline_points self.perturbation_intensity = perturbation_intensity super().__init__(apply_on, random_state, copy=copy)
[docs] def augment(self, X, apply_on="samples"): """ Augment the data with a perturbation on the y-axis using B-spline interpolation. Optimized with pre-allocated arrays and batch random generation. Parameters ---------- X : ndarray Input data to be augmented. apply_on : str, optional Apply augmentation on "samples" or "global" data. Default is "samples". Returns ------- ndarray Augmented data. """ n_samples, n_features = X.shape x_range = np.arange(n_features) variation = np.max(X) * self.perturbation_intensity nb_spline_points = int(n_features / 2) if self.spline_points is None else self.spline_points x_points = np.linspace(0, n_features, nb_spline_points) # Pre-generate baseline for all samples (or single for global) baseline = self.random_gen.uniform(-variation, variation) interval_min = -variation + baseline interval_max = variation + baseline if apply_on == "global": # Single distortion for all samples y_points = self.random_gen.uniform(interval_min, interval_max, nb_spline_points) x_gen = np.sort(x_points) t, c, k = interpolate.splrep(x_gen, y_points, s=0, k=3) spline = interpolate.BSpline(t, c, k, extrapolate=False) distor = spline(x_range) return X + distor # Pre-generate all random y_points at once for all samples all_y_points = self.random_gen.uniform( interval_min, interval_max, size=(n_samples, nb_spline_points) ) result = np.empty_like(X) x_gen = np.sort(x_points) for i in range(n_samples): y_points = all_y_points[i] t, c, k = interpolate.splrep(x_gen, y_points, s=0, k=3) spline = interpolate.BSpline(t, c, k, extrapolate=False) distor = spline(x_range) result[i] = X[i] + distor return result
[docs] class Spline_X_Simplification(Augmenter): """ Class to simplify a 1D signal using B-spline interpolation along the x-axis. Optimized implementation with pre-generated random parameters. Parameters ---------- X : ndarray Input data. apply_on : str, optional Apply augmentation on "samples" or "global" (default: "samples"). spline_points : int, optional Number of spline points for simplification. Default is None: the length of the sample / 4. uniform : bool, optional If True, the spline points are uniformly spaced. Default is False. """ def __init__(self, apply_on="samples", random_state=None, *, copy=True, spline_points=None, uniform=False): self.spline_points = spline_points self.uniform = uniform super().__init__(apply_on, random_state, copy=copy)
[docs] def augment(self, X, apply_on="samples"): """ Select randomly spaced points along the x-axis and adjust a spline. Optimized with pre-allocated arrays and batch random generation. Parameters ---------- X : ndarray Input data. apply_on : str, optional Apply augmentation on "samples" or "global" (default: "samples"). Returns ------- ndarray Augmented data. """ n_samples, n_features = X.shape x_range = np.arange(n_features) nb_points = self.spline_points if self.spline_points is not None else int(n_features / 4) result = np.empty_like(X) if self.uniform: # Uniform points are the same for all samples ctrl_points = np.linspace(0, n_features - 1, nb_points).astype(int) for i in range(n_samples): if apply_on == "samples": # Still use same uniform points for each sample pass x_subrange = x_range[ctrl_points] y = X[i, ctrl_points] t, c, k = interpolate.splrep(x_subrange, y, s=0, k=3) spline = interpolate.BSpline(t, c, k, extrapolate=False) result[i] = spline(x_range) else: if apply_on == "global": # Same random control points for all samples ctrl_points = np.unique(np.concatenate(( [0], self.random_gen.choice(range(n_features), nb_points, replace=False), [n_features - 1] ))) for i in range(n_samples): x_subrange = x_range[ctrl_points] y = X[i, ctrl_points] t, c, k = interpolate.splrep(x_subrange, y, s=0, k=3) spline = interpolate.BSpline(t, c, k, extrapolate=False) result[i] = spline(x_range) else: # Pre-generate random control points for all samples # Note: Each sample gets different random points for i in range(n_samples): ctrl_points = np.unique(np.concatenate(( [0], self.random_gen.choice(range(n_features), nb_points, replace=False), [n_features - 1] ))) x_subrange = x_range[ctrl_points] y = X[i, ctrl_points] t, c, k = interpolate.splrep(x_subrange, y, s=0, k=3) spline = interpolate.BSpline(t, c, k, extrapolate=False) result[i] = spline(x_range) return result
[docs] class Spline_Curve_Simplification(Augmenter): """ Class to simplify a 1D signal using B-spline interpolation along the curve. Optimized implementation with pre-allocated output arrays. Parameters ---------- X : ndarray Input data. apply_on : str, optional Apply augmentation on "samples" or "global" (default: "samples"). spline_points : int, optional Number of spline points for simplification. Default is None: the length of the sample / 4. uniform : bool, optional If True, the spline points are uniformly spaced. Default is False. """ def __init__(self, apply_on="samples", random_state=None, *, copy=True, spline_points=None, uniform=False): self.spline_points = spline_points self.uniform = uniform super().__init__(apply_on, random_state, copy=copy)
[docs] def augment(self, X, apply_on="samples"): """ Select regularly spaced points on the x-axis and adjust a spline. Optimized with pre-allocated output array. Parameters ---------- X : ndarray Input data. apply_on : str, optional Apply augmentation on "samples" or "features" (default: "samples"). Returns ------- ndarray Augmented data. """ n_samples, n_features = X.shape nb_points = self.spline_points if self.spline_points is not None else int(n_features / 4) x = np.arange(n_features) simplified_X = np.empty_like(X) if self.uniform: control_point_indices = np.linspace(0, n_features - 1, nb_points).astype(int) else: control_point_indices = np.unique(np.concatenate(( [0], self.random_gen.choice(range(n_features), nb_points, replace=False), [n_features - 1] ))) for i in range(n_samples): if apply_on == "samples" and not self.uniform: control_point_indices = np.unique(np.concatenate(( [0], self.random_gen.choice(range(n_features), nb_points, replace=False), [n_features - 1] ))) control_point_indices = np.unique(control_point_indices) y = X[i] # Fit a cubic B-spline to the control points t, c, k = interpolate.splrep(x[control_point_indices], y[control_point_indices], s=0, k=3) # Evaluate the B-spline at all wavelengths to get simplified signal simplified_X[i] = interpolate.BSpline(t, c, k, extrapolate=False)(x) return simplified_X