Source code for nirs4all.data.synthetic.exporter

"""
Dataset export utilities for synthetic NIRS data.

This module provides tools for exporting synthetic datasets to various
file formats and folder structures compatible with nirs4all loaders.

Key Features:
    - Export to CSV files (single or multi-file format)
    - Export to nirs4all standard folder structure (Xcal, Ycal, Xval, Yval)
    - Export with metadata (sample IDs, groups, etc.)
    - Generate CSV variations for loader testing

Example:
    >>> from nirs4all.data.synthetic import SyntheticDatasetBuilder, DatasetExporter
    >>>
    >>> builder = SyntheticDatasetBuilder(n_samples=1000, random_state=42)
    >>> X, y = builder.build_arrays()
    >>>
    >>> exporter = DatasetExporter()
    >>> path = exporter.to_folder(
    ...     "output/synthetic_data",
    ...     X, y,
    ...     train_ratio=0.8,
    ...     wavelengths=builder.state._wavelengths
    ... )
"""

from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Tuple, Union

import numpy as np

try:
    import pandas as pd
    HAS_PANDAS = True
except ImportError:
    HAS_PANDAS = False



[docs]
@dataclass
class ExportConfig:
    """
    Configuration for dataset export.

    Attributes:
        format: Export format ('standard', 'single', 'fragmented').
            - 'standard': Separate Xcal, Ycal, Xval, Yval files.
            - 'single': All data in one file with partition column.
            - 'fragmented': Multiple small files (for loader testing).
        separator: CSV delimiter character.
        float_precision: Decimal precision for floating point values.
        include_headers: Whether to include column headers in CSV.
        include_index: Whether to include row index.
        compression: Optional compression ('gzip', 'zip', None).
        file_extension: File extension to use.
    """

    format: Literal["standard", "single", "fragmented"] = "standard"
    separator: str = ";"
    float_precision: int = 6
    include_headers: bool = True
    include_index: bool = False
    compression: Optional[Literal["gzip", "zip"]] = None
    file_extension: str = ".csv"




[docs]
class DatasetExporter:
    """
    Export synthetic datasets to various file formats.

    This class provides methods for exporting synthetic NIRS datasets
    to files and folders compatible with nirs4all's data loaders.

    Attributes:
        config: Export configuration settings.

    Args:
        config: Optional ExportConfig. Uses defaults if None.

    Example:
        >>> exporter = DatasetExporter()
        >>>
        >>> # Export to standard folder structure
        >>> path = exporter.to_folder(
        ...     "output/data",
        ...     X, y,
        ...     train_ratio=0.8,
        ...     wavelengths=wavelengths
        ... )
        >>>
        >>> # Export to single CSV
        >>> path = exporter.to_csv(
        ...     "output/all_data.csv",
        ...     X, y,
        ...     wavelengths=wavelengths
        ... )
    """

    def __init__(self, config: Optional[ExportConfig] = None) -> None:
        """
        Initialize the exporter.

        Args:
            config: Export configuration. Uses defaults if None.
        """
        self.config = config or ExportConfig()


[docs]
    def to_folder(
        self,
        path: Union[str, Path],
        X: np.ndarray,
        y: np.ndarray,
        *,
        train_ratio: float = 0.8,
        wavelengths: Optional[np.ndarray] = None,
        metadata: Optional[Dict[str, np.ndarray]] = None,
        random_state: Optional[int] = None,
        format: Optional[Literal["standard", "single", "fragmented"]] = None,
    ) -> Path:
        """
        Export dataset to a folder structure.

        Creates a folder with CSV files compatible with nirs4all's
        DatasetConfigs loader.

        Args:
            path: Output folder path.
            X: Feature matrix (n_samples, n_features).
            y: Target values (n_samples,) or (n_samples, n_targets).
            train_ratio: Proportion for training set.
            wavelengths: Optional wavelength values for column headers.
            metadata: Optional dict of metadata arrays (same length as X).
            random_state: Random seed for train/test split.
            format: Override config format for this export.

        Returns:
            Path to created folder.

        Raises:
            ValueError: If X and y have incompatible shapes.
            ImportError: If pandas is not available.

        Example:
            >>> exporter.to_folder(
            ...     "data/synthetic",
            ...     X, y,
            ...     train_ratio=0.8,
            ...     wavelengths=np.arange(1000, 2500, 2)
            ... )
        """
        if not HAS_PANDAS:
            raise ImportError("pandas is required for CSV export")

        path = Path(path)
        path.mkdir(parents=True, exist_ok=True)

        # Validate inputs
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y must have same number of samples. "
                f"Got X: {X.shape[0]}, y: {y.shape[0]}"
            )

        export_format = format or self.config.format

        if export_format == "standard":
            return self._export_standard(path, X, y, train_ratio, wavelengths, metadata, random_state)
        elif export_format == "single":
            return self._export_single(path, X, y, train_ratio, wavelengths, metadata, random_state)
        elif export_format == "fragmented":
            return self._export_fragmented(path, X, y, train_ratio, wavelengths, metadata, random_state)
        else:
            raise ValueError(f"Unknown format: {export_format}")


    def _export_standard(
        self,
        path: Path,
        X: np.ndarray,
        y: np.ndarray,
        train_ratio: float,
        wavelengths: Optional[np.ndarray],
        metadata: Optional[Dict[str, np.ndarray]],
        random_state: Optional[int],
    ) -> Path:
        """Export to standard Xcal/Ycal/Xval/Yval structure."""
        n_samples = X.shape[0]
        n_train = int(n_samples * train_ratio)

        # Create train/test split
        rng = np.random.default_rng(random_state)
        indices = rng.permutation(n_samples)
        train_idx = indices[:n_train]
        test_idx = indices[n_train:]

        # Create feature column names
        if wavelengths is not None:
            columns = [str(int(wl)) for wl in wavelengths]
        else:
            columns = [f"feature_{i}" for i in range(X.shape[1])]

        # Create target column names
        if y.ndim == 1:
            y = y.reshape(-1, 1)
        y_columns = [f"target_{i}" if y.shape[1] > 1 else "target" for i in range(y.shape[1])]
        if y.shape[1] == 1:
            y_columns = ["target"]

        # Export training data
        X_train = pd.DataFrame(X[train_idx], columns=columns)
        X_train.to_csv(
            path / f"Xcal{self.config.file_extension}",
            sep=self.config.separator,
            index=self.config.include_index,
            float_format=f"%.{self.config.float_precision}f",
        )

        y_train = pd.DataFrame(y[train_idx], columns=y_columns)
        y_train.to_csv(
            path / f"Ycal{self.config.file_extension}",
            sep=self.config.separator,
            index=self.config.include_index,
            float_format=f"%.{self.config.float_precision}f",
        )

        # Export test data
        if len(test_idx) > 0:
            X_test = pd.DataFrame(X[test_idx], columns=columns)
            X_test.to_csv(
                path / f"Xval{self.config.file_extension}",
                sep=self.config.separator,
                index=self.config.include_index,
                float_format=f"%.{self.config.float_precision}f",
            )

            y_test = pd.DataFrame(y[test_idx], columns=y_columns)
            y_test.to_csv(
                path / f"Yval{self.config.file_extension}",
                sep=self.config.separator,
                index=self.config.include_index,
                float_format=f"%.{self.config.float_precision}f",
            )

        # Export metadata if provided
        if metadata:
            self._export_metadata(path, metadata, train_idx, test_idx)

        return path

    def _export_single(
        self,
        path: Path,
        X: np.ndarray,
        y: np.ndarray,
        train_ratio: float,
        wavelengths: Optional[np.ndarray],
        metadata: Optional[Dict[str, np.ndarray]],
        random_state: Optional[int],
    ) -> Path:
        """Export all data to a single CSV file with partition column."""
        n_samples = X.shape[0]
        n_train = int(n_samples * train_ratio)

        # Create train/test split
        rng = np.random.default_rng(random_state)
        indices = rng.permutation(n_samples)
        train_idx = indices[:n_train]
        test_idx = indices[n_train:]

        # Create feature column names
        if wavelengths is not None:
            feature_columns = [str(int(wl)) for wl in wavelengths]
        else:
            feature_columns = [f"feature_{i}" for i in range(X.shape[1])]

        # Ensure y is 2D
        if y.ndim == 1:
            y = y.reshape(-1, 1)
        y_columns = [f"target_{i}" if y.shape[1] > 1 else "target" for i in range(y.shape[1])]
        if y.shape[1] == 1:
            y_columns = ["target"]

        # Build combined DataFrame
        data = {}

        # Add partition column
        partition = np.array(["train"] * n_samples)
        partition[test_idx] = "test"
        # Reorder by indices
        reorder = np.zeros(n_samples, dtype=int)
        reorder[indices] = np.arange(n_samples)
        data["partition"] = partition

        # Add sample IDs if in metadata
        if metadata and "sample_id" in metadata:
            data["sample_id"] = metadata["sample_id"]

        # Add features
        for i, col in enumerate(feature_columns):
            data[col] = X[:, i]

        # Add targets
        for i, col in enumerate(y_columns):
            data[col] = y[:, i]

        # Add remaining metadata
        if metadata:
            for key, values in metadata.items():
                if key != "sample_id":  # Already added
                    data[key] = values

        df = pd.DataFrame(data)
        df.to_csv(
            path / f"data{self.config.file_extension}",
            sep=self.config.separator,
            index=self.config.include_index,
            float_format=f"%.{self.config.float_precision}f",
        )

        return path

    def _export_fragmented(
        self,
        path: Path,
        X: np.ndarray,
        y: np.ndarray,
        train_ratio: float,
        wavelengths: Optional[np.ndarray],
        metadata: Optional[Dict[str, np.ndarray]],
        random_state: Optional[int],
    ) -> Path:
        """Export to multiple small files (for loader testing)."""
        n_samples = X.shape[0]
        n_train = int(n_samples * train_ratio)

        # Create train/test split
        rng = np.random.default_rng(random_state)
        indices = rng.permutation(n_samples)
        train_idx = indices[:n_train]
        test_idx = indices[n_train:]

        # Create feature column names
        if wavelengths is not None:
            columns = [str(int(wl)) for wl in wavelengths]
        else:
            columns = [f"feature_{i}" for i in range(X.shape[1])]

        # Ensure y is 2D
        if y.ndim == 1:
            y = y.reshape(-1, 1)
        y_columns = [f"target_{i}" if y.shape[1] > 1 else "target" for i in range(y.shape[1])]
        if y.shape[1] == 1:
            y_columns = ["target"]

        # Create train folder with fragmented files
        train_path = path / "train"
        train_path.mkdir(parents=True, exist_ok=True)

        # Split training data into chunks
        chunk_size = max(10, len(train_idx) // 5)  # At least 5 chunks
        for i, start in enumerate(range(0, len(train_idx), chunk_size)):
            end = min(start + chunk_size, len(train_idx))
            chunk_idx = train_idx[start:end]

            # Export features
            df_x = pd.DataFrame(X[chunk_idx], columns=columns)
            df_x.to_csv(
                train_path / f"X_part{i}{self.config.file_extension}",
                sep=self.config.separator,
                index=self.config.include_index,
                float_format=f"%.{self.config.float_precision}f",
            )

            # Export targets
            df_y = pd.DataFrame(y[chunk_idx], columns=y_columns)
            df_y.to_csv(
                train_path / f"Y_part{i}{self.config.file_extension}",
                sep=self.config.separator,
                index=self.config.include_index,
                float_format=f"%.{self.config.float_precision}f",
            )

        # Create test folder
        if len(test_idx) > 0:
            test_path = path / "test"
            test_path.mkdir(parents=True, exist_ok=True)

            df_x = pd.DataFrame(X[test_idx], columns=columns)
            df_x.to_csv(
                test_path / f"X{self.config.file_extension}",
                sep=self.config.separator,
                index=self.config.include_index,
                float_format=f"%.{self.config.float_precision}f",
            )

            df_y = pd.DataFrame(y[test_idx], columns=y_columns)
            df_y.to_csv(
                test_path / f"Y{self.config.file_extension}",
                sep=self.config.separator,
                index=self.config.include_index,
                float_format=f"%.{self.config.float_precision}f",
            )

        return path

    def _export_metadata(
        self,
        path: Path,
        metadata: Dict[str, np.ndarray],
        train_idx: np.ndarray,
        test_idx: np.ndarray,
    ) -> None:
        """Export metadata to separate CSV files."""
        meta_df = pd.DataFrame(metadata)

        # Training metadata
        meta_train = meta_df.iloc[train_idx]
        meta_train.to_csv(
            path / f"metadata_cal{self.config.file_extension}",
            sep=self.config.separator,
            index=self.config.include_index,
        )

        # Test metadata
        if len(test_idx) > 0:
            meta_test = meta_df.iloc[test_idx]
            meta_test.to_csv(
                path / f"metadata_val{self.config.file_extension}",
                sep=self.config.separator,
                index=self.config.include_index,
            )


[docs]
    def to_csv(
        self,
        path: Union[str, Path],
        X: np.ndarray,
        y: np.ndarray,
        *,
        wavelengths: Optional[np.ndarray] = None,
        metadata: Optional[Dict[str, np.ndarray]] = None,
        include_targets: bool = True,
    ) -> Path:
        """
        Export dataset to a single CSV file.

        Creates a CSV file with features (and optionally targets) combined.

        Args:
            path: Output file path.
            X: Feature matrix (n_samples, n_features).
            y: Target values (n_samples,) or (n_samples, n_targets).
            wavelengths: Optional wavelength values for column headers.
            metadata: Optional dict of metadata arrays.
            include_targets: Whether to include target column(s).

        Returns:
            Path to created file.

        Example:
            >>> exporter.to_csv("data.csv", X, y, wavelengths=wavelengths)
        """
        if not HAS_PANDAS:
            raise ImportError("pandas is required for CSV export")

        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)

        # Validate inputs
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y must have same number of samples. "
                f"Got X: {X.shape[0]}, y: {y.shape[0]}"
            )

        # Create feature column names
        if wavelengths is not None:
            feature_columns = [str(int(wl)) for wl in wavelengths]
        else:
            feature_columns = [f"feature_{i}" for i in range(X.shape[1])]

        # Build DataFrame
        data = {}

        # Add metadata first (sample IDs, etc.)
        if metadata:
            for key, values in metadata.items():
                data[key] = values

        # Add features
        for i, col in enumerate(feature_columns):
            data[col] = X[:, i]

        # Add targets
        if include_targets:
            if y.ndim == 1:
                data["target"] = y
            else:
                for i in range(y.shape[1]):
                    data[f"target_{i}" if y.shape[1] > 1 else "target"] = y[:, i]

        df = pd.DataFrame(data)
        df.to_csv(
            path,
            sep=self.config.separator,
            index=self.config.include_index,
            float_format=f"%.{self.config.float_precision}f",
        )

        return path



[docs]
    def to_numpy(
        self,
        path: Union[str, Path],
        X: np.ndarray,
        y: np.ndarray,
        *,
        wavelengths: Optional[np.ndarray] = None,
        compressed: bool = False,
    ) -> Path:
        """
        Export dataset to numpy .npy or .npz format.

        Args:
            path: Output file path (without extension).
            X: Feature matrix (n_samples, n_features).
            y: Target values.
            wavelengths: Optional wavelength values.
            compressed: Whether to use compressed format (.npz).

        Returns:
            Path to created file.

        Example:
            >>> exporter.to_numpy("data", X, y, compressed=True)
        """
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)

        arrays = {"X": X, "y": y}
        if wavelengths is not None:
            arrays["wavelengths"] = wavelengths

        if compressed:
            save_path = path.with_suffix(".npz")
            np.savez_compressed(save_path, **arrays)
        else:
            save_path = path.with_suffix(".npz")
            np.savez(save_path, **arrays)

        return save_path





[docs]
class CSVVariationGenerator:
    """
    Generate CSV files with various format variations for loader testing.

    This class creates CSV files with different delimiters, encodings,
    header formats, and other variations to test the robustness of
    CSV loaders.

    Attributes:
        base_exporter: DatasetExporter for actual file writing.

    Example:
        >>> generator = CSVVariationGenerator()
        >>>
        >>> # Generate all variations
        >>> paths = generator.generate_all_variations(
        ...     "test_data",
        ...     X, y,
        ...     wavelengths=wavelengths
        ... )
        >>>
        >>> # Generate specific variation
        >>> path = generator.with_semicolon_delimiter(
        ...     "data_semicolon",
        ...     X, y
        ... )
    """

    def __init__(self) -> None:
        """Initialize the variation generator."""
        self.base_exporter = DatasetExporter()


[docs]
    def generate_all_variations(
        self,
        base_path: Union[str, Path],
        X: np.ndarray,
        y: np.ndarray,
        *,
        wavelengths: Optional[np.ndarray] = None,
        train_ratio: float = 0.8,
        random_state: Optional[int] = None,
    ) -> Dict[str, Path]:
        """
        Generate CSV files with all format variations.

        Creates multiple versions of the dataset with different CSV
        format options for comprehensive loader testing.

        Args:
            base_path: Base output folder path.
            X: Feature matrix.
            y: Target values.
            wavelengths: Optional wavelength values.
            train_ratio: Train/test split ratio.
            random_state: Random seed.

        Returns:
            Dictionary mapping variation name to created path.

        Example:
            >>> paths = generator.generate_all_variations(
            ...     "test_variations",
            ...     X, y,
            ...     random_state=42
            ... )
            >>> print(paths.keys())
        """
        base_path = Path(base_path)
        base_path.mkdir(parents=True, exist_ok=True)

        paths = {}

        # Standard format (semicolon separator)
        paths["standard_semicolon"] = self.with_semicolon_delimiter(
            base_path / "standard_semicolon",
            X, y,
            wavelengths=wavelengths,
            train_ratio=train_ratio,
            random_state=random_state,
        )

        # Comma separator
        paths["comma_separated"] = self.with_comma_delimiter(
            base_path / "comma_separated",
            X, y,
            wavelengths=wavelengths,
            train_ratio=train_ratio,
            random_state=random_state,
        )

        # Tab separated
        paths["tab_separated"] = self.with_tab_delimiter(
            base_path / "tab_separated",
            X, y,
            wavelengths=wavelengths,
            train_ratio=train_ratio,
            random_state=random_state,
        )

        # No headers
        paths["no_headers"] = self.without_headers(
            base_path / "no_headers",
            X, y,
            train_ratio=train_ratio,
            random_state=random_state,
        )

        # With index
        paths["with_index"] = self.with_row_index(
            base_path / "with_index",
            X, y,
            wavelengths=wavelengths,
            train_ratio=train_ratio,
            random_state=random_state,
        )

        # Single file format
        paths["single_file"] = self.as_single_file(
            base_path / "single_file",
            X, y,
            wavelengths=wavelengths,
            train_ratio=train_ratio,
            random_state=random_state,
        )

        # Fragmented files
        paths["fragmented"] = self.as_fragmented(
            base_path / "fragmented",
            X, y,
            wavelengths=wavelengths,
            train_ratio=train_ratio,
            random_state=random_state,
        )

        # Low precision
        paths["low_precision"] = self.with_precision(
            base_path / "low_precision",
            X, y,
            wavelengths=wavelengths,
            train_ratio=train_ratio,
            random_state=random_state,
            precision=2,
        )

        # High precision
        paths["high_precision"] = self.with_precision(
            base_path / "high_precision",
            X, y,
            wavelengths=wavelengths,
            train_ratio=train_ratio,
            random_state=random_state,
            precision=10,
        )

        return paths



[docs]
    def with_semicolon_delimiter(
        self,
        path: Union[str, Path],
        X: np.ndarray,
        y: np.ndarray,
        *,
        wavelengths: Optional[np.ndarray] = None,
        train_ratio: float = 0.8,
        random_state: Optional[int] = None,
    ) -> Path:
        """Create CSV with semicolon delimiter (nirs4all default)."""
        config = ExportConfig(separator=";")
        exporter = DatasetExporter(config)
        return exporter.to_folder(
            path, X, y,
            wavelengths=wavelengths,
            train_ratio=train_ratio,
            random_state=random_state,
        )



[docs]
    def with_comma_delimiter(
        self,
        path: Union[str, Path],
        X: np.ndarray,
        y: np.ndarray,
        *,
        wavelengths: Optional[np.ndarray] = None,
        train_ratio: float = 0.8,
        random_state: Optional[int] = None,
    ) -> Path:
        """Create CSV with comma delimiter."""
        config = ExportConfig(separator=",")
        exporter = DatasetExporter(config)
        return exporter.to_folder(
            path, X, y,
            wavelengths=wavelengths,
            train_ratio=train_ratio,
            random_state=random_state,
        )



[docs]
    def with_tab_delimiter(
        self,
        path: Union[str, Path],
        X: np.ndarray,
        y: np.ndarray,
        *,
        wavelengths: Optional[np.ndarray] = None,
        train_ratio: float = 0.8,
        random_state: Optional[int] = None,
    ) -> Path:
        """Create CSV with tab delimiter."""
        config = ExportConfig(separator="\t", file_extension=".tsv")
        exporter = DatasetExporter(config)
        return exporter.to_folder(
            path, X, y,
            wavelengths=wavelengths,
            train_ratio=train_ratio,
            random_state=random_state,
        )



[docs]
    def without_headers(
        self,
        path: Union[str, Path],
        X: np.ndarray,
        y: np.ndarray,
        *,
        train_ratio: float = 0.8,
        random_state: Optional[int] = None,
    ) -> Path:
        """Create CSV without column headers."""
        config = ExportConfig(include_headers=False)
        exporter = DatasetExporter(config)

        # Need to manually write since pandas always writes headers by default
        path = Path(path)
        path.mkdir(parents=True, exist_ok=True)

        n_samples = X.shape[0]
        n_train = int(n_samples * train_ratio)

        rng = np.random.default_rng(random_state)
        indices = rng.permutation(n_samples)
        train_idx = indices[:n_train]
        test_idx = indices[n_train:]

        # Ensure y is 2D
        if y.ndim == 1:
            y_2d = y.reshape(-1, 1)
        else:
            y_2d = y

        np.savetxt(
            path / "Xcal.csv",
            X[train_idx],
            delimiter=config.separator,
            fmt=f"%.{config.float_precision}f",
        )
        np.savetxt(
            path / "Ycal.csv",
            y_2d[train_idx],
            delimiter=config.separator,
            fmt=f"%.{config.float_precision}f",
        )

        if len(test_idx) > 0:
            np.savetxt(
                path / "Xval.csv",
                X[test_idx],
                delimiter=config.separator,
                fmt=f"%.{config.float_precision}f",
            )
            np.savetxt(
                path / "Yval.csv",
                y_2d[test_idx],
                delimiter=config.separator,
                fmt=f"%.{config.float_precision}f",
            )

        return path



[docs]
    def with_row_index(
        self,
        path: Union[str, Path],
        X: np.ndarray,
        y: np.ndarray,
        *,
        wavelengths: Optional[np.ndarray] = None,
        train_ratio: float = 0.8,
        random_state: Optional[int] = None,
    ) -> Path:
        """Create CSV with row index column."""
        config = ExportConfig(include_index=True)
        exporter = DatasetExporter(config)
        return exporter.to_folder(
            path, X, y,
            wavelengths=wavelengths,
            train_ratio=train_ratio,
            random_state=random_state,
        )



[docs]
    def as_single_file(
        self,
        path: Union[str, Path],
        X: np.ndarray,
        y: np.ndarray,
        *,
        wavelengths: Optional[np.ndarray] = None,
        train_ratio: float = 0.8,
        random_state: Optional[int] = None,
    ) -> Path:
        """Create single CSV file with all data and partition column."""
        config = ExportConfig(format="single")
        exporter = DatasetExporter(config)
        return exporter.to_folder(
            path, X, y,
            wavelengths=wavelengths,
            train_ratio=train_ratio,
            random_state=random_state,
            format="single",
        )



[docs]
    def as_fragmented(
        self,
        path: Union[str, Path],
        X: np.ndarray,
        y: np.ndarray,
        *,
        wavelengths: Optional[np.ndarray] = None,
        train_ratio: float = 0.8,
        random_state: Optional[int] = None,
    ) -> Path:
        """Create fragmented dataset with multiple small files."""
        exporter = DatasetExporter()
        return exporter.to_folder(
            path, X, y,
            wavelengths=wavelengths,
            train_ratio=train_ratio,
            random_state=random_state,
            format="fragmented",
        )



[docs]
    def with_precision(
        self,
        path: Union[str, Path],
        X: np.ndarray,
        y: np.ndarray,
        *,
        wavelengths: Optional[np.ndarray] = None,
        train_ratio: float = 0.8,
        random_state: Optional[int] = None,
        precision: int = 6,
    ) -> Path:
        """Create CSV with specified floating point precision."""
        config = ExportConfig(float_precision=precision)
        exporter = DatasetExporter(config)
        return exporter.to_folder(
            path, X, y,
            wavelengths=wavelengths,
            train_ratio=train_ratio,
            random_state=random_state,
        )





[docs]
def export_to_folder(
    path: Union[str, Path],
    X: np.ndarray,
    y: np.ndarray,
    *,
    train_ratio: float = 0.8,
    wavelengths: Optional[np.ndarray] = None,
    format: Literal["standard", "single", "fragmented"] = "standard",
    random_state: Optional[int] = None,
) -> Path:
    """
    Quick function to export synthetic data to folder.

    Convenience function for simple export use cases.

    Args:
        path: Output folder path.
        X: Feature matrix.
        y: Target values.
        train_ratio: Train/test split ratio.
        wavelengths: Optional wavelength values.
        format: Export format.
        random_state: Random seed.

    Returns:
        Path to created folder.

    Example:
        >>> path = export_to_folder(
        ...     "data/synthetic",
        ...     X, y,
        ...     train_ratio=0.8,
        ...     wavelengths=wavelengths
        ... )
    """
    exporter = DatasetExporter()
    return exporter.to_folder(
        path, X, y,
        train_ratio=train_ratio,
        wavelengths=wavelengths,
        format=format,
        random_state=random_state,
    )




[docs]
def export_to_csv(
    path: Union[str, Path],
    X: np.ndarray,
    y: np.ndarray,
    *,
    wavelengths: Optional[np.ndarray] = None,
) -> Path:
    """
    Quick function to export synthetic data to single CSV.

    Args:
        path: Output file path.
        X: Feature matrix.
        y: Target values.
        wavelengths: Optional wavelength values.

    Returns:
        Path to created file.

    Example:
        >>> path = export_to_csv("data.csv", X, y)
    """
    exporter = DatasetExporter()
    return exporter.to_csv(path, X, y, wavelengths=wavelengths)