# Source code for nirs4all.cli.commands.workspace

"""
Workspace management CLI commands for nirs4all.

Provides commands for workspace initialization, run management, catalog queries,
and library operations.
"""

import argparse
import sys
from pathlib import Path
from typing import Optional

from nirs4all.core.logging import get_logger

logger = get_logger(__name__)



[docs]
def workspace_init(args):
    """Create a workspace at ``args.path`` and report what was set up.

    A ``WorkspaceManager`` is built for the requested path and asked to
    initialize the workspace. The directory list logged afterwards is a
    fixed list written here; it is not read back from the manager.

    Args:
        args: Parsed CLI namespace. Only ``args.path`` is read.
    """
    from nirs4all.workspace import WorkspaceManager

    target = Path(args.path)
    WorkspaceManager(target).initialize_workspace()

    logger.success(f"Workspace initialized at: {target}")
    logger.info("  Created directories:")

    # Layout reported to the user, in display order.
    reported_dirs = (
        "runs/",
        "exports/full_pipelines/",
        "exports/best_predictions/",
        "library/templates/",
        "library/trained/filtered/",
        "library/trained/pipeline/",
        "library/trained/fullrun/",
        "catalog/",
    )
    for subdir in reported_dirs:
        logger.info(f"    - {subdir}")




[docs]
def workspace_list_runs(args):
    """Log every run found in the workspace at ``args.workspace``.

    For each run the name, dataset and date are logged; a custom name is
    logged as well when the run entry carries a truthy ``custom_name``.
    When the manager reports no runs, a single informational message is
    logged instead.

    Args:
        args: Parsed CLI namespace. Only ``args.workspace`` is read.
    """
    from nirs4all.workspace import WorkspaceManager

    manager = WorkspaceManager(Path(args.workspace))
    found = manager.list_runs()

    if found:
        logger.info(f"Found {len(found)} run(s):\n")
        for entry in found:
            logger.info(f"  {entry['name']}")
            logger.info(f"    Dataset: {entry['dataset']}")
            logger.info(f"    Date: {entry['date']}")
            if entry.get('custom_name'):
                logger.info(f"    Custom name: {entry['custom_name']}")
            # Blank line separates consecutive runs in the output.
            logger.info("")
    else:
        logger.info("No runs found in workspace.")




[docs]
def workspace_query_best(args):
    """Show the best-ranked predictions stored in the workspace catalog.

    The catalog is expected at ``<args.workspace>/catalog`` and must
    contain ``predictions_meta.parquet``. If either is missing, or loading
    fails, an error is logged and the process exits with status 1.

    Args:
        args: Parsed CLI namespace. Reads ``workspace``, ``dataset``,
            ``metric``, ``n`` and ``ascending``.
    """
    from nirs4all.data.predictions import Predictions

    catalog = Path(args.workspace) / "catalog"

    if not catalog.exists():
        logger.error(f"Catalog not found at {catalog}")
        logger.info("Run pipelines and archive predictions first.")
        sys.exit(1)

    if not (catalog / "predictions_meta.parquet").exists():
        logger.error("No predictions in catalog.")
        logger.info("Archive pipeline predictions using Predictions.archive_to_catalog()")
        sys.exit(1)

    # Any failure while reading the catalog is reported and turned into exit 1.
    try:
        loaded = Predictions.load_from_parquet(catalog)
        logger.success(f"Loaded {loaded._df.height} predictions from catalog\n")
    except Exception as exc:
        logger.error(f"Error loading catalog: {exc}")
        sys.exit(1)

    # Gather the ranking parameters, then run the query.
    query = {
        "dataset_name": args.dataset,
        "metric": args.metric,
        "n": args.n,
        "ascending": args.ascending,
    }
    top = loaded.query_best(**query)

    if top.height == 0:
        logger.info("No predictions found matching criteria.")
        return

    rule = '=' * 80
    logger.info(f"Top {args.n} pipelines by {args.metric}:")
    logger.info(f"{rule}\n")

    # The result is rendered through pandas for a plain-text table.
    table = top.to_pandas().to_string(index=False)
    logger.info(table)




[docs]
def workspace_query_filter(args):
    """Filter catalog predictions by dataset and score thresholds.

    Loads the predictions stored under ``<args.workspace>/catalog`` and
    passes the dataset name plus any supplied score thresholds to
    ``Predictions.filter_by_criteria``. The number of matching rows is
    logged, followed by a table of the rows when there are any.

    A threshold counts as supplied when its value is not ``None``, so an
    explicit ``0`` / ``0.0`` threshold is forwarded instead of being
    silently ignored.

    Args:
        args: Parsed CLI namespace. Reads ``workspace``, ``dataset``,
            ``test_score``, ``train_score`` and ``val_score``.

    Raises:
        SystemExit: With status 1 when the catalog directory does not exist.
    """
    from nirs4all.data.predictions import Predictions

    workspace_path = Path(args.workspace)
    catalog_dir = workspace_path / "catalog"

    if not catalog_dir.exists():
        logger.error(f"Catalog not found at {catalog_dir}")
        sys.exit(1)

    # Load predictions
    preds = Predictions.load_from_parquet(catalog_dir)

    # Build metric thresholds. Compare against None rather than relying on
    # truthiness: 0.0 is a legitimate threshold but is falsy, and would
    # otherwise be dropped as if the option had not been given.
    thresholds = {}
    for name in ('test_score', 'train_score', 'val_score'):
        value = getattr(args, name)
        if value is not None:
            thresholds[name] = value

    # Apply filters; no thresholds at all is signalled with None.
    filtered = preds.filter_by_criteria(
        dataset_name=args.dataset,
        metric_thresholds=thresholds or None
    )

    logger.info(f"Found {filtered.height} predictions matching criteria\n")

    if filtered.height > 0:
        df = filtered.to_pandas()
        logger.info(df.to_string(index=False))




[docs]
def workspace_stats(args):
    """Log summary statistics for the workspace catalog.

    Reports the total number of predictions, a per-dataset count when a
    ``dataset_name`` column is present, and min/max/mean/median/std for
    ``args.metric`` when that column is present.

    Args:
        args: Parsed CLI namespace. Reads ``workspace`` and ``metric``.

    Exits with status 1 when ``<args.workspace>/catalog`` does not exist.
    """
    from nirs4all.data.predictions import Predictions

    catalog = Path(args.workspace) / "catalog"
    if not catalog.exists():
        logger.error(f"Catalog not found at {catalog}")
        sys.exit(1)

    store = Predictions.load_from_parquet(catalog)
    frame = store._df

    rule = '=' * 60
    logger.info("Catalog Statistics")
    logger.info(f"{rule}\n")
    logger.info(f"Total predictions: {frame.height}")

    # Per-dataset breakdown, only when the column is available.
    if 'dataset_name' in frame.columns:
        names = frame['dataset_name'].unique().to_list()
        logger.info(f"Datasets: {len(names)}")
        for name in names:
            rows = frame.filter(frame['dataset_name'] == name).height
            logger.info(f"  - {name}: {rows} predictions")

    logger.info("")

    # Summary of the requested metric, only when the column is available.
    chosen = args.metric
    if chosen in frame.columns:
        summary = store.get_summary_stats(metric=chosen)
        logger.info(f"{chosen} statistics:")
        # Labels are left-padded to 8 characters so the values line up.
        rows = (
            ("Min:", 'min'),
            ("Max:", 'max'),
            ("Mean:", 'mean'),
            ("Median:", 'median'),
            ("Std:", 'std'),
        )
        for label, key in rows:
            logger.info(f"  {label:<8}{summary[key]:.4f}")




[docs]
def workspace_list_library(args):
    """Log the contents of the workspace library, one section per kind.

    Sections are emitted in this order: templates, filtered pipelines,
    full pipelines, full runs. Each section shows a count followed by one
    line per item with its name and description (or ``No description``).

    Args:
        args: Parsed CLI namespace. Only ``args.workspace`` is read.

    Exits with status 1 when ``<args.workspace>/library`` does not exist.
    """
    from nirs4all.workspace import LibraryManager

    library_root = Path(args.workspace) / "library"
    if not library_root.exists():
        logger.error(f"Library not found at {library_root}")
        sys.exit(1)

    library = LibraryManager(library_root)

    def _report(heading, entries, blank_after=True):
        # One section: count line, one line per entry, optional blank separator.
        logger.info(f"{heading}: {len(entries)}")
        for item in entries:
            logger.info(f"  - {item['name']}: {item.get('description', 'No description')}")
        if blank_after:
            logger.info("")

    _report("Templates", library.list_templates())
    _report("Filtered pipelines", library.list_filtered())
    _report("Full pipelines", library.list_pipelines())
    # The last section is not followed by a blank line.
    _report("Full runs", library.list_fullruns(), blank_after=False)




[docs]
def add_workspace_commands(subparsers):
    """Register the ``workspace`` command group and its sub-commands.

    Adds a ``workspace`` parser to ``subparsers`` with the sub-commands
    ``init``, ``list-runs``, ``query-best``, ``filter``, ``stats`` and
    ``list-library``. Each sub-command stores its handler function under
    ``func`` via ``set_defaults``; the chosen sub-command name is stored
    under ``workspace_command``.

    Args:
        subparsers: The object returned by ``add_subparsers()`` on the
            parent parser.
    """
    group = subparsers.add_parser('workspace', help='Workspace management commands')
    commands = group.add_subparsers(dest='workspace_command')

    def _add_workspace_option(parser):
        # Shared option naming the workspace root directory.
        parser.add_argument(
            '--workspace',
            type=str,
            default='workspace',
            help='Workspace root directory (default: workspace)',
        )

    # init: takes the target path as a positional argument.
    init_cmd = commands.add_parser('init', help='Initialize a new workspace')
    init_cmd.add_argument('path', type=str, help='Path to workspace directory')
    init_cmd.set_defaults(func=workspace_init)

    # list-runs
    runs_cmd = commands.add_parser('list-runs', help='List all runs in workspace')
    _add_workspace_option(runs_cmd)
    runs_cmd.set_defaults(func=workspace_list_runs)

    # query-best
    best_cmd = commands.add_parser('query-best', help='Query best pipelines from catalog')
    _add_workspace_option(best_cmd)
    best_cmd.add_argument('--dataset', type=str, help='Filter by dataset name')
    best_cmd.add_argument(
        '--metric',
        type=str,
        default='test_score',
        help='Metric to sort by (default: test_score)',
    )
    best_cmd.add_argument('-n', type=int, default=10, help='Number of results (default: 10)')
    best_cmd.add_argument(
        '--ascending',
        action='store_true',
        help='Sort ascending (lower is better)',
    )
    best_cmd.set_defaults(func=workspace_query_best)

    # filter
    filter_cmd = commands.add_parser('filter', help='Filter predictions by criteria')
    _add_workspace_option(filter_cmd)
    filter_cmd.add_argument('--dataset', type=str, help='Filter by dataset name')
    filter_cmd.add_argument('--test-score', type=float, help='Minimum test score')
    filter_cmd.add_argument('--train-score', type=float, help='Minimum train score')
    filter_cmd.add_argument('--val-score', type=float, help='Minimum validation score')
    filter_cmd.set_defaults(func=workspace_query_filter)

    # stats
    stats_cmd = commands.add_parser('stats', help='Show catalog statistics')
    _add_workspace_option(stats_cmd)
    stats_cmd.add_argument(
        '--metric',
        type=str,
        default='test_score',
        help='Metric for statistics (default: test_score)',
    )
    stats_cmd.set_defaults(func=workspace_stats)

    # list-library
    library_cmd = commands.add_parser('list-library', help='List items in library')
    _add_workspace_option(library_cmd)
    library_cmd.set_defaults(func=workspace_list_library)