# Source code for nirs4all.cli.commands.workspace

"""
Workspace management CLI commands for nirs4all.

Provides commands for workspace initialization, run management, catalog queries,
and library operations.
"""

import argparse
import sys
from pathlib import Path
from typing import Optional

from nirs4all.core.logging import get_logger

logger = get_logger(__name__)


def workspace_init(args):
    """Initialize a new workspace.

    Builds a ``WorkspaceManager`` for ``args.path``, asks it to initialize
    the workspace, then logs the location and the list of directories
    reported as created.

    Args:
        args: Parsed CLI arguments; ``args.path`` is the workspace directory.
    """
    # Imported lazily, as in the rest of this module's command handlers.
    from nirs4all.workspace import WorkspaceManager

    target = Path(args.path)
    WorkspaceManager(target).initialize_workspace()

    logger.success(f"Workspace initialized at: {target}")

    # One log record per line, emitted in this exact order.
    layout_report = (
        " Created directories:",
        " - runs/",
        " - exports/full_pipelines/",
        " - exports/best_predictions/",
        " - library/templates/",
        " - library/trained/filtered/",
        " - library/trained/pipeline/",
        " - library/trained/fullrun/",
        " - catalog/",
    )
    for line in layout_report:
        logger.info(line)
def workspace_list_runs(args):
    """List all runs in workspace.

    Logs one entry per run returned by ``WorkspaceManager.list_runs()``:
    its name, dataset, date and, when present and non-empty, its custom
    name. Each entry is followed by an empty log line.

    Args:
        args: Parsed CLI arguments; ``args.workspace`` is the workspace root.
    """
    from nirs4all.workspace import WorkspaceManager

    manager = WorkspaceManager(Path(args.workspace))
    entries = manager.list_runs()

    if not entries:
        logger.info("No runs found in workspace.")
        return

    logger.info(f"Found {len(entries)} run(s):\n")

    for entry in entries:
        logger.info(f" {entry['name']}")
        logger.info(f" Dataset: {entry['dataset']}")
        logger.info(f" Date: {entry['date']}")

        # The custom name is optional; skip it when missing or empty.
        custom_name = entry.get('custom_name')
        if custom_name:
            logger.info(f" Custom name: {custom_name}")

        # Blank separator between runs.
        logger.info("")
def workspace_query_best(args):
    """Query best pipelines from catalog.

    Loads the predictions stored under ``<workspace>/catalog``, asks for the
    best ``args.n`` entries according to ``args.metric`` and logs them as a
    table.

    Args:
        args: Parsed CLI arguments providing ``workspace``, ``dataset``,
            ``metric``, ``n`` and ``ascending``.

    Exits with status 1 when the catalog directory or its
    ``predictions_meta.parquet`` file is missing, or when loading fails.
    """
    from nirs4all.data.predictions import Predictions

    catalog_dir = Path(args.workspace) / "catalog"

    # Guard: the catalog directory must exist.
    if not catalog_dir.exists():
        logger.error(f"Catalog not found at {catalog_dir}")
        logger.info("Run pipelines and archive predictions first.")
        sys.exit(1)

    # Guard: the catalog must contain archived prediction metadata.
    if not (catalog_dir / "predictions_meta.parquet").exists():
        logger.error("No predictions in catalog.")
        logger.info("Archive pipeline predictions using Predictions.archive_to_catalog()")
        sys.exit(1)

    # Any failure while loading (or reporting the load) ends the command.
    try:
        preds = Predictions.load_from_parquet(catalog_dir)
        logger.success(f"Loaded {preds._df.height} predictions from catalog\n")
    except Exception as e:
        logger.error(f"Error loading catalog: {e}")
        sys.exit(1)

    query = {
        'dataset_name': args.dataset,
        'metric': args.metric,
        'n': args.n,
        'ascending': args.ascending,
    }
    best = preds.query_best(**query)

    if best.height == 0:
        logger.info("No predictions found matching criteria.")
        return

    logger.info(f"Top {args.n} pipelines by {args.metric}:")
    logger.info(f"{'='*80}\n")

    # pandas' to_string gives a readable fixed-width table.
    table = best.to_pandas().to_string(index=False)
    logger.info(table)
def workspace_query_filter(args):
    """Filter predictions by criteria.

    Loads the predictions stored under ``<workspace>/catalog`` and keeps the
    ones matching the optional dataset name and the optional minimum
    ``test_score`` / ``train_score`` / ``val_score`` thresholds, then logs
    the number of matches and, if any, a table of them.

    Args:
        args: Parsed CLI arguments providing ``workspace``, ``dataset``,
            ``test_score``, ``train_score`` and ``val_score``. A threshold
            left at ``None`` (the argparse default for an omitted option)
            is not applied.

    Exits with status 1 when the catalog directory is missing or the
    catalog cannot be loaded.
    """
    from nirs4all.data.predictions import Predictions

    catalog_dir = Path(args.workspace) / "catalog"

    if not catalog_dir.exists():
        logger.error(f"Catalog not found at {catalog_dir}")
        sys.exit(1)

    # Report load failures the same way workspace_query_best does instead
    # of letting a raw traceback reach the CLI user.
    try:
        preds = Predictions.load_from_parquet(catalog_dir)
    except Exception as e:
        logger.error(f"Error loading catalog: {e}")
        sys.exit(1)

    # Build metric thresholds. Compare against None rather than relying on
    # truthiness: an explicit threshold of 0.0 is a legitimate request and
    # must not be silently discarded.
    thresholds = {}
    for key in ('test_score', 'train_score', 'val_score'):
        value = getattr(args, key, None)
        if value is not None:
            thresholds[key] = value

    filtered = preds.filter_by_criteria(
        dataset_name=args.dataset,
        metric_thresholds=thresholds if thresholds else None
    )

    logger.info(f"Found {filtered.height} predictions matching criteria\n")

    if filtered.height > 0:
        df = filtered.to_pandas()
        logger.info(df.to_string(index=False))
def workspace_stats(args):
    """Show catalog statistics.

    Logs the total number of predictions in ``<workspace>/catalog``, a
    per-dataset count when a ``dataset_name`` column is present, and the
    min / max / mean / median / std of ``args.metric`` when that column
    exists.

    Args:
        args: Parsed CLI arguments providing ``workspace`` and ``metric``.

    Exits with status 1 when the catalog directory is missing or the
    catalog cannot be loaded.
    """
    from nirs4all.data.predictions import Predictions

    def _fmt(value):
        # A statistic may be undefined (None); ``:.4f`` would raise
        # TypeError on it, so render a placeholder instead.
        return "n/a" if value is None else f"{value:.4f}"

    catalog_dir = Path(args.workspace) / "catalog"

    if not catalog_dir.exists():
        logger.error(f"Catalog not found at {catalog_dir}")
        sys.exit(1)

    # Report load failures the same way workspace_query_best does instead
    # of letting a raw traceback reach the CLI user.
    try:
        preds = Predictions.load_from_parquet(catalog_dir)
    except Exception as e:
        logger.error(f"Error loading catalog: {e}")
        sys.exit(1)

    frame = preds._df

    logger.info("Catalog Statistics")
    logger.info(f"{'='*60}\n")
    logger.info(f"Total predictions: {frame.height}")

    # Datasets
    if 'dataset_name' in frame.columns:
        datasets = frame['dataset_name'].unique().to_list()
        logger.info(f"Datasets: {len(datasets)}")
        for ds in datasets:
            count = frame.filter(frame['dataset_name'] == ds).height
            logger.info(f" - {ds}: {count} predictions")
        logger.info("")

    # Metric statistics
    metric = args.metric
    if metric not in frame.columns:
        # Previously an unknown metric produced no output at all, which
        # was indistinguishable from a typo on the command line.
        logger.info(f"Metric '{metric}' not found in catalog; no statistics available.")
        return

    stats = preds.get_summary_stats(metric=metric)
    logger.info(f"{metric} statistics:")
    logger.info(f" Min: {_fmt(stats['min'])}")
    logger.info(f" Max: {_fmt(stats['max'])}")
    logger.info(f" Mean: {_fmt(stats['mean'])}")
    logger.info(f" Median: {_fmt(stats['median'])}")
    logger.info(f" Std: {_fmt(stats['std'])}")
def workspace_list_library(args):
    """List items in library.

    Logs four sections in order -- templates, filtered pipelines, full
    pipelines and full runs -- each with a count line followed by one
    ``- name: description`` line per item. Sections are separated by an
    empty log line.

    Args:
        args: Parsed CLI arguments; ``args.workspace`` is the workspace root.

    Exits with status 1 when ``<workspace>/library`` does not exist.
    """
    from nirs4all.workspace import LibraryManager

    library_dir = Path(args.workspace) / "library"

    if not library_dir.exists():
        logger.error(f"Library not found at {library_dir}")
        sys.exit(1)

    library = LibraryManager(library_dir)

    # (heading, LibraryManager method name) in display order. Methods are
    # looked up and called lazily so each listing happens right before its
    # section is logged.
    sections = (
        ("Templates", "list_templates"),
        ("Filtered pipelines", "list_filtered"),
        ("Full pipelines", "list_pipelines"),
        ("Full runs", "list_fullruns"),
    )

    for position, (heading, lister_name) in enumerate(sections):
        # Blank separator between sections (none before the first one and
        # none after the last one).
        if position > 0:
            logger.info("")

        items = getattr(library, lister_name)()
        logger.info(f"{heading}: {len(items)}")
        for item in items:
            logger.info(f" - {item['name']}: {item.get('description', 'No description')}")
def add_workspace_commands(subparsers):
    """Add workspace commands to CLI.

    Registers a ``workspace`` command on ``subparsers`` with the
    sub-commands ``init``, ``list-runs``, ``query-best``, ``filter``,
    ``stats`` and ``list-library``. Each sub-command stores its handler in
    the ``func`` default of its parser.

    Args:
        subparsers: The sub-parsers action returned by
            ``ArgumentParser.add_subparsers()`` on the top-level parser.
    """

    def with_workspace_option(parser):
        # Every sub-command except ``init`` takes the same --workspace
        # option, always as its first argument.
        parser.add_argument(
            '--workspace',
            type=str,
            default='workspace',
            help='Workspace root directory (default: workspace)'
        )
        return parser

    # Workspace command group
    group = subparsers.add_parser('workspace', help='Workspace management commands')
    commands = group.add_subparsers(dest='workspace_command')

    # workspace init
    init_cmd = commands.add_parser('init', help='Initialize a new workspace')
    init_cmd.add_argument('path', type=str, help='Path to workspace directory')
    init_cmd.set_defaults(func=workspace_init)

    # workspace list-runs
    runs_cmd = with_workspace_option(
        commands.add_parser('list-runs', help='List all runs in workspace')
    )
    runs_cmd.set_defaults(func=workspace_list_runs)

    # workspace query-best
    best_cmd = with_workspace_option(
        commands.add_parser('query-best', help='Query best pipelines from catalog')
    )
    best_cmd.add_argument('--dataset', type=str, help='Filter by dataset name')
    best_cmd.add_argument(
        '--metric',
        type=str,
        default='test_score',
        help='Metric to sort by (default: test_score)'
    )
    best_cmd.add_argument('-n', type=int, default=10, help='Number of results (default: 10)')
    best_cmd.add_argument(
        '--ascending',
        action='store_true',
        help='Sort ascending (lower is better)'
    )
    best_cmd.set_defaults(func=workspace_query_best)

    # workspace filter
    filter_cmd = with_workspace_option(
        commands.add_parser('filter', help='Filter predictions by criteria')
    )
    filter_cmd.add_argument('--dataset', type=str, help='Filter by dataset name')
    filter_cmd.add_argument('--test-score', type=float, help='Minimum test score')
    filter_cmd.add_argument('--train-score', type=float, help='Minimum train score')
    filter_cmd.add_argument('--val-score', type=float, help='Minimum validation score')
    filter_cmd.set_defaults(func=workspace_query_filter)

    # workspace stats
    stats_cmd = with_workspace_option(
        commands.add_parser('stats', help='Show catalog statistics')
    )
    stats_cmd.add_argument(
        '--metric',
        type=str,
        default='test_score',
        help='Metric for statistics (default: test_score)'
    )
    stats_cmd.set_defaults(func=workspace_stats)

    # workspace list-library
    library_cmd = with_workspace_option(
        commands.add_parser('list-library', help='List items in library')
    )
    library_cmd.set_defaults(func=workspace_list_library)