hidai25 · Nova-OpenClawAgent · Apr 18, 2026
diff --git a/evalview/commands/snapshot_cmd.py b/evalview/commands/snapshot_cmd.py
@@ -1,6 +1,8 @@
 """Snapshot command — run tests and save passing results as baseline."""
 from __future__ import annotations
 
+import json
+import sys
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Dict, List, Optional, TYPE_CHECKING
@@ -189,9 +191,10 @@ def _group_tests_by_target(test_cases: List, config) -> Dict[tuple[str, str], li
 @click.option("--no-judge", "no_judge", is_flag=True, default=False, help="Skip LLM-as-judge evaluation. Uses deterministic scoring only (scores capped at 75). No API key required.")
 @click.option("--timeout", default=30.0, type=float, help="Timeout in seconds per test (default: 30).")
 @click.option("--preview", is_flag=True, help="Show what would change without saving. Dry-run mode for snapshot.")
+@click.option("--json", "json_output", is_flag=True, help="Output JSON for CI (suppresses Rich console output)")
 @track_command("snapshot")
 @click.pass_context
-def snapshot(ctx: click.Context, test_path: str, notes: str, test: str, variant: str, approve_generated: bool, reset: bool, judge_model: Optional[str], no_judge: bool, timeout: float, preview: bool):
+def snapshot(ctx: click.Context, test_path: str, notes: str, test: str, variant: str, approve_generated: bool, reset: bool, judge_model: Optional[str], no_judge: bool, timeout: float, preview: bool, json_output: bool):
     """Run tests and snapshot passing results as baseline.
 
     This is the simple workflow: snapshot → check → fix → snapshot.
@@ -218,7 +221,8 @@ def snapshot(ctx: click.Context, test_path: str, notes: str, test: str, variant:
     from evalview.core.messages import get_random_snapshot_message
     from evalview.skills.ui_utils import print_evalview_banner
 
-    print_evalview_banner(console, subtitle="[dim]Catch agent regressions before you ship[/dim]")
+    if not json_output:
+        print_evalview_banner(console, subtitle="[dim]Catch agent regressions before you ship[/dim]")
 
     # Initialize stores
     from evalview.core.golden import GoldenStore
@@ -236,14 +240,17 @@ def snapshot(ctx: click.Context, test_path: str, notes: str, test: str, variant:
                 import shutil
                 shutil.rmtree(golden_dir)
                 golden_dir.mkdir(parents=True, exist_ok=True)
-            console.print(f"[yellow]Cleared {len(existing)} existing baseline(s).[/yellow]\n")
+            if not json_output:
+                console.print(f"[yellow]Cleared {len(existing)} existing baseline(s).[/yellow]\n")
         else:
-            console.print("[dim]No existing baselines to clear.[/dim]\n")
+            if not json_output:
+                console.print("[dim]No existing baselines to clear.[/dim]\n")
 
     # Check if this is the first snapshot ever
     is_first = state_store.is_first_snapshot()
 
-    console.print(f"\n[cyan]▶ {get_random_snapshot_message()}[/cyan]\n")
+    if not json_output:
+        console.print(f"\n[cyan]▶ {get_random_snapshot_message()}[/cyan]\n")
 
     # Load test cases
     loader = TestCaseLoader()
@@ -255,13 +262,19 @@ def snapshot(ctx: click.Context, test_path: str, notes: str, test: str, variant:
         return
 
     if not test_cases:
+        if json_output:
+            print(json.dumps({"error": "no_tests_found", "test_path": test_path}))
+            sys.exit(1)
         Celebrations.no_tests_found()
         return
 
     # Filter to specific test if requested
     if test:
         test_cases = [tc for tc in test_cases if tc.name == test]
         if not test_cases:
+            if json_output:
+                print(json.dumps({"error": "test_not_found", "test": test}))
+                sys.exit(1)
             console.print(f"[red]❌ No test found with name: {test}[/red]\n")
             return
 
@@ -358,6 +371,24 @@ def snapshot(ctx: click.Context, test_path: str, notes: str, test: str, variant:
     # Save passing results as golden
     saved_count = _save_snapshot_results(results, notes, variant=variant)
 
+    if json_output:
+        saved_names = [r.test_case for r in results if r.passed]
+        golden_dir = GoldenStore().golden_dir
+        golden_paths = [
+            str(golden_dir / f"{name}.yaml") for name in saved_names
+        ]
+        print(json.dumps({
+            "snapshotted": saved_count,
+            "total": len(test_cases),
+            "failed": len(test_cases) - saved_count,
+            "test_names": saved_names,
+            "golden_paths": golden_paths,
+            "timestamp": datetime.now(timezone.utc).isoformat(),
+            "notes": notes,
+            "variant": variant,
+        }))
+        sys.exit(0 if saved_count > 0 else 1)
+
     if saved_count == 0:
         if draft_generated and approve_generated:
             console.print(