4 changes: 4 additions & 0 deletions .gitignore
@@ -27,6 +27,10 @@ dist
*.swp
*.swo

# Python
__pycache__
*.pyc

# OS
.DS_Store
Thumbs.db
108 changes: 108 additions & 0 deletions scripts/analyze-style/README.md
@@ -0,0 +1,108 @@
# Video Style Analyzer

Extracts visual style from launch videos — color palettes, motion intensity, audio pacing, scene structure — and optionally uses Gemini Vision to synthesize a detailed style guide that a motion designer could use to recreate the look and feel.

## Setup

```bash
cd scripts/analyze-style
pip install -r requirements.txt
```

For Gemini Vision synthesis, set your API key:

```bash
export GEMINI_API_KEY="your-key-here"
```

## Usage

### Basic analysis (metrics only)

```bash
python3 analyze.py /path/to/video.mp4 --output-dir ./output --output-json ./output/results.json
```

This runs the full metrics pipeline:
- **Scene detection** — PySceneDetect identifies scene boundaries and extracts keyframes (start/mid/end per scene)
- **Color analysis** — dominant colors, brightness, saturation, warmth per scene
- **Audio analysis** — tempo (BPM), beat count, energy curve, spectral features
- **Motion analysis** — frame-difference scoring per scene, editing pace classification
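
As an illustration of the motion metric, frame-difference scoring boils down to the mean absolute pixel difference between consecutive grayscale frames. This is a simplified sketch of that idea, not the module's actual OpenCV code (`motion_score` here is illustrative and operates on nested lists rather than image arrays):

```python
def motion_score(prev: list[list[int]], curr: list[list[int]]) -> float:
    """Mean absolute pixel difference between two grayscale frames.

    Illustrative only: the real motion_analyzer.py works on OpenCV
    frames, but the underlying metric is the same idea.
    """
    total = 0
    count = 0
    for row_a, row_b in zip(prev, curr):
        for a, b in zip(row_a, row_b):
            total += abs(a - b)
            count += 1
    return total / count if count else 0.0
```

A static scene yields a score near zero; a hard cut or fast pan yields a large one, which is what drives the editing-pace classification.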

### Full analysis with style synthesis

```bash
GEMINI_API_KEY="your-key" python3 analyze.py /path/to/video.mp4 \
--output-dir ./output \
--output-json ./output/results.json \
--synthesize
```

The `--synthesize` flag sends extracted keyframes to Gemini Vision and produces:
- **Per-scene descriptions** — layout composition, typography treatment, animation states, iconography style
- **Transition descriptions** — how scenes connect (hard cuts, dissolves, shared visual anchors)
- **Style guide** — a comprehensive breakdown covering visual identity, typography, motion language, layout, narrative structure, iconography, audio-visual sync, and overall feel

### Flags

| Flag | Description |
|------|-------------|
| `--output-dir DIR` | Where to save extracted keyframes (default: temp directory) |
| `--output-json PATH` | Write JSON results to file (default: prints to stdout) |
| `--skip-audio` | Skip librosa audio analysis |
| `--synthesize` | Run Gemini Vision synthesis (requires `GEMINI_API_KEY`) |

### Run individual modules

Each module can also be run standalone for debugging:

```bash
python3 scene_detector.py /path/to/video.mp4 # Scene detection + keyframes
python3 color_analyzer.py /path/to/image.jpg # Color palette from an image
python3 audio_analyzer.py /path/to/video.mp4 # Audio features
python3 motion_analyzer.py /path/to/video.mp4 # Motion scores
python3 synthesizer.py /path/to/results.json # Run synthesis on existing results
```

## Output format

The JSON output follows this structure:

```jsonc
{
"video_path": "...",
"video_info": { "fps", "total_frames", "duration", "resolution" },
"scenes": [
{
"scene_number": 1,
"start_time": 0.0,
"end_time": 10.5,
"duration": 10.5,
"keyframe_path": "output/scene_0001_mid.jpg",
"keyframe_paths": ["..._start.jpg", "..._mid.jpg", "..._end.jpg"],
"colors": { "dominant_color", "avg_brightness", "avg_saturation", "warmth", "palette_hex" },
"motion_score": 2.03
}
],
"audio": { "tempo_bpm", "beat_count", "duration", "avg_energy", "energy_description", "pacing_description" },
"motion": { "avg_motion_score", "motion_variance", "motion_description", "editing_pace" },
"style_summary": { "color_mood", "avg_brightness", "pacing", "motion_level", "scene_count", "avg_scene_duration" },
// Only present with --synthesize:
"synthesis": {
"scene_descriptions": ["..."],
"transition_descriptions": ["..."],
"style_guide": "..."
}
}
```
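
A downstream script can consume this JSON directly. For example (hypothetical consumer code; only the field names come from the schema above):

```python
import json  # used as json.load(...) when reading a results file


def summarize_scenes(results: dict) -> list[str]:
    """Return one human-readable line per scene from a results dict."""
    lines = []
    for scene in results.get("scenes", []):
        colors = scene.get("colors", {})
        lines.append(
            f"scene {scene['scene_number']}: {scene['duration']:.1f}s, "
            f"dominant color {colors.get('dominant_color')}"
        )
    return lines


# Typical use, assuming --output-json ./output/results.json was passed:
#   with open("output/results.json", encoding="utf-8") as f:
#       print("\n".join(summarize_scenes(json.load(f))))
```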

## Architecture

```
analyze.py — CLI entry point, orchestrates all modules
scene_detector.py — PySceneDetect keyframe extraction (start/mid/end per scene)
color_analyzer.py — colorgram.py color palette extraction + warmth/brightness analysis
audio_analyzer.py — librosa tempo, energy, spectral feature extraction
motion_analyzer.py — OpenCV frame-difference motion scoring
synthesizer.py — Gemini Vision scene captioning + style guide synthesis
```
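
The per-scene `warmth` label produced by `color_analyzer.py` can be understood as a red-vs-blue dominance heuristic. The following is an assumed sketch of that kind of classifier, not the module's actual implementation (the threshold `1.15` is an invented illustration):

```python
def classify_warmth(r: float, g: float, b: float) -> str:
    """Classify an average RGB color as warm, cool, or neutral.

    Hypothetical heuristic: the real color_analyzer.py may weigh
    channels differently or use a different threshold.
    """
    if r > b * 1.15:
        return "warm"
    if b > r * 1.15:
        return "cool"
    return "neutral"
```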
264 changes: 264 additions & 0 deletions scripts/analyze-style/analyze.py
@@ -0,0 +1,264 @@
"""Main CLI entry point for video style analysis.

Orchestrates scene detection, color analysis, audio analysis, and motion
analysis to produce a comprehensive style profile of a video file.
"""

from __future__ import annotations

import argparse
import json
import os
import sys
import tempfile
from collections import Counter
from typing import Optional

from scene_detector import detect_scenes
from color_analyzer import extract_palette, analyze_palette
from audio_analyzer import analyze_audio
from motion_analyzer import analyze_motion, get_video_info
from synthesizer import run_synthesis


def _log(msg: str) -> None:
"""Print a progress message to stderr so stdout stays clean for JSON."""
print(msg, file=sys.stderr)


def analyze_video(video_path: str, output_dir: str, skip_audio: bool = False) -> dict:
"""Run the full analysis pipeline on a video file.

Args:
video_path: Path to the input video file.
output_dir: Directory to save keyframes and intermediate results.
skip_audio: If True, skip audio analysis entirely.

Returns:
A dictionary containing the complete style analysis results.
"""
video_path = os.path.abspath(video_path)

# ── Video metadata ──────────────────────────────────────────────────
_log("Extracting video metadata...")
video_info: dict = get_video_info(video_path)

# ── Scene detection ─────────────────────────────────────────────────
_log("Detecting scenes and extracting keyframes...")
scenes: list[dict] = detect_scenes(video_path, output_dir)
_log(f" Found {len(scenes)} scene(s).")

# ── Color analysis per keyframe ─────────────────────────────────────
_log("Analyzing color palettes...")
for scene in scenes:
keyframe_path: str = scene.get("keyframe_path", "")
if keyframe_path and os.path.isfile(keyframe_path):
palette = extract_palette(keyframe_path)
scene["colors"] = analyze_palette(palette)
else:
scene["colors"] = {
"dominant_color": None,
"avg_brightness": 0.0,
"avg_saturation": 0.0,
"warmth": "neutral",
"palette_hex": [],
}

# ── Audio analysis ──────────────────────────────────────────────────
audio_result: Optional[dict] = None
if skip_audio:
_log("Skipping audio analysis (--skip-audio).")
else:
_log("Analyzing audio track...")
try:
audio_result = analyze_audio(video_path)
_log(" Audio analysis complete.")
except (FileNotFoundError, RuntimeError) as exc:
_log(f" Audio analysis failed: {exc}")
audio_result = None

# ── Motion analysis ─────────────────────────────────────────────────
_log("Analyzing motion...")
scene_boundaries: list[tuple[float, float]] = [
(s["start_time"], s["end_time"]) for s in scenes
]
motion_result: dict = analyze_motion(video_path, scene_boundaries)

# Attach per-scene motion scores when available.
per_scene_scores: list[float] = motion_result.get("scene_motion_scores", [])
for i, scene in enumerate(scenes):
if i < len(per_scene_scores):
scene["motion_score"] = per_scene_scores[i]

# ── Style summary ───────────────────────────────────────────────────
_log("Computing style summary...")
style_summary = _compute_style_summary(scenes, audio_result, motion_result)

# ── Assemble final output ───────────────────────────────────────────
# Build a clean audio dict for the output (drop energy_curve for brevity).
audio_output: Optional[dict] = None
if audio_result is not None:
audio_output = {
"tempo_bpm": audio_result["tempo_bpm"],
"beat_count": audio_result["beat_count"],
"duration": audio_result["duration_seconds"],
"avg_energy": audio_result["avg_energy"],
"energy_description": audio_result["energy_description"],
"pacing_description": audio_result["pacing_description"],
"spectral_centroid_mean": audio_result["spectral_centroid_mean"],
}

result: dict = {
"video_path": video_path,
"video_info": video_info,
"scenes": [
{
"scene_number": s["scene_number"],
"start_time": s["start_time"],
"end_time": s["end_time"],
"duration": s["duration"],
"keyframe_path": s.get("keyframe_path", ""),
"keyframe_paths": s.get("keyframe_paths", []),
"colors": s.get("colors", {}),
**({"motion_score": s["motion_score"]} if "motion_score" in s else {}),
}
for s in scenes
],
"audio": audio_output,
"motion": {
"avg_motion_score": motion_result.get("avg_motion_score", 0.0),
"motion_variance": motion_result.get("motion_variance", 0.0),
"motion_description": motion_result.get("motion_description", "unknown"),
"editing_pace": motion_result.get("editing_pace", "unknown"),
},
"style_summary": style_summary,
}

return result


def _compute_style_summary(
scenes: list[dict],
audio_result: Optional[dict],
motion_result: dict,
) -> dict:
"""Derive high-level style descriptors from the raw analysis results."""
# Color mood: majority vote of per-scene warmth values.
warmth_counts: Counter[str] = Counter()
brightness_total = 0.0
brightness_count = 0

for scene in scenes:
colors = scene.get("colors", {})
warmth_counts[colors.get("warmth", "neutral")] += 1
bri = colors.get("avg_brightness", 0.0)
if bri > 0:
brightness_total += bri
brightness_count += 1

color_mood: str = warmth_counts.most_common(1)[0][0] if warmth_counts else "neutral"
avg_brightness: float = round(brightness_total / brightness_count, 2) if brightness_count else 0.0

# Pacing: prefer audio-derived pacing, fall back to editing pace.
if audio_result is not None:
pacing: str = audio_result.get("pacing_description", "unknown")
else:
pacing = motion_result.get("editing_pace", "unknown")

motion_level: str = motion_result.get("motion_description", "unknown")

scene_count: int = len(scenes)
total_duration = sum(s.get("duration", 0.0) for s in scenes)
avg_scene_duration: float = round(total_duration / scene_count, 3) if scene_count else 0.0

return {
"color_mood": color_mood,
"avg_brightness": avg_brightness,
"pacing": pacing,
"motion_level": motion_level,
"scene_count": scene_count,
"avg_scene_duration": avg_scene_duration,
}


def main() -> None:
parser = argparse.ArgumentParser(
description="Analyze the visual and auditory style of a video file.",
)
parser.add_argument(
"video_path",
help="Path to the video file to analyze.",
)
parser.add_argument(
"--output-dir",
default=None,
help="Directory to save keyframes and results. A temp dir is created if omitted.",
)
parser.add_argument(
"--output-json",
default=None,
help="Path to write the JSON results file. Prints to stdout if omitted.",
)
parser.add_argument(
"--skip-audio",
action="store_true",
default=False,
help="Skip audio analysis.",
)
parser.add_argument(
"--synthesize",
action="store_true",
default=False,
help="Run Gemini Vision synthesis for rich style descriptions (requires GEMINI_API_KEY).",
)

args = parser.parse_args()

# Resolve output directory.
if args.output_dir:
output_dir: str = os.path.abspath(args.output_dir)
else:
output_dir = tempfile.mkdtemp(prefix="video_style_")
_log(f"Using temporary output directory: {output_dir}")

try:
result = analyze_video(args.video_path, output_dir, skip_audio=args.skip_audio)
except Exception as exc:
_log(f"Error: {exc}")
sys.exit(1)

# ── Gemini Vision synthesis (optional) ────────────────────────────
if args.synthesize:
_log("\n── Running Gemini Vision synthesis ──")
try:
synthesis = run_synthesis(result)
result["synthesis"] = synthesis
except RuntimeError as exc:
_log(f"Synthesis failed: {exc}")
result["synthesis"] = None

# Write JSON output.
json_str = json.dumps(result, indent=2, ensure_ascii=False)

if args.output_json:
output_json_path = os.path.abspath(args.output_json)
os.makedirs(os.path.dirname(output_json_path), exist_ok=True)
with open(output_json_path, "w", encoding="utf-8") as f:
f.write(json_str)
f.write("\n")
_log(f"Results written to {output_json_path}")
else:
print(json_str)

# Final summary to stderr.
summary = result.get("style_summary", {})
_log(
f"Analysis complete. {summary.get('scene_count', 0)} scenes detected. "
f"Style: {summary.get('color_mood', 'unknown')}, "
f"{summary.get('pacing', 'unknown')} pacing, "
f"{summary.get('motion_level', 'unknown')} motion."
)


if __name__ == "__main__":
main()