diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 35ba34d4..51afba6a 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -42,6 +42,7 @@ concurrency: env: CARGO_TERM_COLOR: always RUST_BACKTRACE: 1 + UV_VERSION: "0.11.8" BENCHMARK_TIMEOUT: 1800 # 30 min; pre-computed seeds + reduced 5D counts keep runtime well under this DELAUNAY_BENCH_DISCOVER_SEEDS_LIMIT: 256 # fallback only; ci_performance_suite uses pre-computed seeds @@ -64,7 +65,7 @@ jobs: - name: Install uv (Python package manager) uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 with: - version: "latest" + version: ${{ env.UV_VERSION }} - name: Verify uv installation run: uv --version diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6e50c50a..241f1f2d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,7 +25,7 @@ env: MARKDOWNLINT_VERSION: "0.47.0" SHFMT_VERSION: "3.12.0" TYPOS_VERSION: "1.43.4" - UV_VERSION: "0.9.21" + UV_VERSION: "0.11.8" jobs: build: diff --git a/.github/workflows/generate-baseline.yml b/.github/workflows/generate-baseline.yml index a2884b2f..7b49d2bc 100644 --- a/.github/workflows/generate-baseline.yml +++ b/.github/workflows/generate-baseline.yml @@ -25,6 +25,7 @@ permissions: env: CARGO_TERM_COLOR: always RUST_BACKTRACE: 1 + UV_VERSION: "0.11.8" # Seed search limit for both old (pre-v0.8) and current env var names. # Old tags read DELAUNAY_BENCH_SEED_SEARCH_LIMIT; current code reads # DELAUNAY_BENCH_DISCOVER_SEEDS_LIMIT. Setting both ensures backward @@ -54,7 +55,7 @@ jobs: - name: Install uv (Python package manager) uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 with: - version: "latest" + version: ${{ env.UV_VERSION }} - name: Verify uv installation run: uv --version diff --git a/.github/workflows/profiling-benchmarks.yml b/.github/workflows/profiling-benchmarks.yml index e4291800..7103c22b 100644 --- a/.github/workflows/profiling-benchmarks.yml +++ b/.github/workflows/profiling-benchmarks.yml @@ -41,7 +41,6 @@ permissions: env: CARGO_TERM_COLOR: always RUST_BACKTRACE: 1 - RUST_TOOLCHAIN: 1.92.0 jobs: comprehensive-profiling: @@ -56,7 +55,6 @@ jobs: - name: Install Rust toolchain uses: actions-rust-lang/setup-rust-toolchain@2b1f5e9b395427c92ee4e3331786ca3c37afe2d7 # v1.16.0 with: - toolchain: ${{ env.RUST_TOOLCHAIN }} cache: false rustflags: "" @@ -112,6 +110,11 @@ jobs: } >> "$GITHUB_ENV" fi + - name: Capture profiling environment metadata + env: + BENCH_FILTER_VALUE: ${{ github.event.inputs.benchmark_filter || '' }} + run: ./scripts/ci/capture_profiling_metadata.sh + - name: Build profiling suite run: | # Build with the same perf profile used by `cargo bench --profile perf` @@ -197,6 +200,7 @@ jobs: - \`profiling_output.log\`: Complete benchmark output - \`memory_profiling_detailed.log\`: Detailed memory allocation analysis + - \`environment_metadata.md\`: Code ref, compiler, profile, and filter metadata - \`criterion/\`: HTML reports and detailed timing data EOF @@ -253,7 +257,6 @@ jobs: - name: Install Rust toolchain uses: actions-rust-lang/setup-rust-toolchain@2b1f5e9b395427c92ee4e3331786ca3c37afe2d7 # v1.16.0 with: - toolchain: ${{ env.RUST_TOOLCHAIN }} cache: false rustflags: "" @@ -273,6 +276,13 @@ jobs: echo "Running allocation API tests..." 
cargo test --test allocation_api --features count-allocations --verbose + - name: Capture memory profiling environment metadata + env: + PROFILE_METADATA_TITLE: Memory Profiling Environment + PROFILE_METADATA_FILTER: memory_profiling + PROFILE_METADATA_MODE: development + run: ./scripts/ci/capture_profiling_metadata.sh + - name: Run memory scaling benchmarks env: PROFILING_DEV_MODE: "1" @@ -292,5 +302,6 @@ jobs: with: name: memory-stress-results-${{ github.run_number }} path: | + profiling-results/ target/criterion/ retention-days: 14 diff --git a/Cargo.toml b/Cargo.toml index 20fd7cd5..1f6dff1c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,11 +67,6 @@ name = "circumsphere_containment" path = "benches/circumsphere_containment.rs" harness = false -[[bench]] -name = "microbenchmarks" -path = "benches/microbenchmarks.rs" -harness = false - [[bench]] name = "topology_guarantee_construction" path = "benches/topology_guarantee_construction.rs" diff --git a/benches/PERFORMANCE_RESULTS.md b/benches/PERFORMANCE_RESULTS.md index 068f335a..b9f19eff 100644 --- a/benches/PERFORMANCE_RESULTS.md +++ b/benches/PERFORMANCE_RESULTS.md @@ -3,9 +3,9 @@ This file contains performance benchmarks and analysis for the delaunay library. The results are automatically generated and updated by the benchmark infrastructure. -**Last Updated**: 2026-04-25 15:39:16 UTC +**Last Updated**: 2026-04-27 19:30:43 UTC **Generated By**: benchmark_utils.py -**Git Commit**: 7e42be8fba9abe571d0137710fbd7ed0151ebc85 +**Git Commit**: 5f3e02917d813463716f7e2f009d6096d89148da **Hardware**: Apple M4 Max (16 cores) **Memory**: 64.0 GB **OS**: macOS @@ -13,7 +13,113 @@ The results are automatically generated and updated by the benchmark infrastruct ## Performance Results Summary -### Circumsphere Performance Results +### Public API Performance Contract (`ci_performance_suite`) + +This suite is the versioned benchmark contract for public Delaunay workflows. +It covers construction, hull extraction, validation, incremental insertion, +boundary traversal, and explicit bistellar flip roundtrips. 
+ +#### Construction + +Public API: `DelaunayTriangulation::new_with_options` + +| Benchmark ID | Dimension | Input | Variant | Mean | 95% CI | +|--------------|-----------|-------|---------|------|--------| +| `tds_new_2d/tds_new/10` | 2D | 10 | well-conditioned | 143.4 µs | 143.1 µs - 143.7 µs | +| `tds_new_2d/tds_new_adversarial/10` | 2D | 10 | adversarial | 336.1 µs | 334.7 µs - 337.6 µs | +| `tds_new_2d/tds_new/25` | 2D | 25 | well-conditioned | 904.6 µs | 902.6 µs - 906.8 µs | +| `tds_new_2d/tds_new_adversarial/25` | 2D | 25 | adversarial | 3.557 ms | 3.526 ms - 3.586 ms | +| `tds_new_2d/tds_new/50` | 2D | 50 | well-conditioned | 3.055 ms | 3.046 ms - 3.065 ms | +| `tds_new_2d/tds_new_adversarial/50` | 2D | 50 | adversarial | 16.089 ms | 16.055 ms - 16.121 ms | +| `tds_new_3d/tds_new/10` | 3D | 10 | well-conditioned | 1.004 ms | 999.9 µs - 1.009 ms | +| `tds_new_3d/tds_new_adversarial/10` | 3D | 10 | adversarial | 2.876 ms | 2.868 ms - 2.884 ms | +| `tds_new_3d/tds_new/25` | 3D | 25 | well-conditioned | 14.925 ms | 14.882 ms - 14.969 ms | +| `tds_new_3d/tds_new_adversarial/25` | 3D | 25 | adversarial | 33.642 ms | 33.512 ms - 33.773 ms | +| `tds_new_3d/tds_new/50` | 3D | 50 | well-conditioned | 74.230 ms | 73.980 ms - 74.482 ms | +| `tds_new_3d/tds_new_adversarial/50` | 3D | 50 | adversarial | 167.721 ms | 166.922 ms - 168.499 ms | +| `tds_new_4d/tds_new/10` | 4D | 10 | well-conditioned | 12.852 ms | 12.774 ms - 12.936 ms | +| `tds_new_4d/tds_new_adversarial/10` | 4D | 10 | adversarial | 9.161 ms | 9.115 ms - 9.206 ms | +| `tds_new_4d/tds_new/25` | 4D | 25 | well-conditioned | 287.991 ms | 286.462 ms - 289.393 ms | +| `tds_new_4d/tds_new_adversarial/25` | 4D | 25 | adversarial | 231.443 ms | 230.582 ms - 232.428 ms | +| `tds_new_4d/tds_new/50` | 4D | 50 | well-conditioned | 1.632 s | 1.624 s - 1.645 s | +| `tds_new_4d/tds_new_adversarial/50` | 4D | 50 | adversarial | 1.283 s | 1.280 s - 1.286 s | +| `tds_new_5d/tds_new/10` | 5D | 10 | well-conditioned | 24.993 ms | 24.906 ms - 25.072 ms | +| `tds_new_5d/tds_new_adversarial/10` | 5D | 10 | adversarial | 27.704 ms | 27.550 ms - 27.834 ms | +| `tds_new_5d/tds_new/25` | 5D | 25 | well-conditioned | 1.461 s | 1.457 s - 1.466 s | +| `tds_new_5d/tds_new_adversarial/25` | 5D | 25 | adversarial | 1.353 s | 1.350 s - 1.357 s | + +#### Boundary facets + +Public API: `DelaunayTriangulation::boundary_facets` + +| Benchmark ID | Dimension | Input | Variant | Mean | 95% CI | +|--------------|-----------|-------|---------|------|--------| +| `boundary_facets/boundary_facets_2d/50` | 2D | 50 | well-conditioned | 15.9 µs | 15.9 µs - 15.9 µs | +| `boundary_facets/boundary_facets_2d_adversarial/50` | 2D | 50 | adversarial | 16.4 µs | 16.3 µs - 16.4 µs | +| `boundary_facets/boundary_facets_3d/50` | 3D | 50 | well-conditioned | 66.2 µs | 65.8 µs - 66.5 µs | +| `boundary_facets/boundary_facets_3d_adversarial/50` | 3D | 50 | adversarial | 65.4 µs | 65.1 µs - 65.8 µs | +| `boundary_facets/boundary_facets_4d/50` | 4D | 50 | well-conditioned | 270.1 µs | 267.8 µs - 272.3 µs | +| `boundary_facets/boundary_facets_4d_adversarial/50` | 4D | 50 | adversarial | 255.7 µs | 253.8 µs - 257.6 µs | +| `boundary_facets/boundary_facets_5d/25` | 5D | 25 | well-conditioned | 245.5 µs | 242.4 µs - 248.5 µs | +| `boundary_facets/boundary_facets_5d_adversarial/25` | 5D | 25 | adversarial | 233.8 µs | 231.4 µs - 236.3 µs | + +#### Convex hull + +Public API: `ConvexHull::from_triangulation` + +| Benchmark ID | Dimension | Input | Variant | Mean | 95% CI | 
+|--------------|-----------|-------|---------|------|--------| +| `convex_hull/from_triangulation_2d/50` | 2D | 50 | well-conditioned | 16.0 µs | 16.0 µs - 16.1 µs | +| `convex_hull/from_triangulation_2d_adversarial/50` | 2D | 50 | adversarial | 16.5 µs | 16.5 µs - 16.6 µs | +| `convex_hull/from_triangulation_3d/50` | 3D | 50 | well-conditioned | 66.3 µs | 66.0 µs - 66.6 µs | +| `convex_hull/from_triangulation_3d_adversarial/50` | 3D | 50 | adversarial | 66.3 µs | 66.0 µs - 66.5 µs | +| `convex_hull/from_triangulation_4d/50` | 4D | 50 | well-conditioned | 271.7 µs | 270.0 µs - 273.3 µs | +| `convex_hull/from_triangulation_4d_adversarial/50` | 4D | 50 | adversarial | 256.6 µs | 254.9 µs - 258.4 µs | +| `convex_hull/from_triangulation_5d/25` | 5D | 25 | well-conditioned | 247.4 µs | 245.4 µs - 249.2 µs | +| `convex_hull/from_triangulation_5d_adversarial/25` | 5D | 25 | adversarial | 229.6 µs | 227.0 µs - 232.3 µs | + +#### Validation + +Public API: `DelaunayTriangulation::validate` + +| Benchmark ID | Dimension | Input | Variant | Mean | 95% CI | +|--------------|-----------|-------|---------|------|--------| +| `validation/validate_3d/50` | 3D | 50 | well-conditioned | 1.071 ms | 1.057 ms - 1.088 ms | +| `validation/validate_3d_adversarial/50` | 3D | 50 | adversarial | 1.652 ms | 1.643 ms - 1.662 ms | +| `validation/validate_4d/50` | 4D | 50 | well-conditioned | 43.553 ms | 43.383 ms - 43.729 ms | +| `validation/validate_4d_adversarial/50` | 4D | 50 | adversarial | 39.152 ms | 38.994 ms - 39.326 ms | +| `validation/validate_5d/25` | 5D | 25 | well-conditioned | 78.675 ms | 78.339 ms - 78.994 ms | +| `validation/validate_5d_adversarial/25` | 5D | 25 | adversarial | 72.246 ms | 71.893 ms - 72.631 ms | + +#### Incremental insert + +Public API: `DelaunayTriangulation::insert` + +| Benchmark ID | Dimension | Input | Variant | Mean | 95% CI | +|--------------|-----------|-------|---------|------|--------| +| `incremental_insert/insert_2d/10` | 2D | 10 | well-conditioned | 1.098 ms | 1.095 ms - 1.102 ms | +| `incremental_insert/insert_2d_adversarial/10` | 2D | 10 | adversarial | 2.071 ms | 2.067 ms - 2.075 ms | +| `incremental_insert/insert_3d/10` | 3D | 10 | well-conditioned | 5.988 ms | 5.960 ms - 6.018 ms | +| `incremental_insert/insert_3d_adversarial/10` | 3D | 10 | adversarial | 48.951 ms | 48.658 ms - 49.245 ms | +| `incremental_insert/insert_4d/6` | 4D | 6 | well-conditioned | 259.223 ms | 258.041 ms - 260.310 ms | +| `incremental_insert/insert_4d_adversarial/6` | 4D | 6 | adversarial | 431.328 ms | 429.736 ms - 433.006 ms | +| `incremental_insert/insert_5d/4` | 5D | 4 | well-conditioned | 930.065 ms | 927.662 ms - 932.270 ms | +| `incremental_insert/insert_5d_adversarial/4` | 5D | 4 | adversarial | 445.154 ms | 443.820 ms - 446.406 ms | + +#### Bistellar flips + +Public API: `BistellarFlips` + +| Benchmark ID | Dimension | Input | Variant | Mean | 95% CI | +|--------------|-----------|-------|---------|------|--------| +| `bistellar_flips_4d/k1_roundtrip` | 4D | roundtrip | well-conditioned | 38.0 µs | 37.8 µs - 38.2 µs | +| `bistellar_flips_4d/k2_roundtrip` | 4D | roundtrip | well-conditioned | 40.6 µs | 40.4 µs - 40.8 µs | +| `bistellar_flips_4d/k3_roundtrip` | 4D | roundtrip | well-conditioned | 40.1 µs | 40.0 µs - 40.3 µs | + +### Circumsphere Predicate Performance + +This focused predicate suite tracks `la-stack`-backed circumsphere and +insphere query performance independently from full triangulation workflows. 
#### Version 0.7.6 Results (2026-04-25) @@ -21,33 +127,33 @@ The results are automatically generated and updated by the benchmark infrastruct | Test Case | insphere | insphere_distance | insphere_lifted | Winner | |-----------|----------|------------------|-----------------|---------| -| Basic 2D | 15 ns | 25 ns | 7 ns | **insphere_lifted** | -| Boundary vertex | 2 ns | 24 ns | 196 ns | **insphere** | -| Far vertex | 15 ns | 25 ns | 7 ns | **insphere_lifted** | +| Basic 2D | 15 ns | 26 ns | 7 ns | **insphere_lifted** | +| Boundary vertex | 2 ns | 25 ns | 260 ns | **insphere** | +| Far vertex | 15 ns | 24 ns | 8 ns | **insphere_lifted** | #### Single Query Performance (3D) | Test Case | insphere | insphere_distance | insphere_lifted | Winner | |-----------|----------|------------------|-----------------|---------| -| Basic 3D | 2.1 µs | 25 ns | 17 ns | **insphere_lifted** | -| Boundary vertex | 2 ns | 26 ns | 432 ns | **insphere** | -| Far vertex | 2.1 µs | 26 ns | 17 ns | **insphere_lifted** | +| Basic 3D | 2.8 µs | 26 ns | 18 ns | **insphere_lifted** | +| Boundary vertex | 2 ns | 26 ns | 563 ns | **insphere** | +| Far vertex | 2.8 µs | 26 ns | 17 ns | **insphere_lifted** | #### Single Query Performance (4D) | Test Case | insphere | insphere_distance | insphere_lifted | Winner | |-----------|----------|------------------|-----------------|---------| -| Basic 4D | 5.1 µs | 53 ns | 2.9 µs | **insphere_distance** | -| Boundary vertex | 2 ns | 60 ns | 1.5 µs | **insphere** | -| Far vertex | 3.2 µs | 53 ns | 1.8 µs | **insphere_distance** | +| Basic 4D | 6.7 µs | 56 ns | 3.7 µs | **insphere_distance** | +| Boundary vertex | 2 ns | 57 ns | 1.9 µs | **insphere** | +| Far vertex | 4.4 µs | 54 ns | 2.5 µs | **insphere_distance** | #### Single Query Performance (5D) | Test Case | insphere | insphere_distance | insphere_lifted | Winner | |-----------|----------|------------------|-----------------|---------| -| Basic 5D | 8.3 µs | 80 ns | 4.8 µs | **insphere_distance** | -| Boundary vertex | 2 ns | 81 ns | 2.3 µs | **insphere** | -| Far vertex | 4.9 µs | 79 ns | 2.8 µs | **insphere_distance** | +| Basic 5D | 10.4 µs | 82 ns | 6.0 µs | **insphere_distance** | +| Boundary vertex | 2 ns | 82 ns | 2.9 µs | **insphere** | +| Far vertex | 6.3 µs | 81 ns | 3.8 µs | **insphere_distance** | ## Triangulation Data Structure Performance @@ -88,13 +194,13 @@ The results are automatically generated and updated by the benchmark infrastruct | 10 | 27.463 ms | 0.364 Kelem/s | 1.0x | | 25 | 5956.682 ms | 0.004 Kelem/s | 216.9x | -## Key Findings +## Circumsphere Predicate Analysis ### Performance Ranking 1. **insphere_distance** - (best in 4D, 5D) - Best average performance -2. **insphere_lifted** - (best in 2D, 3D) - ~33.6x average vs fastest -3. **insphere** - ~70.4x slower than fastest on average +2. **insphere_lifted** - (best in 2D, 3D) - ~42.6x average vs fastest +3. **insphere** - ~89.2x slower than fastest on average ### Numerical Accuracy Analysis @@ -105,19 +211,19 @@ Based on random test cases: - **insphere_distance vs insphere_lifted**: 100.0% agreement - **All three methods agree**: 100.0% (expected due to different numerical approaches) -## Recommendations +### Recommendations -### Method Selection Guide +#### Method Selection Guide **All three methods are mathematically correct** (they produce valid insphere test results). 
Choose based on your specific requirements: -#### Performance Optimization by Dimension +##### Performance Optimization by Dimension - **`insphere_distance`**: (best in 4D, 5D) - Best average performance -- **`insphere_lifted`**: (best in 2D, 3D) - ~33.6x average vs fastest +- **`insphere_lifted`**: (best in 2D, 3D) - ~42.6x average vs fastest -#### General Recommendations +##### General Recommendations **For maximum performance**: Choose the method that performs best in your target dimension (see above) @@ -127,20 +233,20 @@ and uses the standard determinant-based approach with well-understood numerical **For algorithm transparency**: `insphere_distance` explicitly calculates the circumcenter, making it excellent for educational purposes, debugging, and algorithm validation -#### Performance Comparison +##### Performance Comparison Average performance across all non-boundary test cases: -- `insphere_distance`: 46 ns (best in 4D, 5D) -- `insphere_lifted`: 1.5 µs (best in 2D, 3D) -- `insphere`: 3.2 µs (third fastest) +- `insphere_distance`: 47 ns (best in 4D, 5D) +- `insphere_lifted`: 2.0 µs (best in 2D, 3D) +- `insphere`: 4.2 µs (third fastest) -## Conclusion +### Conclusion All three methods are mathematically correct and produce valid results. Performance characteristics vary by dimension: - `insphere_distance` (best in 4D, 5D) - Best average performance -- `insphere_lifted` (best in 2D, 3D) - ~33.6x average vs fastest +- `insphere_lifted` (best in 2D, 3D) - ~42.6x average vs fastest For general-purpose applications, choose based on your primary use case: @@ -188,6 +294,11 @@ The disagreements between methods are expected due to: ## Benchmark Structure +The `ci_performance_suite.rs` benchmark is the primary regression and +release-summary suite. It emits a versioned `api_benchmark_manifest` and +covers public construction, hull, validation, insertion, boundary, and +bistellar-flip workflows across supported dimensions. + The `circumsphere_containment.rs` benchmark includes: - **Random queries**: Batch processing performance with 1000 random test points @@ -203,7 +314,7 @@ This file is automatically generated from benchmark results. 
To update: # Generate performance summary with current data uv run benchmark-utils generate-summary -# Run fresh perf-profile benchmarks and generate summary (includes numerical accuracy) +# Run fresh perf-profile public API and circumsphere benchmarks uv run benchmark-utils generate-summary --run-benchmarks --profile perf # Generate baseline results for regression testing diff --git a/benches/README.md b/benches/README.md index 93992d41..fde5e694 100644 --- a/benches/README.md +++ b/benches/README.md @@ -12,10 +12,9 @@ This directory contains performance benchmarks for the delaunay library, organiz | Benchmark | Purpose | Scale | Runtime | Used By | |-----------|---------|-------|---------|----------| -| `ci_performance_suite.rs` | **CI regression detection** | 10–50 vertices | ~5-10 min | CI workflows, baseline generation | -| `circumsphere_containment.rs` | Algorithm comparison | Random queries | ~5 min | Performance summary generation | +| `ci_performance_suite.rs` | **CI regression detection** | 10–50 vertices | ~5-10 min | CI workflows, baseline generation, performance summary | +| `circumsphere_containment.rs` | Predicate algorithm comparison | Random queries | ~5 min | Performance summary predicate subsection | | `large_scale_performance.rs` | **Phase 4 SlotMap evaluation** | 1k–10k vertices | ~10-30 min (default); ~2-3 hours (BENCH_LARGE_SCALE=1) | Manual | -| `microbenchmarks.rs` | Core operations | Various | ~10 min | Manual | | `profiling_suite.rs` | Comprehensive profiling | 10³–10⁶ vertices | 1-2 hours | Monthly profiling, manual | | `topology_guarantee_construction.rs` | Topology guarantee construction overhead | 2D–5D (small/medium point counts) | ~5–15 min | Manual | | ~~`triangulation_creation.rs`~~ | ~~Simple construction~~ | ~~1000 vertices~~ | ~~N/A~~ | **DEPRECATED / REMOVED** | @@ -25,11 +24,12 @@ This directory contains performance benchmarks for the delaunay library, organiz | Use Case | Benchmark | Command | |----------|-----------|----------| | CI regression check | `ci_performance_suite.rs` | `just bench-ci` or `cargo bench --profile perf --bench ci_performance_suite` | -| Release performance summary | `circumsphere_containment.rs` | `just bench-perf-summary` | +| Release performance summary | `ci_performance_suite.rs` + `circumsphere_containment.rs` | `just bench-perf-summary` | | Smoke-test benchmark harnesses | Workspace benches | `just bench-smoke` | | Phase 4 SlotMap evaluation | `large_scale_performance.rs` | `cargo bench --profile perf --bench large_scale_performance` | | Deep profiling (1-2 hours) | `profiling_suite.rs` | `cargo bench --profile perf --bench profiling_suite` | | Memory analysis | `profiling_suite.rs` (memory groups) | `cargo bench --profile perf --bench profiling_suite -- memory_profiling` | +| Validation layer diagnostics | `profiling_suite.rs` (validation components) | `cargo bench --profile perf --bench profiling_suite -- validation_components` | | Algorithm comparison | `circumsphere_containment.rs` | `cargo bench --profile perf --bench circumsphere_containment` | | Topology guarantee overhead | `topology_guarantee_construction.rs` | See section below | @@ -73,7 +73,7 @@ numbers. Do not treat `bench-smoke` output as performance data. 
cargo bench --profile perf --bench ci_performance_suite ``` -The CI Performance Suite is the primary benchmarking suite used for automated performance-regression testing: +The CI Performance Suite is the primary benchmarking suite used for automated performance-regression testing and generated performance summaries: - **Purpose**: Fast performance regression detection for regular CI/CD - **Dimensions**: 2D–5D triangulations @@ -102,7 +102,9 @@ cargo bench --bench circumsphere_containment -- --test 📊 **[View Detailed Performance Results](PERFORMANCE_RESULTS.md)** -Comprehensive performance benchmarks, analysis, and recommendations have been moved to a dedicated file for easier maintenance and automated updates. +Comprehensive performance benchmarks, analysis, and recommendations have been moved to a dedicated file for easier +maintenance and automated updates. Circumsphere performance remains a dedicated subsection because these predicates +exercise `la-stack` code paths that are important to tune independently. ##### Quick Summary @@ -190,14 +192,6 @@ just compare-storage-large # Large scale comparison (~8-12 hours, compute cluste It measures iteration speed, memory usage, query performance, and validation - all critical for SlotMap comparison. -### Microbenchmarks (`microbenchmarks.rs`) - -A collection of smaller benchmarks for core operations (varies by module). - -```bash -cargo bench --profile perf --bench microbenchmarks -``` - ### Profiling Suite (`profiling_suite.rs`) (comprehensive) ```bash @@ -215,6 +209,7 @@ cargo bench --profile perf --bench profiling_suite --features count-allocations cargo bench --profile perf --bench profiling_suite --features count-allocations -- memory_profiling cargo bench --profile perf --bench profiling_suite --features count-allocations -- query_latency cargo bench --profile perf --bench profiling_suite --features count-allocations -- algorithmic_bottlenecks +cargo bench --profile perf --bench profiling_suite --features count-allocations -- validation_components # Run only memory profiling group (useful for focused analysis) cargo bench --profile perf --bench profiling_suite --features count-allocations -- "memory_profiling" diff --git a/benches/ci_performance_suite.rs b/benches/ci_performance_suite.rs index 4267f5ce..a06adaf2 100644 --- a/benches/ci_performance_suite.rs +++ b/benches/ci_performance_suite.rs @@ -1,15 +1,18 @@ -//! CI Performance Suite - Optimized performance regression testing for CI/CD +//! CI Performance Suite - optimized performance regression testing for CI/CD //! -//! This benchmark consolidates the most critical performance tests from across -//! the delaunay library into a single, CI-optimized suite that provides: +//! This benchmark is the small, durable performance contract for the delaunay +//! library. It covers the user-facing workflows that should stay fast across +//! releases without duplicating every specialized microbenchmark: //! -//! 1. Core triangulation performance (3D/4D/5D at key scales) -//! 2. Critical circumsphere operations (`insphere_lifted` focus) -//! 3. Key algorithmic bottlenecks (neighbor assignment, deduplication) -//! 4. Basic memory footprint tracking +//! 1. Delaunay construction across 2D-5D at CI-sized scales +//! 2. Convex hull extraction from completed triangulations +//! 3. Boundary facet traversal +//! 4. Full validation (Levels 1-4) +//! 5. Incremental vertex insertion +//! 6. Explicit bistellar flip roundtrips on a stable 4D PL-manifold case //! -//! 
Designed for ~5-10 minute CI runtime while maintaining comprehensive -//! regression detection across all performance-critical code paths. +//! Predicate microbenchmarks, allocation-focused measurements, and large-scale +//! stress tests live in the dedicated benchmark targets under `benches/`. //! //! ## Sample Size Strategy //! @@ -24,21 +27,189 @@ //! - 2D: Fundamental triangulation case //! - 3D-5D: Higher-dimensional triangulations as documented in README.md -use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; -use delaunay::core::vertex::Vertex; -use delaunay::geometry::point::Point; -use delaunay::geometry::util::generate_random_points_seeded; -use delaunay::prelude::{ConstructionOptions, DelaunayTriangulation, RetryPolicy}; +use criterion::measurement::WallTime; +use criterion::{ + BatchSize, BenchmarkGroup, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main, +}; +use delaunay::geometry::util::simplex_volume; +use delaunay::prelude::generators::generate_random_points_seeded; +use delaunay::prelude::geometry::{AdaptiveKernel, Coordinate, Point, RobustKernel}; +use delaunay::prelude::query::ConvexHull; +use delaunay::prelude::triangulation::flips::{ + BistellarFlips, CellKey, EdgeKey, FacetHandle, RidgeHandle, TopologyGuarantee, TriangleHandle, +}; +use delaunay::prelude::triangulation::{ + ConstructionOptions, DelaunayTriangulation, InsertionOrderStrategy, RetryPolicy, Vertex, +}; use delaunay::vertex; -use std::hint::black_box; -use std::num::NonZeroUsize; -use tracing::{error, warn}; +use std::{env, hint::black_box, num::NonZeroUsize, sync::Once}; +#[cfg(feature = "bench-logging")] +use tracing::warn; /// Default point counts for 2D–4D benchmarks. const COUNTS: &[usize] = &[10, 25, 50]; /// Reduced point counts for 5D (50-point construction is prohibitively slow). const COUNTS_5D: &[usize] = &[10, 25]; +/// Representative operation count for 2D-4D non-construction workflows. +const OPERATION_COUNT: usize = 50; +/// Representative operation count for 5D non-construction workflows. +const OPERATION_COUNT_5D: usize = 25; +/// Small insert batch for 2D-3D incremental insertion benchmarks. +const INSERT_COUNT: usize = 10; +/// Reduced insert batch for 4D incremental insertion benchmarks. +const INSERT_COUNT_4D: usize = 6; +/// Reduced insert batch for 5D incremental insertion benchmarks. 
+const INSERT_COUNT_5D: usize = 4; type SeedSearchResult = Option<(u64, Vec>, Vec>)>; +type BenchTriangulation = DelaunayTriangulation, (), (), D>; +type FlipTriangulation4 = DelaunayTriangulation, (), (), 4>; + +#[derive(Clone, Copy)] +enum Dataset { + WellConditioned, + Adversarial, +} + +impl Dataset { + const fn suffix(self) -> &'static str { + match self { + Self::WellConditioned => "", + Self::Adversarial => "_adversarial", + } + } +} + +struct ApiBenchmarkEntry { + group: &'static str, + public_api: &'static str, + dimensions: &'static str, + benchmark_ids: String, + note: &'static str, +} + +static API_BENCHMARK_MANIFEST: Once = Once::new(); + +fn count_list(counts: &[usize]) -> String { + counts + .iter() + .map(usize::to_string) + .collect::>() + .join(",") +} + +fn construction_benchmark_ids() -> String { + [ + format!( + "tds_new_2d/{{tds_new,tds_new_adversarial}}/{{{}}}", + count_list(COUNTS) + ), + format!( + "tds_new_3d/{{tds_new,tds_new_adversarial}}/{{{}}}", + count_list(COUNTS) + ), + format!( + "tds_new_4d/{{tds_new,tds_new_adversarial}}/{{{}}}", + count_list(COUNTS) + ), + format!( + "tds_new_5d/{{tds_new,tds_new_adversarial}}/{{{}}}", + count_list(COUNTS_5D) + ), + ] + .join(";") +} + +fn operation_benchmark_ids(group: &str, prefix: &str) -> String { + [ + format!("{group}/{{{prefix}_2d,{prefix}_2d_adversarial}}/{OPERATION_COUNT}"), + format!("{group}/{{{prefix}_3d,{prefix}_3d_adversarial}}/{OPERATION_COUNT}"), + format!("{group}/{{{prefix}_4d,{prefix}_4d_adversarial}}/{OPERATION_COUNT}"), + format!("{group}/{{{prefix}_5d,{prefix}_5d_adversarial}}/{OPERATION_COUNT_5D}"), + ] + .join(";") +} + +fn validation_benchmark_ids() -> String { + [ + format!("validation/{{validate_3d,validate_3d_adversarial}}/{OPERATION_COUNT}"), + format!("validation/{{validate_4d,validate_4d_adversarial}}/{OPERATION_COUNT}"), + format!("validation/{{validate_5d,validate_5d_adversarial}}/{OPERATION_COUNT_5D}"), + ] + .join(";") +} + +fn insert_benchmark_ids() -> String { + [ + format!("incremental_insert/{{insert_2d,insert_2d_adversarial}}/{INSERT_COUNT}"), + format!("incremental_insert/{{insert_3d,insert_3d_adversarial}}/{INSERT_COUNT}"), + format!("incremental_insert/{{insert_4d,insert_4d_adversarial}}/{INSERT_COUNT_4D}"), + format!("incremental_insert/{{insert_5d,insert_5d_adversarial}}/{INSERT_COUNT_5D}"), + ] + .join(";") +} + +fn api_benchmark_entries() -> Vec { + vec![ + ApiBenchmarkEntry { + group: "construction", + public_api: "DelaunayTriangulation::new_with_options", + dimensions: "2,3,4,5", + benchmark_ids: construction_benchmark_ids(), + note: "construct_from_seeded_vertices_and_adversarial_large_coordinate_inputs", + }, + ApiBenchmarkEntry { + group: "boundary_facets", + public_api: "DelaunayTriangulation::boundary_facets", + dimensions: "2,3,4,5", + benchmark_ids: operation_benchmark_ids("boundary_facets", "boundary_facets"), + note: "iterate_boundary_facets_on_well_conditioned_and_adversarial_inputs", + }, + ApiBenchmarkEntry { + group: "convex_hull", + public_api: "ConvexHull::from_triangulation", + dimensions: "2,3,4,5", + benchmark_ids: operation_benchmark_ids("convex_hull", "from_triangulation"), + note: "extract_hull_from_well_conditioned_and_adversarial_triangulations", + }, + ApiBenchmarkEntry { + group: "validation", + public_api: "DelaunayTriangulation::validate", + dimensions: "3,4,5", + benchmark_ids: validation_benchmark_ids(), + note: "levels_1_through_4_on_well_conditioned_and_adversarial_inputs", + }, + ApiBenchmarkEntry { + group: "incremental_insert", + 
public_api: "DelaunayTriangulation::insert", + dimensions: "2,3,4,5", + benchmark_ids: insert_benchmark_ids(), + note: "insert_batches_into_prebuilt_well_conditioned_and_adversarial_triangulations", + }, + ApiBenchmarkEntry { + group: "bistellar_flips", + public_api: "BistellarFlips::{flip_k1_insert,flip_k1_remove,flip_k2,flip_k2_inverse_from_edge,flip_k3,flip_k3_inverse_from_triangle}", + dimensions: "4", + benchmark_ids: "bistellar_flips_4d/k1_roundtrip;bistellar_flips_4d/k2_roundtrip;bistellar_flips_4d/k3_roundtrip".to_string(), + note: "stable_pl_manifold_roundtrips", + }, + ] +} + +/// Stable 4D PL-manifold configuration used for explicit bistellar flips. +const STABLE_POINTS_4D: &[[f64; 4]] = &[ + [0.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [0.0, 1.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0], + [0.0, 0.0, 0.0, 1.0], + [0.10, 0.10, 0.10, 0.10], + [0.15, 0.10, 0.10, 0.10], + [0.10, 0.15, 0.10, 0.10], + [0.10, 0.10, 0.15, 0.10], + [0.12, 0.12, 0.12, 0.12], + [0.20, 0.15, 0.10, 0.05], + [0.08, 0.18, 0.12, 0.14], +]; /// Pre-computed seeds for each (dimension, count) pair. /// @@ -71,6 +242,24 @@ const KNOWN_SEEDS: &[(usize, usize, u64)] = &[ (5, 25, 816), ]; +const KNOWN_ADV_SEEDS: &[(usize, usize, u64)] = &[ + // 2D + (2, 10, 2_779_097_209), + (2, 25, 2_779_097_224), + (2, 50, 2_779_097_249), + // 3D + (3, 10, 2_779_098_586), + (3, 25, 2_779_098_601), + (3, 50, 2_779_098_627), + // 4D + (4, 10, 2_779_104_247), + (4, 25, 2_779_104_262), + (4, 50, 2_779_104_287), + // 5D + (5, 10, 2_779_109_908), + (5, 25, 2_779_109_924), +]; + fn known_seed(dim: usize, count: usize) -> Option { KNOWN_SEEDS .iter() @@ -78,41 +267,120 @@ fn known_seed(dim: usize, count: usize) -> Option { .map(|&(_, _, seed)| seed) } +fn known_adv_seed(dim: usize, count: usize) -> Option { + KNOWN_ADV_SEEDS + .iter() + .find(|&&(d, c, _)| d == dim && c == count) + .map(|&(_, _, seed)| seed) +} + +fn print_manifest_once() { + API_BENCHMARK_MANIFEST.call_once(|| { + println!( + "api_benchmark_manifest crate=delaunay version={} benchmark=ci_performance_suite schema=1", + env!("CARGO_PKG_VERSION") + ); + for entry in api_benchmark_entries() { + println!( + "api_benchmark group={} public_api={} dimensions={} benchmark_ids={} note={}", + entry.group, entry.public_api, entry.dimensions, entry.benchmark_ids, entry.note + ); + } + }); +} + /// Prepare benchmark inputs by looking up a pre-computed seed, falling back /// to a runtime search only if the known seed is missing or invalid. 
-fn prepare_benchmark_data<const D: usize>(
+fn prepare_data<const D: usize>(
     dim_seed: u64,
     count: usize,
     bounds: (f64, f64),
     attempts: NonZeroUsize,
 ) -> (u64, Vec<Point<f64, D>>, Vec<Vertex<f64, (), D>>) {
     // Fast path: use the pre-computed seed (single verification construction)
-    if let Some(seed) = known_seed(D, count) {
-        if let Some(result) = find_seed_and_vertices::<D>(seed, count, bounds, 1, attempts) {
-            return result;
+    match known_seed(D, count).map(|seed| {
+        (
+            seed,
+            find_seed_vertices::<D>(seed, count, bounds, 1, attempts),
+        )
+    }) {
+        Some((_seed, Some(result))) => return result,
+        Some((seed, None)) => {
+            #[cfg(not(feature = "bench-logging"))]
+            let _ = seed;
+            #[cfg(feature = "bench-logging")]
+            {
+                warn!(
+                    known_seed = seed,
+                    dim = D,
+                    count,
+                    "known seed failed, falling back to runtime search"
+                );
+            }
         }
-        warn!(
-            known_seed = seed,
-            dim = D,
-            count,
-            "known seed failed, falling back to runtime search"
-        );
+        None => {}
     }
     // Slow fallback: runtime search from the base seed
     let base_seed = dim_seed.wrapping_add(count as u64);
-    let search_limit = bench_seed_search_limit();
-    find_seed_and_vertices::<D>(base_seed, count, bounds, search_limit, attempts).unwrap_or_else(
-        || {
-            panic!(
-                "No stable benchmark seed found for {D}D/{count}: \
+    let search_limit = seed_search_limit();
+    find_seed_vertices::<D>(base_seed, count, bounds, search_limit, attempts).unwrap_or_else(|| {
+        panic!(
+            "No stable benchmark seed found for {D}D/{count}: \
             start_seed={base_seed}; search_limit={search_limit}; bounds={bounds:?}"
-            )
-        },
-    )
+        )
+    })
+}
+
+fn prepare_dt<const D: usize>(dim_seed: u64, count: usize) -> BenchTriangulation<D> {
+    let bounds = (-100.0, 100.0);
+    let attempts = NonZeroUsize::new(6).expect("retry attempts must be non-zero");
+    let (seed, _, vertices) = prepare_data::<D>(dim_seed, count, bounds, attempts);
+    let options = ConstructionOptions::default().with_retry_policy(RetryPolicy::Shuffled {
+        attempts,
+        base_seed: Some(seed),
+    });
+
+    BenchTriangulation::<D>::new_with_options(&vertices, options).unwrap_or_else(|err| {
+        panic!("failed to prepare {D}D benchmark triangulation with {count} vertices: {err}");
+    })
+}
+
+fn prepare_adv_dt<const D: usize>(dim_seed: u64, count: usize) -> BenchTriangulation<D> {
+    let attempts = NonZeroUsize::new(8).expect("retry attempts must be non-zero");
+    let (seed, _, vertices) = prepare_adv_data::<D>(dim_seed, count, attempts);
+    let options = ConstructionOptions::default().with_retry_policy(RetryPolicy::Shuffled {
+        attempts,
+        base_seed: Some(seed),
+    });
+
+    BenchTriangulation::<D>::new_with_options(&vertices, options).unwrap_or_else(|err| {
+        panic!(
+            "failed to prepare adversarial {D}D benchmark triangulation with {count} vertices: {err}"
+        );
+    })
+}
+
+fn prepare_inserts<const D: usize>(
+    dim_seed: u64,
+    count: usize,
+    dataset: Dataset,
+) -> Vec<Vertex<f64, (), D>> {
+    let mut seed = dim_seed.wrapping_add(0x5151_5151);
+    if matches!(dataset, Dataset::Adversarial) {
+        seed ^= 0xA5A5_A5A5;
+    }
+    let points = match dataset {
+        Dataset::WellConditioned => {
+            generate_random_points_seeded::<f64, D>(count, (-50.0, 50.0), seed)
+                .unwrap_or_else(|error| panic!("insert point generation failed for {D}D: {error}"))
+        }
+        Dataset::Adversarial => generate_adv_points::<D>(count, seed),
+    };
+    points.iter().map(|point| vertex!(*point)).collect()
 }
-fn find_seed_and_vertices<const D: usize>(
+fn find_seed_vertices<const D: usize>(
     start_seed: u64,
     count: usize,
     bounds: (f64, f64),
@@ -140,16 +408,322 @@ fn find_seed_and_vertices<const D: usize>(
     None
 }
-fn bench_logging_enabled() -> bool {
-    std::env::var("DELAUNAY_BENCH_LOG").is_ok_and(|value| value != "0")
+fn stable_adv_points<const D: usize>(
+    seed: u64,
+    count: usize,
+    attempts: NonZeroUsize,
+) -> SeedSearchResult { + let points = generate_adv_points::(count, seed); + let vertices = points.iter().map(|p| vertex!(*p)).collect::>(); + let options = ConstructionOptions::default().with_retry_policy(RetryPolicy::Shuffled { + attempts, + base_seed: Some(seed), + }); + + BenchTriangulation::::new_with_options(&vertices, options) + .is_ok() + .then_some((seed, points, vertices)) +} + +fn prepare_adv_data( + dim_seed: u64, + count: usize, + attempts: NonZeroUsize, +) -> (u64, Vec>, Vec>) { + if !discover_seeds_enabled() { + match known_adv_seed(D, count) + .map(|seed| (seed, stable_adv_points::(seed, count, attempts))) + { + Some((_seed, Some(result))) => return result, + Some((seed, None)) => { + #[cfg(not(feature = "bench-logging"))] + let _ = seed; + #[cfg(feature = "bench-logging")] + { + warn!( + known_seed = seed, + dim = D, + count, + "known adversarial seed failed, falling back to runtime search" + ); + } + } + None => {} + } + } + + let start_seed = dim_seed + .wrapping_mul(17) + .wrapping_add(count as u64) + .wrapping_add(0xA5A5_A5A5); + let search_limit = seed_search_limit(); + + for offset in 0..search_limit { + let candidate_seed = start_seed.wrapping_add(offset as u64); + if let Some(result) = stable_adv_points::(candidate_seed, count, attempts) { + if discover_seeds_enabled() { + println!("ADV_SEED {D} {count} {candidate_seed}"); + } + return result; + } + } + + panic!( + "No stable adversarial benchmark seed found for {D}D/{count}: \ + start_seed={start_seed}; search_limit={search_limit}" + ); +} + +fn generate_adv_points(count: usize, seed: u64) -> Vec> { + let base_points = generate_random_points_seeded::(count, (-1.0, 1.0), seed) + .unwrap_or_else(|error| { + panic!("generate_random_points_seeded failed for adversarial {D}D: {error}"); + }); + + base_points + .iter() + .enumerate() + .map(|(index, point)| { + let index = u32::try_from(index).expect("benchmark point index should fit in u32"); + let mut coords = [0.0_f64; D]; + for (axis, coord) in coords.iter_mut().enumerate() { + let axis_number = u32::try_from(axis + 1).expect("axis should fit in u32"); + let base = point.coords()[axis]; + let cluster_offset = f64::from(index % 7) * 1.0e-3; + let axis_offset = f64::from(axis_number) * 0.25; + let perturbation = f64::from((index + axis_number) % 11) * 1.0e-6; + *coord = base.mul_add(1.0e3, 1.0e9 + axis_offset + cluster_offset + perturbation); + } + Point::new(coords) + }) + .collect() } -fn bench_discover_seeds_enabled() -> bool { - std::env::var("DELAUNAY_BENCH_DISCOVER_SEEDS").is_ok_and(|value| value != "0") +fn stable_vertices_4d() -> Vec> { + STABLE_POINTS_4D + .iter() + .map(|coords| vertex!(*coords)) + .collect() +} + +fn build_flip_dt_4d() -> FlipTriangulation4 { + let vertices = stable_vertices_4d(); + let options = + ConstructionOptions::default().with_insertion_order(InsertionOrderStrategy::Input); + DelaunayTriangulation::with_topology_guarantee_and_options( + &RobustKernel::new(), + &vertices, + TopologyGuarantee::PLManifold, + options, + ) + .unwrap_or_else(|err| panic!("failed to build stable 4D flip triangulation: {err}")) +} + +fn cell_centroid_4d(dt: &FlipTriangulation4, cell_key: CellKey) -> [f64; 4] { + let cell = dt + .tds() + .get_cell(cell_key) + .expect("cell key should exist in benchmark triangulation"); + + let mut coords = [0.0_f64; 4]; + for &vkey in cell.vertices() { + let vertex = dt + .tds() + .get_vertex_by_key(vkey) + .expect("vertex key should exist in benchmark triangulation"); + let vcoords = vertex.point().coords(); + for i in 0..4 
{ + coords[i] += vcoords[i]; + } + } + + let vertex_count = + u32::try_from(cell.vertices().len()).expect("cell vertex count should fit in u32"); + let inv = 1.0_f64 / f64::from(vertex_count); + for coord in &mut coords { + *coord *= inv; + } + coords +} + +fn cell_points_4d(dt: &FlipTriangulation4, cell_key: CellKey) -> Vec> { + let cell = dt + .tds() + .get_cell(cell_key) + .expect("cell key should exist in benchmark triangulation"); + + cell.vertices() + .iter() + .map(|vertex_key| { + *dt.tds() + .get_vertex_by_key(*vertex_key) + .expect("vertex key should exist in benchmark triangulation") + .point() + }) + .collect() +} + +fn largest_volume_cell_4d(dt: &FlipTriangulation4) -> CellKey { + dt.cells() + .filter_map(|(cell_key, _)| { + simplex_volume(&cell_points_4d(dt, cell_key)) + .ok() + .map(|volume| (cell_key, volume)) + }) + .max_by(|(_, left), (_, right)| left.total_cmp(right)) + .map(|(cell_key, _)| cell_key) + .expect("stable 4D benchmark triangulation should have a non-degenerate cell") +} + +fn roundtrip_k1_4d(dt: &mut FlipTriangulation4, cell_key: CellKey) { + let centroid = cell_centroid_4d(dt, cell_key); + let new_vertex = vertex!(centroid); + let new_uuid = new_vertex.uuid(); + + dt.flip_k1_insert(cell_key, new_vertex) + .expect("k=1 insert should succeed on stable 4D benchmark triangulation"); + + let new_key = dt + .tds() + .vertex_key_from_uuid(&new_uuid) + .expect("inserted vertex should be present after k=1 insert"); + + dt.flip_k1_remove(new_key) + .expect("k=1 remove should invert k=1 insert"); } -fn bench_seed_search_limit() -> usize { - std::env::var("DELAUNAY_BENCH_DISCOVER_SEEDS_LIMIT") +fn interior_facets_4d(dt: &FlipTriangulation4) -> Vec { + let mut facets = Vec::new(); + for (cell_key, cell) in dt.cells() { + if let Some(neighbors) = cell.neighbors() { + for (facet_index, neighbor) in neighbors.iter().enumerate() { + if neighbor.is_some() { + let facet_index = u8::try_from(facet_index).expect("facet index fits in u8"); + facets.push(FacetHandle::new(cell_key, facet_index)); + } + } + } + } + facets +} + +fn flippable_k2_facet_4d(dt: &FlipTriangulation4) -> FacetHandle { + let mut last_error = None; + for facet in interior_facets_4d(dt) { + let mut trial = dt.clone(); + match trial.flip_k2(facet) { + Ok(info) => { + assert_eq!( + info.inserted_face_vertices.len(), + 2, + "k=2 flip should insert an edge" + ); + let edge = EdgeKey::new( + info.inserted_face_vertices[0], + info.inserted_face_vertices[1], + ); + trial + .flip_k2_inverse_from_edge(edge) + .expect("k=2 inverse should succeed after k=2 flip"); + return facet; + } + Err(err) => last_error = Some(format!("{err}")), + } + } + + panic!( + "no flippable interior facet found for k=2 benchmark (last error: {})", + last_error.unwrap_or_else(|| "none".to_string()) + ); +} + +fn roundtrip_k2_4d(dt: &mut FlipTriangulation4, facet: FacetHandle) { + let info = dt + .flip_k2(facet) + .expect("k=2 flip should succeed for preselected 4D benchmark facet"); + assert_eq!( + info.inserted_face_vertices.len(), + 2, + "k=2 flip should insert an edge" + ); + let edge = EdgeKey::new( + info.inserted_face_vertices[0], + info.inserted_face_vertices[1], + ); + dt.flip_k2_inverse_from_edge(edge) + .expect("k=2 inverse should succeed after k=2 flip"); +} + +fn ridges_4d(dt: &FlipTriangulation4) -> Vec { + let mut ridges = Vec::new(); + for (cell_key, cell) in dt.cells() { + let vertex_count = cell.number_of_vertices(); + for i in 0..vertex_count { + for j in (i + 1)..vertex_count { + let omit_a = u8::try_from(i).expect("ridge 
index fits in u8"); + let omit_b = u8::try_from(j).expect("ridge index fits in u8"); + ridges.push(RidgeHandle::new(cell_key, omit_a, omit_b)); + } + } + } + ridges +} + +fn flippable_k3_ridge_4d(dt: &FlipTriangulation4) -> RidgeHandle { + let mut last_error = None; + for ridge in ridges_4d(dt) { + let mut trial = dt.clone(); + match trial.flip_k3(ridge) { + Ok(info) => { + assert_eq!( + info.inserted_face_vertices.len(), + 3, + "k=3 flip should insert a triangle" + ); + let triangle = TriangleHandle::new( + info.inserted_face_vertices[0], + info.inserted_face_vertices[1], + info.inserted_face_vertices[2], + ); + trial + .flip_k3_inverse_from_triangle(triangle) + .expect("k=3 inverse should succeed after k=3 flip"); + return ridge; + } + Err(err) => last_error = Some(format!("{err}")), + } + } + + panic!( + "no flippable ridge found for k=3 benchmark (last error: {})", + last_error.unwrap_or_else(|| "none".to_string()) + ); +} + +fn roundtrip_k3_4d(dt: &mut FlipTriangulation4, ridge: RidgeHandle) { + let info = dt + .flip_k3(ridge) + .expect("k=3 flip should succeed for preselected 4D benchmark ridge"); + assert_eq!( + info.inserted_face_vertices.len(), + 3, + "k=3 flip should insert a triangle" + ); + let triangle = TriangleHandle::new( + info.inserted_face_vertices[0], + info.inserted_face_vertices[1], + info.inserted_face_vertices[2], + ); + dt.flip_k3_inverse_from_triangle(triangle) + .expect("k=3 inverse should succeed after k=3 flip"); +} + +fn discover_seeds_enabled() -> bool { + env::var("DELAUNAY_BENCH_DISCOVER_SEEDS").is_ok_and(|value| value != "0") +} + +fn seed_search_limit() -> usize { + env::var("DELAUNAY_BENCH_DISCOVER_SEEDS_LIMIT") .ok() .and_then(|value| value.parse::().ok()) .unwrap_or(2000) @@ -164,7 +738,9 @@ fn bench_seed_search_limit() -> usize { macro_rules! benchmark_tds_new_dimension { ($dim:literal, $func_name:ident, $seed:literal, $counts:expr) => { /// Benchmark triangulation creation for D-dimensional triangulations + #[allow(clippy::too_many_lines)] fn $func_name(c: &mut Criterion) { + print_manifest_once(); let counts = $counts; // Opt-in helper for discovering stable seeds without paying Criterion warmup/ @@ -182,43 +758,53 @@ macro_rules! benchmark_tds_new_dimension { // // We avoid `std::process::exit` here so that destructors run and Criterion // can clean up state on both success and failure. 
- if bench_discover_seeds_enabled() { + if discover_seeds_enabled() { let bounds = (-100.0, 100.0); - let filters: Vec = std::env::args() + let filters: Vec = env::args() .skip(1) .filter(|arg| !arg.starts_with('-')) .collect(); for &count in counts { - let bench_id = - format!("tds_new_{}d/tds_new/{}", stringify!($dim), count); + let bench_id = format!("tds_new_{}d/tds_new/{}", stringify!($dim), count); + let adv_bench_id = + format!("tds_new_{}d/tds_new_adversarial/{}", stringify!($dim), count); - if !filters.is_empty() && !filters.iter().any(|filter| bench_id.contains(filter)) { - continue; + if !filters.is_empty() + && filters.iter().any(|filter| adv_bench_id.contains(filter)) + { + let attempts = + NonZeroUsize::new(8).expect("retry attempts must be non-zero"); + let _ = prepare_adv_data::<$dim>($seed, count, attempts); + return; } - let seed = ($seed as u64).wrapping_add(count as u64); - let limit = bench_seed_search_limit(); - let attempts = - NonZeroUsize::new(6).expect("retry attempts must be non-zero"); - - if let Some((candidate_seed, _, _)) = - find_seed_and_vertices::<$dim>(seed, count, bounds, limit, attempts) + if filters.is_empty() + || filters.iter().any(|filter| bench_id.contains(filter)) { - println!( - "seed_search_found dim={} count={} seed={}", - $dim, count, candidate_seed + let seed = ($seed as u64).wrapping_add(count as u64); + let limit = seed_search_limit(); + let attempts = + NonZeroUsize::new(6).expect("retry attempts must be non-zero"); + + if let Some((candidate_seed, _, _)) = + find_seed_vertices::<$dim>(seed, count, bounds, limit, attempts) + { + println!( + "seed_search_found dim={} count={} seed={}", + $dim, count, candidate_seed + ); + return; + } + + panic!( + "seed_search_failed dim={} count={} start_seed={} limit={}", + $dim, + count, + seed, + limit ); - return; } - - panic!( - "seed_search_failed dim={} count={} start_seed={} limit={}", - $dim, - count, - seed, - limit - ); } // No filter matched this benchmark function; do nothing. @@ -244,7 +830,7 @@ macro_rules! benchmark_tds_new_dimension { let attempts = NonZeroUsize::new(6).expect("retry attempts must be non-zero"); let (seed, points, vertices) = - prepare_benchmark_data::<$dim>($seed, count, bounds, attempts); + prepare_data::<$dim>($seed, count, bounds, attempts); let sample_points = points.iter().take(5).collect::>(); // In benchmarks we compile in release mode, where the default retry policy is @@ -266,17 +852,6 @@ macro_rules! benchmark_tds_new_dimension { } Err(err) => { let error = format!("{err:?}"); - if bench_logging_enabled() { - error!( - dim = $dim, - count, - seed, - bounds = ?bounds, - sample_points = ?sample_points, - error = %error, - "DelaunayTriangulation::new failed" - ); - } panic!( "DelaunayTriangulation::new failed for {}D: {error}; dim={}; count={}; seed={}; bounds={:?}; sample_points={sample_points:?}", $dim, @@ -289,6 +864,45 @@ macro_rules! 
benchmark_tds_new_dimension {
                         }
                     });
                 });
+
+                group.bench_with_input(
+                    BenchmarkId::new("tds_new_adversarial", count),
+                    &count,
+                    |b, &count| {
+                        let attempts =
+                            NonZeroUsize::new(8).expect("retry attempts must be non-zero");
+                        let (seed, points, vertices) =
+                            prepare_adv_data::<$dim>($seed, count, attempts);
+                        let sample_points = points.iter().take(5).collect::<Vec<_>>();
+                        let options = ConstructionOptions::default().with_retry_policy(
+                            RetryPolicy::Shuffled {
+                                attempts,
+                                base_seed: Some(seed),
+                            },
+                        );
+
+                        b.iter(|| {
+                            match DelaunayTriangulation::<_, (), (), $dim>::new_with_options(
+                                &vertices,
+                                options,
+                            ) {
+                                Ok(dt) => {
+                                    black_box(dt);
+                                }
+                                Err(err) => {
+                                    let error = format!("{err:?}");
+                                    panic!(
+                                        "adversarial DelaunayTriangulation::new failed for {}D: {error}; dim={}; count={}; seed={}; sample_points={sample_points:?}",
+                                        $dim,
+                                        $dim,
+                                        count,
+                                        seed
+                                    );
+                                }
+                            }
+                        });
+                    },
+                );
             }
             group.finish();
@@ -302,6 +916,461 @@ benchmark_tds_new_dimension!(3, benchmark_tds_new_3d, 123, COUNTS);
 benchmark_tds_new_dimension!(4, benchmark_tds_new_4d, 456, COUNTS);
 benchmark_tds_new_dimension!(5, benchmark_tds_new_5d, 789, COUNTS_5D);
+fn bench_boundary_case<const D: usize>(
+    group: &mut BenchmarkGroup<'_, WallTime>,
+    dimension: usize,
+    dataset: Dataset,
+    count: usize,
+    dt: &BenchTriangulation<D>,
+) {
+    group.throughput(Throughput::Elements(count as u64));
+    group.bench_function(
+        BenchmarkId::new(
+            format!("boundary_facets_{dimension}d{}", dataset.suffix()),
+            count,
+        ),
+        |b| {
+            b.iter(|| black_box(dt.boundary_facets().count()));
+        },
+    );
+}
+
+fn bench_hull_case<const D: usize>(
+    group: &mut BenchmarkGroup<'_, WallTime>,
+    dimension: usize,
+    dataset: Dataset,
+    count: usize,
+    dt: &BenchTriangulation<D>,
+) {
+    group.throughput(Throughput::Elements(count as u64));
+    group.bench_function(
+        BenchmarkId::new(
+            format!("from_triangulation_{dimension}d{}", dataset.suffix()),
+            count,
+        ),
+        |b| {
+            b.iter(|| {
+                black_box(
+                    ConvexHull::from_triangulation(dt.as_triangulation()).unwrap_or_else(|err| {
+                        panic!("{dimension}D convex hull extraction should succeed: {err}")
+                    }),
+                );
+            });
+        },
+    );
+}
+
+fn bench_validate_case<const D: usize>(
+    group: &mut BenchmarkGroup<'_, WallTime>,
+    dimension: usize,
+    dataset: Dataset,
+    count: usize,
+    dt: &BenchTriangulation<D>,
+) {
+    group.throughput(Throughput::Elements(count as u64));
+    group.bench_function(
+        BenchmarkId::new(format!("validate_{dimension}d{}", dataset.suffix()), count),
+        |b| {
+            b.iter(|| {
+                black_box(dt.validate()).unwrap_or_else(|err| {
+                    panic!("{dimension}D benchmark triangulation should validate: {err}");
+                });
+            });
+        },
+    );
+}
+
+fn bench_insert_case<const D: usize>(
+    group: &mut BenchmarkGroup<'_, WallTime>,
+    dimension: usize,
+    dataset: Dataset,
+    count: usize,
+    base_dt: &BenchTriangulation<D>,
+    insert_vertices: &[Vertex<f64, (), D>],
+) {
+    group.throughput(Throughput::Elements(count as u64));
+    group.bench_function(
+        BenchmarkId::new(format!("insert_{dimension}d{}", dataset.suffix()), count),
+        |b| {
+            b.iter_batched(
+                || (base_dt.clone(), insert_vertices.to_vec()),
+                |(mut dt, vertices)| {
+                    for vertex in vertices {
+                        black_box(dt.insert(vertex)).unwrap_or_else(|err| {
+                            panic!("{dimension}D incremental insert should succeed: {err}");
+                        });
+                    }
+                    black_box(dt);
+                },
+                BatchSize::LargeInput,
+            );
+        },
+    );
+}
+
+fn benchmark_boundary_facets(c: &mut Criterion) {
+    print_manifest_once();
+    if discover_seeds_enabled() {
+        return;
+    }
+    let mut group = c.benchmark_group("boundary_facets");
+    group.sample_size(25);
+
+    let dt_2d = prepare_dt::<2>(42, OPERATION_COUNT);
+
bench_boundary_case( + &mut group, + 2, + Dataset::WellConditioned, + OPERATION_COUNT, + &dt_2d, + ); + let dt_2d_adversarial = prepare_adv_dt::<2>(42, OPERATION_COUNT); + bench_boundary_case( + &mut group, + 2, + Dataset::Adversarial, + OPERATION_COUNT, + &dt_2d_adversarial, + ); + + let dt_3d = prepare_dt::<3>(123, OPERATION_COUNT); + bench_boundary_case( + &mut group, + 3, + Dataset::WellConditioned, + OPERATION_COUNT, + &dt_3d, + ); + let dt_3d_adversarial = prepare_adv_dt::<3>(123, OPERATION_COUNT); + bench_boundary_case( + &mut group, + 3, + Dataset::Adversarial, + OPERATION_COUNT, + &dt_3d_adversarial, + ); + + let dt_4d = prepare_dt::<4>(456, OPERATION_COUNT); + bench_boundary_case( + &mut group, + 4, + Dataset::WellConditioned, + OPERATION_COUNT, + &dt_4d, + ); + let dt_4d_adversarial = prepare_adv_dt::<4>(456, OPERATION_COUNT); + bench_boundary_case( + &mut group, + 4, + Dataset::Adversarial, + OPERATION_COUNT, + &dt_4d_adversarial, + ); + + let dt_5d = prepare_dt::<5>(789, OPERATION_COUNT_5D); + bench_boundary_case( + &mut group, + 5, + Dataset::WellConditioned, + OPERATION_COUNT_5D, + &dt_5d, + ); + let dt_5d_adversarial = prepare_adv_dt::<5>(789, OPERATION_COUNT_5D); + bench_boundary_case( + &mut group, + 5, + Dataset::Adversarial, + OPERATION_COUNT_5D, + &dt_5d_adversarial, + ); + + group.finish(); +} + +fn benchmark_convex_hull(c: &mut Criterion) { + print_manifest_once(); + if discover_seeds_enabled() { + return; + } + let mut group = c.benchmark_group("convex_hull"); + group.sample_size(20); + + let dt_2d = prepare_dt::<2>(42, OPERATION_COUNT); + bench_hull_case( + &mut group, + 2, + Dataset::WellConditioned, + OPERATION_COUNT, + &dt_2d, + ); + let dt_2d_adversarial = prepare_adv_dt::<2>(42, OPERATION_COUNT); + bench_hull_case( + &mut group, + 2, + Dataset::Adversarial, + OPERATION_COUNT, + &dt_2d_adversarial, + ); + + let dt_3d = prepare_dt::<3>(123, OPERATION_COUNT); + bench_hull_case( + &mut group, + 3, + Dataset::WellConditioned, + OPERATION_COUNT, + &dt_3d, + ); + let dt_3d_adversarial = prepare_adv_dt::<3>(123, OPERATION_COUNT); + bench_hull_case( + &mut group, + 3, + Dataset::Adversarial, + OPERATION_COUNT, + &dt_3d_adversarial, + ); + + let dt_4d = prepare_dt::<4>(456, OPERATION_COUNT); + bench_hull_case( + &mut group, + 4, + Dataset::WellConditioned, + OPERATION_COUNT, + &dt_4d, + ); + let dt_4d_adversarial = prepare_adv_dt::<4>(456, OPERATION_COUNT); + bench_hull_case( + &mut group, + 4, + Dataset::Adversarial, + OPERATION_COUNT, + &dt_4d_adversarial, + ); + + let dt_5d = prepare_dt::<5>(789, OPERATION_COUNT_5D); + bench_hull_case( + &mut group, + 5, + Dataset::WellConditioned, + OPERATION_COUNT_5D, + &dt_5d, + ); + let dt_5d_adversarial = prepare_adv_dt::<5>(789, OPERATION_COUNT_5D); + bench_hull_case( + &mut group, + 5, + Dataset::Adversarial, + OPERATION_COUNT_5D, + &dt_5d_adversarial, + ); + + group.finish(); +} + +fn benchmark_validation(c: &mut Criterion) { + print_manifest_once(); + if discover_seeds_enabled() { + return; + } + let mut group = c.benchmark_group("validation"); + group.sample_size(15); + + let dt_3d = prepare_dt::<3>(123, OPERATION_COUNT); + bench_validate_case( + &mut group, + 3, + Dataset::WellConditioned, + OPERATION_COUNT, + &dt_3d, + ); + let dt_3d_adversarial = prepare_adv_dt::<3>(123, OPERATION_COUNT); + bench_validate_case( + &mut group, + 3, + Dataset::Adversarial, + OPERATION_COUNT, + &dt_3d_adversarial, + ); + + let dt_4d = prepare_dt::<4>(456, OPERATION_COUNT); + bench_validate_case( + &mut group, + 4, + Dataset::WellConditioned, 
+ OPERATION_COUNT, + &dt_4d, + ); + let dt_4d_adversarial = prepare_adv_dt::<4>(456, OPERATION_COUNT); + bench_validate_case( + &mut group, + 4, + Dataset::Adversarial, + OPERATION_COUNT, + &dt_4d_adversarial, + ); + + let dt_5d = prepare_dt::<5>(789, OPERATION_COUNT_5D); + bench_validate_case( + &mut group, + 5, + Dataset::WellConditioned, + OPERATION_COUNT_5D, + &dt_5d, + ); + let dt_5d_adversarial = prepare_adv_dt::<5>(789, OPERATION_COUNT_5D); + bench_validate_case( + &mut group, + 5, + Dataset::Adversarial, + OPERATION_COUNT_5D, + &dt_5d_adversarial, + ); + + group.finish(); +} + +fn benchmark_insert(c: &mut Criterion) { + print_manifest_once(); + if discover_seeds_enabled() { + return; + } + let mut group = c.benchmark_group("incremental_insert"); + group.sample_size(15); + + let dt_2d = prepare_dt::<2>(42, OPERATION_COUNT); + let insert_2d = prepare_inserts::<2>(42, INSERT_COUNT, Dataset::WellConditioned); + bench_insert_case( + &mut group, + 2, + Dataset::WellConditioned, + INSERT_COUNT, + &dt_2d, + &insert_2d, + ); + let dt_2d_adversarial = prepare_adv_dt::<2>(42, OPERATION_COUNT); + let insert_2d_adversarial = prepare_inserts::<2>(42, INSERT_COUNT, Dataset::Adversarial); + bench_insert_case( + &mut group, + 2, + Dataset::Adversarial, + INSERT_COUNT, + &dt_2d_adversarial, + &insert_2d_adversarial, + ); + + let dt_3d = prepare_dt::<3>(123, OPERATION_COUNT); + let insert_3d = prepare_inserts::<3>(123, INSERT_COUNT, Dataset::WellConditioned); + bench_insert_case( + &mut group, + 3, + Dataset::WellConditioned, + INSERT_COUNT, + &dt_3d, + &insert_3d, + ); + let dt_3d_adversarial = prepare_adv_dt::<3>(123, OPERATION_COUNT); + let insert_3d_adversarial = prepare_inserts::<3>(123, INSERT_COUNT, Dataset::Adversarial); + bench_insert_case( + &mut group, + 3, + Dataset::Adversarial, + INSERT_COUNT, + &dt_3d_adversarial, + &insert_3d_adversarial, + ); + + let dt_4d = prepare_dt::<4>(456, OPERATION_COUNT); + let insert_4d = prepare_inserts::<4>(456, INSERT_COUNT_4D, Dataset::WellConditioned); + bench_insert_case( + &mut group, + 4, + Dataset::WellConditioned, + INSERT_COUNT_4D, + &dt_4d, + &insert_4d, + ); + let dt_4d_adversarial = prepare_adv_dt::<4>(456, OPERATION_COUNT); + let insert_4d_adversarial = prepare_inserts::<4>(456, INSERT_COUNT_4D, Dataset::Adversarial); + bench_insert_case( + &mut group, + 4, + Dataset::Adversarial, + INSERT_COUNT_4D, + &dt_4d_adversarial, + &insert_4d_adversarial, + ); + + let dt_5d = prepare_dt::<5>(789, OPERATION_COUNT_5D); + let insert_5d = prepare_inserts::<5>(789, INSERT_COUNT_5D, Dataset::WellConditioned); + bench_insert_case( + &mut group, + 5, + Dataset::WellConditioned, + INSERT_COUNT_5D, + &dt_5d, + &insert_5d, + ); + let dt_5d_adversarial = prepare_adv_dt::<5>(789, OPERATION_COUNT_5D); + let insert_5d_adversarial = prepare_inserts::<5>(789, INSERT_COUNT_5D, Dataset::Adversarial); + bench_insert_case( + &mut group, + 5, + Dataset::Adversarial, + INSERT_COUNT_5D, + &dt_5d_adversarial, + &insert_5d_adversarial, + ); + + group.finish(); +} + +fn benchmark_bistellar_flips(c: &mut Criterion) { + print_manifest_once(); + if discover_seeds_enabled() { + return; + } + let mut group = c.benchmark_group("bistellar_flips_4d"); + group.sample_size(10); + let base_dt = build_flip_dt_4d(); + let k1_cell = largest_volume_cell_4d(&base_dt); + let k2_facet = flippable_k2_facet_4d(&base_dt); + let k3_ridge = flippable_k3_ridge_4d(&base_dt); + + group.bench_function("k1_roundtrip", |b| { + b.iter_batched( + || base_dt.clone(), + |mut dt| { + roundtrip_k1_4d(&mut dt, 
k1_cell); + black_box(dt); + }, + BatchSize::LargeInput, + ); + }); + + group.bench_function("k2_roundtrip", |b| { + b.iter_batched( + || base_dt.clone(), + |mut dt| { + roundtrip_k2_4d(&mut dt, k2_facet); + black_box(dt); + }, + BatchSize::LargeInput, + ); + }); + + group.bench_function("k3_roundtrip", |b| { + b.iter_batched( + || base_dt.clone(), + |mut dt| { + roundtrip_k3_4d(&mut dt, k3_ridge); + black_box(dt); + }, + BatchSize::LargeInput, + ); + }); + + group.finish(); +} + criterion_group!( name = benches; config = Criterion::default(); @@ -309,6 +1378,11 @@ criterion_group!( benchmark_tds_new_2d, benchmark_tds_new_3d, benchmark_tds_new_4d, - benchmark_tds_new_5d + benchmark_tds_new_5d, + benchmark_boundary_facets, + benchmark_convex_hull, + benchmark_validation, + benchmark_insert, + benchmark_bistellar_flips ); criterion_main!(benches); diff --git a/benches/microbenchmarks.rs b/benches/microbenchmarks.rs deleted file mode 100644 index 40d6daee..00000000 --- a/benches/microbenchmarks.rs +++ /dev/null @@ -1,432 +0,0 @@ -//! Microbenchmarks for key delaunay methods -//! -//! This benchmark suite focuses on measuring the performance of individual key methods -//! in the delaunay triangulation library, particularly those that are performance-critical: -//! -//! 1. **`DelaunayTriangulation::with_kernel`**: Complete triangulation creation -//! 2. **Layered validation**: `dt.tds().is_valid()/validate()`, `dt.as_triangulation().is_valid()/validate()`, `dt.is_valid()`, `dt.validate()` -//! 3. **Incremental construction**: Performance of `insert()` method for vertex insertion -//! 4. **Memory usage patterns**: Allocation and deallocation patterns -//! -//! These benchmarks measure the effectiveness of the optimization implementations -//! completed as part of the Pure Incremental Delaunay Triangulation refactoring project. - -use criterion::{BatchSize, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; -use delaunay::geometry::kernel::RobustKernel; -use delaunay::geometry::util::generate_random_points_seeded; -use delaunay::prelude::query::*; -use delaunay::triangulation::delaunay::DelaunayTriangulation; -use delaunay::vertex; -use std::hint::black_box; -use std::sync::OnceLock; - -#[cfg(feature = "bench-logging")] -fn init_tracing() { - static INIT: std::sync::Once = std::sync::Once::new(); - INIT.call_once(|| { - let filter = tracing_subscriber::EnvFilter::try_from_default_env() - .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")); - let _ = tracing_subscriber::fmt().with_env_filter(filter).try_init(); - }); -} - -#[cfg(not(feature = "bench-logging"))] -const fn init_tracing() {} - -macro_rules! bench_info { - ($($arg:tt)*) => {{ - #[cfg(feature = "bench-logging")] - { - init_tracing(); - tracing::info!($($arg)*); - } - }}; -} - -macro_rules! bench_warn { - ($($arg:tt)*) => {{ - #[cfg(feature = "bench-logging")] - { - init_tracing(); - tracing::warn!($($arg)*); - } - }}; -} - -/// Get the deterministic seed for random point generation. -/// Reads `DELAUNAY_BENCH_SEED` (decimal or 0x-hex). Defaults to 0xD1EA. -/// Logs the resolved seed once on first use if `PRINT_BENCH_SEED` is set and -/// the `bench-logging` feature is enabled. 
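For reference, the seed-parsing convention the deleted `get_benchmark_seed` helper implemented (decimal or `0x`-prefixed hex via `DELAUNAY_BENCH_SEED`, falling back to `0xD1EA`) can be sketched standalone. The function name below is illustrative only and is not part of the crate API:

```rust
/// Parse a benchmark seed from an optional env-var value: decimal or 0x-hex,
/// defaulting to 0xD1EA when unset or unparseable (mirrors the removed helper).
fn parse_bench_seed(raw: Option<&str>) -> u64 {
    raw.and_then(|s| {
        let s = s.trim();
        s.strip_prefix("0x")
            .or_else(|| s.strip_prefix("0X"))
            .map_or_else(|| s.parse().ok(), |hex| u64::from_str_radix(hex, 16).ok())
    })
    .unwrap_or(0xD1EA)
}

fn main() {
    assert_eq!(parse_bench_seed(Some("0x2A")), 42);
    assert_eq!(parse_bench_seed(Some("42")), 42);
    assert_eq!(parse_bench_seed(None), 0xD1EA);
}
```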
-fn get_benchmark_seed() -> u64 { - static SEED: OnceLock = OnceLock::new(); - *SEED.get_or_init(|| { - let seed = std::env::var("DELAUNAY_BENCH_SEED") - .ok() - .and_then(|s| { - let s = s.trim(); - s.strip_prefix("0x") - .or_else(|| s.strip_prefix("0X")) - .map_or_else(|| s.parse().ok(), |hex| u64::from_str_radix(hex, 16).ok()) - }) - .unwrap_or(0xD1EA); - if std::env::var("PRINT_BENCH_SEED").is_ok() { - bench_info!("Benchmark seed: 0x{seed:X} ({seed})"); - } - seed - }) -} - -/// Macro to generate comprehensive dimensional benchmarks for core algorithms -macro_rules! generate_dimensional_benchmarks { - ($dim:literal) => { - pastey::paste! { - /// Benchmark incremental Delaunay triangulation for [<$dim>]D - fn [](c: &mut Criterion) { - let point_counts = [10, 25, 50, 100, 250]; - let seed = get_benchmark_seed(); // Cache seed locally for consistency across iterations - - let mut group = c.benchmark_group(concat!("delaunay_triangulation_", stringify!([<$dim>]), "d")); - - for &n_points in &point_counts { - let throughput = n_points as u64; - group.throughput(Throughput::Elements(throughput)); - - group.bench_with_input( - BenchmarkId::new("with_kernel", n_points), - &n_points, - |b, &n_points| { - b.iter_batched( - || { - let points: Vec> = generate_random_points_seeded(n_points, (-100.0, 100.0), seed).unwrap(); - points.iter().map(|p| vertex!(*p)).collect::>() - }, - |vertices| black_box(DelaunayTriangulation::, (), (), $dim>::with_kernel(&RobustKernel::new(), &vertices).unwrap()), - BatchSize::LargeInput, - ); - }, - ); - } - - group.finish(); - } - } - }; -} - -// Generate comprehensive benchmarks for dimensions 2-5 -generate_dimensional_benchmarks!(2); -generate_dimensional_benchmarks!(3); -generate_dimensional_benchmarks!(4); -generate_dimensional_benchmarks!(5); - -/// Macro to generate memory usage benchmarks for all dimensions -macro_rules! generate_memory_usage_benchmarks { - ($dim:literal) => { - pastey::paste! { - /// Benchmark memory allocation patterns for [<$dim>]D - fn [](c: &mut Criterion) { - let point_counts: &[usize] = if $dim <= 3 { &[50, 100, 200] } else { &[20, 50, 100] }; - let seed = get_benchmark_seed(); // Cache seed locally for consistency across iterations - - let mut group = c.benchmark_group(&format!("memory_usage_{}d", $dim)); - - for &n_points in point_counts { - group.bench_with_input( - BenchmarkId::new("triangulation_memory", n_points), - &n_points, - |b, &n_points| { - b.iter(|| { - // Measure complete triangulation creation and destruction - let points: Vec> = generate_random_points_seeded(n_points, (-100.0, 100.0), seed).unwrap(); - let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect(); - let dt = DelaunayTriangulation::, (), (), $dim>::with_kernel(&RobustKernel::new(), &vertices).unwrap(); - black_box((dt.tds().number_of_vertices(), dt.tds().number_of_cells())) - }); - }, - ); - } - - group.finish(); - } - } - }; -} - -// Generate memory usage benchmarks for dimensions 2-5 -generate_memory_usage_benchmarks!(2); -generate_memory_usage_benchmarks!(3); -generate_memory_usage_benchmarks!(4); -generate_memory_usage_benchmarks!(5); - -/// Macro to generate validation method benchmarks for all dimensions -macro_rules! generate_validation_benchmarks { - ($dim:literal) => { - pastey::paste! 
{ - /// Benchmark validation methods performance for [<$dim>]D - fn [](c: &mut Criterion) { - let point_counts: &[usize] = if $dim <= 3 { &[10, 25, 50, 100] } else { &[10, 25, 50] }; - let seed = get_benchmark_seed(); // Cache seed locally for consistency across iterations - - let mut group = c.benchmark_group(&format!("validation_methods_{}d", $dim)); - - for &n_points in point_counts { - let throughput = n_points as u64; - group.throughput(Throughput::Elements(throughput)); - - group.bench_with_input( - BenchmarkId::new("validate", n_points), - &n_points, - |b, &n_points| { - b.iter_batched( - || { - let points: Vec> = generate_random_points_seeded(n_points, (-100.0, 100.0), seed).unwrap(); - let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect(); - DelaunayTriangulation::, (), (), $dim>::with_kernel(&RobustKernel::new(), &vertices).unwrap() - - }, - |dt| { - dt.validate().unwrap(); - black_box(dt); - }, - BatchSize::LargeInput, - ); - }, - ); - - group.bench_with_input( - BenchmarkId::new("is_valid_delaunay", n_points), - &n_points, - |b, &n_points| { - b.iter_batched( - || { - let points: Vec> = generate_random_points_seeded(n_points, (-100.0, 100.0), seed).unwrap(); - let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect(); - DelaunayTriangulation::, (), (), $dim>::with_kernel(&RobustKernel::new(), &vertices).unwrap() - }, - |dt| { - dt.is_valid().unwrap(); - black_box(dt); - }, - BatchSize::LargeInput, - ); - }, - ); - } - - group.finish(); - } - - /// Benchmark individual validation components for [<$dim>]D - fn [](c: &mut Criterion) { - let seed = get_benchmark_seed(); // Cache seed locally for consistency across iterations - let n_points = if $dim <= 3 { 50 } else { 25 }; // Fixed size for component benchmarks - let points: Vec> = generate_random_points_seeded(n_points, (-100.0, 100.0), seed).unwrap(); - let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect(); - let dt = DelaunayTriangulation::, (), (), $dim>::with_kernel(&RobustKernel::new(), &vertices).unwrap(); - - let mut group = c.benchmark_group(&format!("validation_components_{}d", $dim)); - - group.bench_function("tds_is_valid", |b| { - b.iter(|| { - dt.tds().is_valid().unwrap(); - // Black box to prevent dead code elimination - black_box(()); - }); - }); - - group.bench_function("tri_is_valid", |b| { - b.iter(|| { - dt.as_triangulation().is_valid().unwrap(); - // Black box to prevent dead code elimination - black_box(()); - }); - }); - - group.bench_function("is_valid_delaunay", |b| { - b.iter(|| { - dt.is_valid().unwrap(); - // Black box to prevent dead code elimination - black_box(()); - }); - }); - - group.bench_function("validate", |b| { - b.iter(|| { - dt.validate().unwrap(); - // Black box to prevent dead code elimination - black_box(()); - }); - }); - - group.finish(); - } - } - }; -} - -// Generate validation benchmarks for dimensions 2-5 -generate_validation_benchmarks!(2); -generate_validation_benchmarks!(3); -generate_validation_benchmarks!(4); -generate_validation_benchmarks!(5); - -/// Macro to generate incremental construction benchmarks for all dimensions -macro_rules! generate_incremental_construction_benchmarks { - ($dim:literal) => { - pastey::paste! 
{ - /// Benchmark incremental vertex addition for [<$dim>]D - fn [](c: &mut Criterion) { - let seed = get_benchmark_seed(); // Cache seed locally for consistency across iterations - let mut group = c.benchmark_group(&format!("incremental_construction_{}d", $dim)); - - // Generate initial simplex for the given dimension - let mut initial_coords = Vec::new(); - for i in 0..=$dim { - let mut coords = vec![0.0; $dim]; - if i < $dim { - coords[i] = 1.0; - } - initial_coords.push(coords); - } - let initial_vertices: Vec<_> = initial_coords - .into_iter() - .map(|coords| { - let mut array = [0.0; $dim]; - array.copy_from_slice(&coords); - vertex!(array) - }) - .collect(); - - // Test single vertex addition - let additional_coords = vec![0.5; $dim]; - let mut additional_array = [0.0; $dim]; - additional_array.copy_from_slice(&additional_coords); - // Note: additional_vertex is Copy, so we can use the same value in each benchmark iteration - let additional_vertex = vertex!(additional_array); - - group.bench_function("single_vertex_addition", |b| { - b.iter_batched( - || DelaunayTriangulation::, (), (), $dim>::with_kernel(&RobustKernel::new(), &initial_vertices).unwrap(), - |mut dt| { - dt.insert(additional_vertex).unwrap(); - black_box(dt); - }, - BatchSize::SmallInput, - ); - }); - - // Test multiple vertex additions with dimension-appropriate counts - let counts: &[usize] = if $dim <= 3 { &[2, 5, 10] } else { &[2, 4, 6] }; - for &count in counts { - group.bench_with_input( - BenchmarkId::new("multiple_vertex_addition", count), - &count, - |b, &count| { - b.iter_batched( - || { - let dt = DelaunayTriangulation::, (), (), $dim>::with_kernel(&RobustKernel::new(), &initial_vertices).unwrap(); - let additional_points: Vec> = generate_random_points_seeded(count, (-100.0, 100.0), seed).unwrap(); - let additional_vertices: Vec<_> = - additional_points.iter().map(|p| vertex!(*p)).collect(); - (dt, additional_vertices) - }, - |(mut dt, additional_vertices)| { - for vertex in additional_vertices { - dt.insert(vertex).unwrap(); - } - black_box(dt); - }, - BatchSize::SmallInput, - ); - }, - ); - } - - group.finish(); - } - } - }; -} - -// Generate incremental construction benchmarks for dimensions 2-5 -generate_incremental_construction_benchmarks!(2); -generate_incremental_construction_benchmarks!(3); -generate_incremental_construction_benchmarks!(4); -generate_incremental_construction_benchmarks!(5); - -/// Build Criterion configuration with optional environment variable overrides. -/// -/// Supports: -/// - `CRIT_SAMPLE_SIZE`: Number of samples per benchmark (default: Criterion's default) -/// - `CRIT_MEASUREMENT_MS`: Measurement time in milliseconds (default: Criterion's default) -/// - `CRIT_WARMUP_MS`: Warm-up time in milliseconds (default: Criterion's default) -/// -/// This allows CI and local tuning without code changes. 
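Each `CRIT_*` override described above maps onto one Criterion builder call. A minimal sketch of the `CRIT_SAMPLE_SIZE` path, paralleling the removed `bench_config` (illustrative helper name, assuming the `criterion` crate):

```rust
use criterion::Criterion;

/// Apply the CRIT_SAMPLE_SIZE override, keeping Criterion's default otherwise.
fn apply_sample_size_override(mut c: Criterion) -> Criterion {
    if let Some(n) = std::env::var("CRIT_SAMPLE_SIZE")
        .ok()
        .and_then(|s| s.parse::<usize>().ok())
    {
        c = c.sample_size(n);
    }
    c
}
```

The measurement-time and warm-up overrides follow the same pattern, feeding the parsed milliseconds through `Duration::from_millis` into `measurement_time` and `warm_up_time`.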
-fn bench_config() -> Criterion { - use std::time::Duration; - init_tracing(); - let mut c = Criterion::default(); - - if let Some(v) = std::env::var("CRIT_SAMPLE_SIZE") - .ok() - .and_then(|s| s.parse::().ok()) - { - c = c.sample_size(v); - } else if std::env::var("CRIT_SAMPLE_SIZE").is_ok() { - bench_warn!("Failed to parse CRIT_SAMPLE_SIZE, using default"); - } - - if let Some(v) = std::env::var("CRIT_MEASUREMENT_MS") - .ok() - .and_then(|s| s.parse::().ok()) - { - c = c.measurement_time(Duration::from_millis(v)); - } else if std::env::var("CRIT_MEASUREMENT_MS").is_ok() { - bench_warn!("Failed to parse CRIT_MEASUREMENT_MS, using default"); - } - - if let Some(v) = std::env::var("CRIT_WARMUP_MS") - .ok() - .and_then(|s| s.parse::().ok()) - { - c = c.warm_up_time(Duration::from_millis(v)); - } else if std::env::var("CRIT_WARMUP_MS").is_ok() { - bench_warn!("Failed to parse CRIT_WARMUP_MS, using default"); - } - - c -} - -criterion_group!( - name = benches; - config = bench_config(); - targets = - // Core triangulation benchmarks (2D-5D) - benchmark_delaunay_triangulation_2d, - benchmark_delaunay_triangulation_3d, - benchmark_delaunay_triangulation_4d, - benchmark_delaunay_triangulation_5d, - - // Memory usage benchmarks (2D-5D) - benchmark_memory_usage_2d, - benchmark_memory_usage_3d, - benchmark_memory_usage_4d, - benchmark_memory_usage_5d, - - // Validation benchmarks (2D-5D) - benchmark_validation_methods_2d, - benchmark_validation_methods_3d, - benchmark_validation_methods_4d, - benchmark_validation_methods_5d, - benchmark_validation_components_2d, - benchmark_validation_components_3d, - benchmark_validation_components_4d, - benchmark_validation_components_5d, - - // Incremental construction benchmarks (2D-5D) - benchmark_incremental_construction_2d, - benchmark_incremental_construction_3d, - benchmark_incremental_construction_4d, - benchmark_incremental_construction_5d -); -criterion_main!(benches); diff --git a/benches/profiling_suite.rs b/benches/profiling_suite.rs index 46313e3c..2a635cf5 100644 --- a/benches/profiling_suite.rs +++ b/benches/profiling_suite.rs @@ -9,6 +9,7 @@ //! 4. **Query latency analysis** (circumsphere tests, neighbor queries) //! 5. **Multi-dimensional scaling** (2D through 5D) //! 6. **Algorithmic bottleneck identification** (specific operation profiling) +//! 7. **Validation layer diagnostics** (Level 1-3 vs Level 4 cost separation) //! //! ## Usage //! @@ -44,31 +45,40 @@ //! - `PROFILING_DEV_MODE`: Set to "1", "true", "yes", or "on" for reduced scale (faster iteration) //! - `BENCH_MEASUREMENT_TIME`: Override measurement time in seconds (minimum: 1, guards against invalid values) //! - `BENCH_PERCENTILE`: Configure percentile for memory analysis (1-100, default: 95) -//! - `BENCH_SAMPLE_SIZE`: Override Criterion sample size (default: 10) +//! - `BENCH_SAMPLE_SIZE`: Override Criterion sample size (default: 10; values below 10 are clamped to 10, so +//! `BENCH_SAMPLE_SIZE=5` still runs 10 samples) //! - `BENCH_WARMUP_SECS`: Override Criterion warm-up time in seconds (default: 10) //! //! Example with custom configuration: //! ```bash -//! BENCH_SAMPLE_SIZE=5 BENCH_WARMUP_SECS=5 BENCH_PERCENTILE=90 cargo bench --profile perf --bench profiling_suite +//! BENCH_SAMPLE_SIZE=10 BENCH_WARMUP_SECS=5 BENCH_PERCENTILE=90 cargo bench --profile perf --bench profiling_suite //! 
``` -use criterion::{BatchSize, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; +use criterion::measurement::WallTime; +use criterion::{ + BatchSize, BenchmarkGroup, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main, +}; use delaunay::core::collections::SmallBuffer; +use delaunay::geometry::traits::coordinate::Coordinate; use delaunay::geometry::util::{ generate_grid_points, generate_poisson_points, generate_random_points_seeded, safe_usize_to_scalar, }; use delaunay::prelude::query::*; -use delaunay::prelude::triangulation::DelaunayTriangulationBuilder; +use delaunay::prelude::triangulation::{ + ConstructionOptions, DelaunayTriangulationBuilder, RetryPolicy, +}; use delaunay::vertex; use num_traits::cast; -use serde::{Serialize, de::DeserializeOwned}; +use std::env; use std::hint::black_box; +use std::num::NonZeroUsize; +use std::sync::Once; use std::time::{Duration, Instant}; #[cfg(feature = "bench-logging")] fn init_tracing() { - static INIT: std::sync::Once = std::sync::Once::new(); + static INIT: Once = Once::new(); INIT.call_once(|| { let filter = tracing_subscriber::EnvFilter::try_from_default_env() .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")); @@ -79,18 +89,8 @@ fn init_tracing() { #[cfg(not(feature = "bench-logging"))] const fn init_tracing() {} -#[cfg(not(feature = "count-allocations"))] -macro_rules! bench_warn { - ($($arg:tt)*) => {{ - #[cfg(feature = "bench-logging")] - { - init_tracing(); - tracing::warn!($($arg)*); - } - }}; -} - // SmallBuffer size constants for different use cases +#[cfg(all(feature = "count-allocations", feature = "bench-logging"))] const BENCHMARK_ITERATION_BUFFER_SIZE: usize = 8; // For tracking allocation info across benchmark iterations const SIMPLEX_VERTICES_BUFFER_SIZE: usize = 4; // 3D simplex = 4 vertices const QUERY_RESULTS_BUFFER_SIZE: usize = 1024; // For bounded query result collections (max 1000 in code) @@ -99,6 +99,7 @@ const QUERY_RESULTS_BUFFER_SIZE: usize = 1024; // For bounded query result colle const DEFAULT_SEED: u64 = 42; const QUERY_SEED: u64 = 123; const MAX_QUERY_RESULTS: usize = 1_000; +const VALIDATION_SEED_SEARCH_LIMIT: u64 = 64; // Memory allocation counting support #[cfg(feature = "count-allocations")] @@ -106,27 +107,20 @@ use allocation_counter::{AllocationInfo, measure}; #[cfg(not(feature = "count-allocations"))] #[derive(Debug, Default)] -struct AllocationInfo { - count_total: u64, - count_current: i64, - count_max: u64, - bytes_total: u64, - bytes_current: i64, - bytes_max: u64, -} +struct AllocationInfo; #[cfg(not(feature = "count-allocations"))] -fn measure(f: F) -> AllocationInfo { +fn measure(f: impl FnOnce()) -> AllocationInfo { f(); - AllocationInfo::default() + AllocationInfo } #[cfg(not(feature = "count-allocations"))] -fn print_count_allocations_banner_once() { - use std::sync::Once; +fn print_alloc_banner_once() { static ONCE: Once = Once::new(); ONCE.call_once(|| { - bench_warn!("count-allocations feature not enabled; memory stats are placeholders."); + #[cfg(feature = "bench-logging")] + println!("allocation stats unavailable: count-allocations feature disabled"); }); } @@ -153,7 +147,7 @@ const PROFILING_COUNTS_DEVELOPMENT: &[usize] = &[ /// Returns true for: "1", "true", "TRUE", "yes", "on" (case-insensitive) /// Returns false for anything else (including "0", "false", empty, or unset) fn is_dev_mode() -> bool { - let dev = std::env::var("PROFILING_DEV_MODE").ok(); + let dev = env::var("PROFILING_DEV_MODE").ok(); dev.as_deref().is_some_and(|s| { s 
== "1" || s.eq_ignore_ascii_case("true") @@ -174,7 +168,7 @@ fn get_profiling_counts() -> &'static [usize] { /// Helper function to parse benchmark measurement time from environment /// Guards against zero/invalid values by ensuring minimum of 1 second fn bench_time(default_secs: u64) -> Duration { - let secs = std::env::var("BENCH_MEASUREMENT_TIME") + let secs = env::var("BENCH_MEASUREMENT_TIME") .ok() .and_then(|s| s.parse::().ok()) .map_or_else(|| default_secs.max(1), |parsed| parsed.max(1)); @@ -187,6 +181,7 @@ enum PointDistribution { Random, Grid, PoissonDisk, + Adversarial, } impl PointDistribution { @@ -195,12 +190,13 @@ impl PointDistribution { Self::Random => "random", Self::Grid => "grid", Self::PoissonDisk => "poisson", + Self::Adversarial => "adversarial", } } } /// Generate points according to the specified distribution -fn generate_points_by_distribution( +fn gen_points( count: usize, distribution: PointDistribution, seed: u64, @@ -208,6 +204,28 @@ fn generate_points_by_distribution( match distribution { PointDistribution::Random => generate_random_points_seeded(count, (-100.0, 100.0), seed) .expect("random point generation failed"), + PointDistribution::Adversarial => generate_random_points_seeded::( + count, + (-1.0, 1.0), + seed ^ 0xA5A5_A5A5_A5A5_A5A5, + ) + .expect("adversarial base point generation failed") + .iter() + .enumerate() + .map(|(index, point)| { + let index = u32::try_from(index).expect("benchmark point index should fit in u32"); + let mut coords = [0.0_f64; D]; + for (axis, coord) in coords.iter_mut().enumerate() { + let axis_number = u32::try_from(axis + 1).expect("axis should fit in u32"); + let base: f64 = point.coords()[axis]; + let cluster_offset = f64::from(index % 7) * 1.0e-3; + let axis_offset = f64::from(axis_number) * 0.25; + let perturbation = f64::from((index + axis_number) % 11) * 1.0e-6; + *coord = base.mul_add(1.0e3, 1.0e9 + axis_offset + cluster_offset + perturbation); + } + Point::new(coords) + }) + .collect(), PointDistribution::Grid => { // Calculate points per dimension to get approximately `count` points total let count_f64 = safe_usize_to_scalar::(count).unwrap_or(2.0); @@ -253,7 +271,7 @@ fn generate_points_by_distribution( /// Comprehensive triangulation scaling analysis across dimensions and distributions #[expect(clippy::significant_drop_tightening, clippy::too_many_lines)] -fn benchmark_triangulation_scaling(c: &mut Criterion) { +fn bench_scaling(c: &mut Criterion) { let counts = get_profiling_counts(); let distributions = [ PointDistribution::Random, @@ -268,8 +286,7 @@ fn benchmark_triangulation_scaling(c: &mut Criterion) { for &count in counts { for &distribution in &distributions { // Pre-generate sample points to calculate actual count and avoid double-generation - let sample_points = - generate_points_by_distribution::<2>(count, distribution, DEFAULT_SEED); + let sample_points = gen_points::<2>(count, distribution, DEFAULT_SEED); let actual_count = sample_points.len(); group.throughput(Throughput::Elements(actual_count as u64)); @@ -281,11 +298,7 @@ fn benchmark_triangulation_scaling(c: &mut Criterion) { b.iter_batched( || { // Reuse same generation logic to ensure consistent point count - let points = generate_points_by_distribution::<2>( - count, - distribution, - DEFAULT_SEED, - ); + let points = gen_points::<2>(count, distribution, DEFAULT_SEED); points.iter().map(|p| vertex!(*p)).collect::>() }, |vertices| { @@ -320,8 +333,7 @@ fn benchmark_triangulation_scaling(c: &mut Criterion) { } // Pre-generate sample points to 
calculate actual count and avoid double-generation - let sample_points = - generate_points_by_distribution::<3>(count, distribution, DEFAULT_SEED); + let sample_points = gen_points::<3>(count, distribution, DEFAULT_SEED); let actual_count = sample_points.len(); group.throughput(Throughput::Elements(actual_count as u64)); @@ -332,11 +344,7 @@ fn benchmark_triangulation_scaling(c: &mut Criterion) { |b, &(count, distribution, _actual_count)| { b.iter_batched( || { - let points = generate_points_by_distribution::<3>( - count, - distribution, - DEFAULT_SEED, - ); + let points = gen_points::<3>(count, distribution, DEFAULT_SEED); points.iter().map(|p| vertex!(*p)).collect::>() }, |vertices| { @@ -373,8 +381,7 @@ fn benchmark_triangulation_scaling(c: &mut Criterion) { { for &distribution in &distributions { // Pre-generate sample points to calculate actual count and avoid double-generation - let sample_points = - generate_points_by_distribution::<4>(count, distribution, DEFAULT_SEED); + let sample_points = gen_points::<4>(count, distribution, DEFAULT_SEED); let actual_count = sample_points.len(); group.throughput(Throughput::Elements(actual_count as u64)); @@ -385,11 +392,7 @@ fn benchmark_triangulation_scaling(c: &mut Criterion) { |b, &(count, distribution, _actual_count)| { b.iter_batched( || { - let points = generate_points_by_distribution::<4>( - count, - distribution, - DEFAULT_SEED, - ); + let points = gen_points::<4>(count, distribution, DEFAULT_SEED); points.iter().map(|p| vertex!(*p)).collect::>() }, |vertices| { @@ -425,8 +428,7 @@ fn benchmark_triangulation_scaling(c: &mut Criterion) { { for &distribution in &distributions { // Pre-generate sample points to calculate actual count and avoid double-generation - let sample_points = - generate_points_by_distribution::<5>(count, distribution, DEFAULT_SEED); + let sample_points = gen_points::<5>(count, distribution, DEFAULT_SEED); let actual_count = sample_points.len(); group.throughput(Throughput::Elements(actual_count as u64)); @@ -437,11 +439,7 @@ fn benchmark_triangulation_scaling(c: &mut Criterion) { |b, &(count, distribution, _actual_count)| { b.iter_batched( || { - let points = generate_points_by_distribution::<5>( - count, - distribution, - DEFAULT_SEED, - ); + let points = gen_points::<5>(count, distribution, DEFAULT_SEED); points.iter().map(|p| vertex!(*p)).collect::>() }, |vertices| { @@ -464,19 +462,37 @@ fn benchmark_triangulation_scaling(c: &mut Criterion) { // Memory Usage Profiling // ============================================================================ -/// Calculate percentile from a slice of values using nearest-rank method -/// Supports configurable percentile via environment variable `BENCH_PERCENTILE` (default: 95) -fn calculate_percentile(values: &mut [u64]) -> u64 { +/// Read the memory summary percentile from `BENCH_PERCENTILE` (default: 95). +#[cfg(all(feature = "count-allocations", feature = "bench-logging"))] +fn configured_percentile() -> usize { + env::var("BENCH_PERCENTILE") + .ok() + .and_then(|s| s.parse::().ok()) + .map_or(95, |p| p.clamp(1, 100)) +} + +/// Format a percentile as an ordinal label for the memory summary. 
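A worked instance of the nearest-rank rule used by `calculate_percentile` below (the helper here is a standalone illustration, not the benchmark code): for 8 `bytes_max` samples at the default 95th percentile, the rank is ceil(0.95 * 8) = 8, i.e. the largest sample.

```rust
/// Zero-based index of the nearest-rank percentile in a sorted, non-empty slice:
/// rank = ceil(p/100 * n), clamped to [1, n].
fn nearest_rank_index(len: usize, percentile: usize) -> usize {
    (percentile * len).div_ceil(100).clamp(1, len) - 1
}

fn main() {
    let mut bytes_max = vec![10_u64, 40, 20, 80, 30, 70, 50, 60];
    bytes_max.sort_unstable();
    assert_eq!(nearest_rank_index(bytes_max.len(), 95), 7);
    assert_eq!(bytes_max[7], 80); // p95 of 8 samples is the maximum
    // Ordinal labels follow the usual English rule: 95 -> "95th", 91 -> "91st", 12 -> "12th".
}
```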
+#[cfg(all(feature = "count-allocations", feature = "bench-logging"))] +fn percentile_label(percentile: usize) -> String { + let suffix = match percentile % 100 { + 11..=13 => "th", + _ => match percentile % 10 { + 1 => "st", + 2 => "nd", + 3 => "rd", + _ => "th", + }, + }; + format!("{percentile}{suffix}") +} + +/// Calculate percentile from a slice of values using nearest-rank method. +#[cfg(all(feature = "count-allocations", feature = "bench-logging"))] +fn calculate_percentile(values: &mut [u64], percentile: usize) -> u64 { if values.is_empty() { return 0; } - // Parse percentile from environment, defaulting to 95 - let percentile = std::env::var("BENCH_PERCENTILE") - .ok() - .and_then(|s| s.parse::().ok()) - .map_or(95, |p| p.clamp(1, 100)); // Clamp to valid percentile range - values.sort_unstable(); let n = values.len(); // nearest-rank: ceil(p/100 * n), clamped to [1, n] @@ -488,12 +504,14 @@ fn calculate_percentile(values: &mut [u64]) -> u64 { } /// Print memory allocation summary +#[cfg(all(feature = "count-allocations", feature = "bench-logging"))] #[expect(clippy::cast_precision_loss)] fn print_alloc_summary( info: &AllocationInfo, description: &str, actual_point_count: usize, - percentile_95: u64, + percentile: usize, + percentile_value: u64, ) { println!("\n=== Memory Allocation Summary for {description} ({actual_point_count} points) ==="); println!("Total allocations: {}", info.count_total); @@ -507,9 +525,10 @@ fn print_alloc_summary( info.bytes_max as f64 / (1024.0 * 1024.0) ); println!( - "95th percentile bytes: {} ({:.2} MB)", - percentile_95, - percentile_95 as f64 / (1024.0 * 1024.0) + "{} percentile bytes: {} ({:.2} MB)", + percentile_label(percentile), + percentile_value, + percentile_value as f64 / (1024.0 * 1024.0) ); if actual_point_count > 0 { println!( @@ -522,109 +541,121 @@ fn print_alloc_summary( println!("=====================================\n"); } -/// Generic helper to benchmark memory usage for a specific dimension D +#[cfg(all(feature = "count-allocations", feature = "bench-logging"))] #[expect(clippy::cast_possible_wrap)] +fn print_alloc_summary_from_samples( + allocation_infos: &SmallBuffer, + actual_point_counts: &SmallBuffer, +) { + if allocation_infos.is_empty() { + return; + } + + // Safe cast for division: Criterion sample buffers here are small and non-empty. 
+ let divisor_unsigned = allocation_infos.len() as u64; + let divisor_signed = allocation_infos.len() as i64; + let avg_info = AllocationInfo { + count_total: allocation_infos.iter().map(|i| i.count_total).sum::() / divisor_unsigned, + count_current: allocation_infos + .iter() + .map(|i| i.count_current) + .sum::() + / divisor_signed, + count_max: allocation_infos + .iter() + .map(|i| i.count_max) + .max() + .unwrap_or(0), + bytes_total: allocation_infos.iter().map(|i| i.bytes_total).sum::() / divisor_unsigned, + bytes_current: allocation_infos + .iter() + .map(|i| i.bytes_current) + .sum::() + / divisor_signed, + bytes_max: allocation_infos + .iter() + .map(|i| i.bytes_max) + .max() + .unwrap_or(0), + }; + let avg_actual_count = if actual_point_counts.is_empty() { + 0 + } else { + actual_point_counts.iter().sum::() / actual_point_counts.len() + }; + + let mut bytes_max_values: Vec = allocation_infos.iter().map(|i| i.bytes_max).collect(); + let percentile = configured_percentile(); + let percentile_value = calculate_percentile(&mut bytes_max_values, percentile); + + print_alloc_summary( + &avg_info, + &format!("{D}D Triangulation"), + avg_actual_count, + percentile, + percentile_value, + ); +} + +/// Generic helper to benchmark memory usage for a specific dimension D fn bench_memory_usage( - group: &mut criterion::BenchmarkGroup<'_, criterion::measurement::WallTime>, + group: &mut BenchmarkGroup<'_, WallTime>, bench_id_prefix: &str, count: usize, -) where - [f64; D]: Copy + DeserializeOwned + Serialize + Sized, -{ +) { + #[cfg(all(feature = "count-allocations", feature = "bench-logging"))] + let mut allocation_infos: SmallBuffer = + SmallBuffer::new(); + + #[cfg(all(feature = "count-allocations", feature = "bench-logging"))] + let mut actual_point_counts: SmallBuffer = + SmallBuffer::new(); + group.bench_with_input( BenchmarkId::new(bench_id_prefix, count), &count, |b, &count| { b.iter_custom(|iters| { let mut total_time = Duration::new(0, 0); - let mut allocation_infos: SmallBuffer< - AllocationInfo, - BENCHMARK_ITERATION_BUFFER_SIZE, - > = SmallBuffer::new(); - - let mut actual_point_counts: SmallBuffer = - SmallBuffer::new(); for _ in 0..iters { + let points = gen_points::(count, PointDistribution::Random, DEFAULT_SEED); + #[cfg(all(feature = "count-allocations", feature = "bench-logging"))] + let pts_len = points.len(); + let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect(); let start_time = Instant::now(); let alloc_info = measure(|| { - let points = generate_points_by_distribution::( - count, - PointDistribution::Random, - DEFAULT_SEED, - ); - let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect(); - actual_point_counts.push(points.len()); // Track actual count if let Ok(dt) = DelaunayTriangulationBuilder::new(&vertices).build::<()>() { black_box(dt); } }); total_time += start_time.elapsed(); - allocation_infos.push(alloc_info); - } - // Report memory usage summary if available - if !allocation_infos.is_empty() { - // Safe cast for division - allocation_infos.len() is guaranteed to be small and non-zero - let divisor_unsigned = allocation_infos.len() as u64; - let divisor_signed = allocation_infos.len() as i64; - let avg_info = AllocationInfo { - count_total: allocation_infos.iter().map(|i| i.count_total).sum::() - / divisor_unsigned, - count_current: allocation_infos - .iter() - .map(|i| i.count_current) - .sum::() - / divisor_signed, - count_max: allocation_infos - .iter() - .map(|i| i.count_max) - .max() - .unwrap_or(0), - bytes_total: 
allocation_infos.iter().map(|i| i.bytes_total).sum::() - / divisor_unsigned, - bytes_current: allocation_infos - .iter() - .map(|i| i.bytes_current) - .sum::() - / divisor_signed, - bytes_max: allocation_infos - .iter() - .map(|i| i.bytes_max) - .max() - .unwrap_or(0), - }; - let avg_actual_count = if actual_point_counts.is_empty() { - 0 - } else { - actual_point_counts.iter().sum::() / actual_point_counts.len() - }; - - // Calculate percentile of bytes_max (configurable via BENCH_PERCENTILE, default 95th) - let mut bytes_max_values: Vec = - allocation_infos.iter().map(|i| i.bytes_max).collect(); - let percentile_value = calculate_percentile(&mut bytes_max_values); + #[cfg(all(feature = "count-allocations", feature = "bench-logging"))] + { + allocation_infos.push(alloc_info); + actual_point_counts.push(pts_len); + } - print_alloc_summary( - &avg_info, - &format!("{D}D Triangulation"), - avg_actual_count, - percentile_value, - ); + #[cfg(not(all(feature = "count-allocations", feature = "bench-logging")))] + let _ = alloc_info; } total_time }); }, ); + + #[cfg(all(feature = "count-allocations", feature = "bench-logging"))] + print_alloc_summary_from_samples::(&allocation_infos, &actual_point_counts); } /// Memory usage profiling across different scales and dimensions using allocation counter fn benchmark_memory_profiling(c: &mut Criterion) { #[cfg(not(feature = "count-allocations"))] - print_count_allocations_banner_once(); + print_alloc_banner_once(); let counts = if is_dev_mode() { &[1_000, 10_000][..] @@ -682,11 +713,7 @@ fn benchmark_query_latency(c: &mut Criterion) { &count, |b, &count| { // Setup: Create triangulation and query points - let points = generate_points_by_distribution::<3>( - count, - PointDistribution::Random, - DEFAULT_SEED, - ); + let points = gen_points::<3>(count, PointDistribution::Random, DEFAULT_SEED); let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect(); let Ok(dt) = DelaunayTriangulationBuilder::new(&vertices).build::<()>() else { // Construction hit a geometric degeneracy; skip this benchmark entry @@ -696,11 +723,7 @@ fn benchmark_query_latency(c: &mut Criterion) { let tds = dt.tds(); // Generate query points - let query_points = generate_points_by_distribution::<3>( - 100, - PointDistribution::Random, - QUERY_SEED, - ); + let query_points = gen_points::<3>(100, PointDistribution::Random, QUERY_SEED); // Precompute all valid simplex vertices outside the benchmark loop let mut precomputed_simplices: Vec< @@ -742,11 +765,8 @@ fn benchmark_query_latency(c: &mut Criterion) { let query_point_obj = *query_point; // Use the fastest circumsphere method (based on benchmark results) - { - use delaunay::geometry::predicates::insphere_lifted; - let result = insphere_lifted(points_for_test, query_point_obj); - query_results.push(result); - } + let result = insphere_lifted(points_for_test, query_point_obj); + query_results.push(result); // Limit total queries to prevent extremely long benchmarks if query_results.len() >= MAX_QUERY_RESULTS { @@ -768,12 +788,112 @@ fn benchmark_query_latency(c: &mut Criterion) { group.finish(); } +// ============================================================================ +// Validation Layer Diagnostics +// ============================================================================ + +macro_rules! 
benchmark_validation_components_dimension { + ($dim:literal, $func_name:ident, $count:expr) => { + fn $func_name(c: &mut Criterion) { + let is_adversarial = stringify!($func_name).ends_with("_adversarial"); + let distribution = if is_adversarial { + PointDistribution::Adversarial + } else { + PointDistribution::Random + }; + let suffix = if is_adversarial { "_adversarial" } else { "" }; + let mut last_error = None; + let dt = (0..VALIDATION_SEED_SEARCH_LIMIT) + .find_map(|offset| { + let seed = DEFAULT_SEED.wrapping_add(offset); + let points = gen_points::<$dim>($count, distribution, seed); + let vertices: Vec<_> = points.iter().map(|point| vertex!(*point)).collect(); + let builder = DelaunayTriangulationBuilder::new(&vertices); + let builder = if is_adversarial { + let attempts = + NonZeroUsize::new(8).expect("retry attempts must be non-zero"); + builder.construction_options( + ConstructionOptions::default().with_retry_policy( + RetryPolicy::Shuffled { + attempts, + base_seed: Some(seed), + }, + ), + ) + } else { + builder + }; + + match builder.build::<()>() { + Ok(dt) => Some(dt), + Err(err) => { + last_error = Some(format!("{err}")); + None + } + } + }) + .unwrap_or_else(|| { + panic!( + "failed to build {}D validation component benchmark triangulation \ + after {} seeds (last error: {})", + $dim, + VALIDATION_SEED_SEARCH_LIMIT, + last_error.unwrap_or_else(|| "none".to_string()) + ); + }); + + let mut group = c.benchmark_group(format!("validation_components_{}d{}", $dim, suffix)); + group.measurement_time(bench_time(15)); + group.throughput(Throughput::Elements($count as u64)); + + group.bench_function("tds_is_valid", |b| { + b.iter(|| { + black_box(dt.tds().is_valid()) + .expect("TDS validation should pass for benchmark triangulation"); + }); + }); + + group.bench_function("tri_is_valid", |b| { + b.iter(|| { + black_box(dt.as_triangulation().is_valid()) + .expect("triangulation validation should pass for benchmark triangulation"); + }); + }); + + group.bench_function("is_valid_delaunay", |b| { + b.iter(|| { + black_box(dt.is_valid()) + .expect("Delaunay validation should pass for benchmark triangulation"); + }); + }); + + group.bench_function("validate", |b| { + b.iter(|| { + black_box(dt.validate()) + .expect("full validation should pass for benchmark triangulation"); + }); + }); + + group.finish(); + } + }; +} + +benchmark_validation_components_dimension!(2, benchmark_validation_components_2d, 50); +benchmark_validation_components_dimension!(3, benchmark_validation_components_3d, 50); +benchmark_validation_components_dimension!(4, benchmark_validation_components_4d, 25); +benchmark_validation_components_dimension!(5, benchmark_validation_components_5d, 25); +benchmark_validation_components_dimension!(2, benchmark_validation_components_2d_adversarial, 50); +benchmark_validation_components_dimension!(3, benchmark_validation_components_3d_adversarial, 50); +benchmark_validation_components_dimension!(4, benchmark_validation_components_4d_adversarial, 25); +benchmark_validation_components_dimension!(5, benchmark_validation_components_5d_adversarial, 25); + // ============================================================================ // Algorithmic Bottleneck Identification // ============================================================================ /// Profile specific algorithmic components to identify bottlenecks -fn benchmark_algorithmic_bottlenecks(c: &mut Criterion) { +fn bench_bottlenecks(c: &mut Criterion) { let counts = if is_dev_mode() { &[3_000][..] 
} else { @@ -791,11 +911,8 @@ fn benchmark_algorithmic_bottlenecks(c: &mut Criterion) { |b, &count| { b.iter_batched( || { - let points = generate_points_by_distribution::<3>( - count, - PointDistribution::Random, - DEFAULT_SEED, - ); + let points = + gen_points::<3>(count, PointDistribution::Random, DEFAULT_SEED); let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect(); DelaunayTriangulationBuilder::new(&vertices) .build::<()>() @@ -820,17 +937,17 @@ fn benchmark_algorithmic_bottlenecks(c: &mut Criterion) { |b, &count| { b.iter_batched( || { - let points = generate_points_by_distribution::<3>( - count, - PointDistribution::Random, - DEFAULT_SEED, - ); + let points = + gen_points::<3>(count, PointDistribution::Random, DEFAULT_SEED); let vertices: Vec<_> = points.iter().map(|p| vertex!(*p)).collect(); - DelaunayTriangulationBuilder::new(&vertices).build::<()>().ok() + DelaunayTriangulationBuilder::new(&vertices) + .build::<()>() + .ok() }, |dt| { if let Some(dt) = dt { - let hull = delaunay::geometry::algorithms::convex_hull::ConvexHull::from_triangulation(dt.as_triangulation()).unwrap(); + let hull = + ConvexHull::from_triangulation(dt.as_triangulation()).unwrap(); black_box(hull); } }, @@ -852,11 +969,11 @@ criterion_group!( config = { init_tracing(); // Allow configuration via environment variables for CI stability - let sample_size = std::env::var("BENCH_SAMPLE_SIZE") + let sample_size = env::var("BENCH_SAMPLE_SIZE") .ok() .and_then(|v| v.parse().ok()) - .unwrap_or(10); - let warm_up_secs = std::env::var("BENCH_WARMUP_SECS") + .map_or(10, |size: usize| size.max(10)); + let warm_up_secs = env::var("BENCH_WARMUP_SECS") .ok() .and_then(|v| v.parse().ok()) .unwrap_or(10); @@ -867,10 +984,18 @@ criterion_group!( .measurement_time(bench_time(60)) }; targets = - benchmark_triangulation_scaling, + bench_scaling, benchmark_memory_profiling, benchmark_query_latency, - benchmark_algorithmic_bottlenecks + benchmark_validation_components_2d, + benchmark_validation_components_3d, + benchmark_validation_components_4d, + benchmark_validation_components_5d, + benchmark_validation_components_2d_adversarial, + benchmark_validation_components_3d_adversarial, + benchmark_validation_components_4d_adversarial, + benchmark_validation_components_5d_adversarial, + bench_bottlenecks ); criterion_main!(profiling_benches); diff --git a/docs/code_organization.md b/docs/code_organization.md index e6a6bcd1..fb577a99 100644 --- a/docs/code_organization.md +++ b/docs/code_organization.md @@ -65,7 +65,6 @@ delaunay/ │ ├── ci_performance_suite.rs │ ├── circumsphere_containment.rs │ ├── large_scale_performance.rs -│ ├── microbenchmarks.rs │ ├── profiling_suite.rs │ └── topology_guarantee_construction.rs ├── docs/ diff --git a/docs/dev/commands.md b/docs/dev/commands.md index d6adb4ac..80f9503e 100644 --- a/docs/dev/commands.md +++ b/docs/dev/commands.md @@ -103,6 +103,15 @@ just check `just check` is the non-mutating lint/validator bundle. It does not run tests, examples, or benchmarks. +`just check` runs the default DenseSlotMap backend checks and an +`--all-features` pass. The justfile runs Clippy for the default feature set and +for `--all-features`; the legacy SlotMap backend is kept as an optional +compatibility canary. Run it explicitly with: + +```bash +just check-storage-backends +``` + --- ## Documentation Validation @@ -301,7 +310,8 @@ CI enforces: - tests Rust warnings are denied by the manifest lint policy and Clippy warnings are -denied by the `just clippy` invocations. 
Keep any intentional warning-level +denied by the `just clippy` invocations. `just check-storage-backends` separately +checks the SlotMap backend with `--no-default-features`. Keep any intentional warning-level exceptions explicit in `Cargo.toml`. Agents must ensure changes pass CI locally before proposing patches. diff --git a/docs/dev/python.md b/docs/dev/python.md new file mode 100644 index 00000000..7b803142 --- /dev/null +++ b/docs/dev/python.md @@ -0,0 +1,86 @@ +# Python Development Guidelines + +Guidance for Python automation under `scripts/`. + +The Rust library is the primary product, but the Python benchmark, changelog, +hardware, and release utilities are part of the trusted development workflow. +Keep them typed and predictable so failures are visible in CI instead of being +hidden behind loose mocks or broad exception handling. + +--- + +## Validation + +Run the Python validators through the repository toolchain: + +```bash +uv run ruff check scripts/ +uv run ty check scripts/ --error all +uv run pytest scripts/tests +``` + +`ty check scripts/ --error all` is the type-checking authority. Prefer reducing +untyped surfaces in code and tests over adding more `ty` configuration. + +`just check` also runs Python formatting checks, Ruff, and `ty` as part of the +normal repository validation bundle. + +--- + +## Typing + +- Add return annotations to functions and methods. +- Prefer concrete standard-library types over `Any`, `dict`, or bare `Mock` + when the shape is known. +- Keep helper signatures precise enough that `ty` can validate the call sites. +- Avoid growing type-checker configuration unless a demonstrated false positive + cannot be solved cleanly in code. + +--- + +## Subprocess Mocks + +When mocking command wrappers such as `run_git_command()`, +`run_cargo_command()`, or `run_safe_command()`, prefer real typed subprocess +results: + +```python +import subprocess + + +def completed_process(stdout: str = "", *, returncode: int = 0) -> subprocess.CompletedProcess[str]: + """Return a typed subprocess result for command-wrapper mocks.""" + return subprocess.CompletedProcess(args=[], returncode=returncode, stdout=stdout, stderr="") +``` + +Use that helper instead of ad-hoc mocks such as: + +```python +mock_result = Mock() +mock_result.stdout = "..." +mock_result.returncode = 0 +``` + +Structured results make tests closer to production behavior and give `ty` real +attributes to check. + +--- + +## Exceptions + +- Catch specific recoverable error families in production code. Avoid + `except Exception`. +- In tests, raise concrete exceptions that match the production recovery path + (`OSError`, `RuntimeError`, `subprocess.CalledProcessError`, + `subprocess.TimeoutExpired`, etc.). +- Do not use raw `Exception` in mocks just to force a fallback branch; doing so + weakens the contract that the production code is meant to enforce. + +--- + +## Test Helpers + +Put reusable typed test helpers near the top of the test module or in +`scripts/tests/conftest.py` when they are shared. Prefer one helper that returns +the real structured type over repeating partially configured mocks throughout a +file. diff --git a/examples/convex_hull_3d_100_points.rs b/examples/convex_hull_3d_100_points.rs index b924a805..76153eac 100644 --- a/examples/convex_hull_3d_100_points.rs +++ b/examples/convex_hull_3d_100_points.rs @@ -28,10 +28,14 @@ //! - Validation results //! 
- Performance metrics -use delaunay::geometry::util::generate_random_triangulation; +use delaunay::prelude::generators::generate_random_triangulation; use delaunay::prelude::query::*; +use delaunay::prelude::triangulation::flips::CellKey; use num_traits::cast::cast; -use std::time::Instant; +use std::cmp; +use std::env; +use std::mem; +use std::time::{Duration, Instant}; const SEED_CANDIDATES: &[u64] = &[1, 7, 11, 42, 99, 123, 666]; @@ -44,15 +48,12 @@ fn main() { // Use a fixed seed + bounds so that `just examples` is reproducible and robust. let n_points = 100; let bounds = (-3.0, 3.0); - let seed_override: Option = - std::env::var("DELAUNAY_EXAMPLE_SEED") - .ok() - .and_then(|value| { - value.parse().ok().or_else(|| { - eprintln!("Invalid DELAUNAY_EXAMPLE_SEED={value:?}; using default seed list."); - None - }) - }); + let seed_override: Option = env::var("DELAUNAY_EXAMPLE_SEED").ok().and_then(|value| { + value.parse().ok().or_else(|| { + eprintln!("Invalid DELAUNAY_EXAMPLE_SEED={value:?}; using default seed list."); + None + }) + }); let seed_candidates: Vec = seed_override.map_or_else(|| SEED_CANDIDATES.to_vec(), |seed| vec![seed]); @@ -116,7 +117,7 @@ fn main() { analyze_triangulation(&dt); // Extract and analyze convex hull - extract_and_analyze_convex_hull(&dt); + analyze_hull(&dt); // Test point containment test_point_containment(&dt); @@ -158,7 +159,7 @@ fn analyze_triangulation(dt: &DelaunayTriangulation, (), (), } /// Extract and analyze the convex hull from the triangulation -fn extract_and_analyze_convex_hull(dt: &DelaunayTriangulation, (), (), 3>) { +fn analyze_hull(dt: &DelaunayTriangulation, (), (), 3>) { println!("Convex Hull Extraction:"); println!("======================="); @@ -200,7 +201,7 @@ fn extract_and_analyze_convex_hull(dt: &DelaunayTriangulation 0 { println!("\n Facet Analysis:"); let facets: Vec<_> = hull.facets().collect(); - let sample_size = std::cmp::min(5, facets.len()); + let sample_size = cmp::min(5, facets.len()); for (i, facet_handle) in facets.iter().take(sample_size).enumerate() { // Create FacetView to access facet properties @@ -260,30 +261,30 @@ fn test_point_containment(dt: &DelaunayTriangulation, (), () } let centroid_point = Point::new(centroid); - test_point_containment_single(&hull, ¢roid_point, "Centroid", dt); + test_contains_point(&hull, ¢roid_point, "Centroid", dt); // Test slightly offset from centroid (should still be inside) let near_centroid = Point::new([centroid[0] + 0.1, centroid[1] + 0.1, centroid[2] + 0.1]); - test_point_containment_single(&hull, &near_centroid, "Near centroid", dt); + test_contains_point(&hull, &near_centroid, "Near centroid", dt); // Test 2: Points clearly outside the convex hull println!("\n Testing exterior points:"); let far_point = Point::new([50.0, 50.0, 50.0]); - test_point_containment_single(&hull, &far_point, "Far exterior", dt); + test_contains_point(&hull, &far_point, "Far exterior", dt); let axis_point = Point::new([20.0, 0.0, 0.0]); - test_point_containment_single(&hull, &axis_point, "X-axis exterior", dt); + test_contains_point(&hull, &axis_point, "X-axis exterior", dt); let negative_point = Point::new([-20.0, -20.0, -20.0]); - test_point_containment_single(&hull, &negative_point, "Negative exterior", dt); + test_contains_point(&hull, &negative_point, "Negative exterior", dt); // Test 3: Sample triangulation vertices (should be on boundary or inside) println!("\n Testing triangulation vertices:"); - let sample_vertices = std::cmp::min(3, vertex_count); + let sample_vertices = cmp::min(3, 
vertex_count); for (i, (_, vertex)) in dt.tds().vertices().enumerate().take(sample_vertices) { let point: Point = vertex.into(); - test_point_containment_single( + test_contains_point( &hull, &point, &format!("Triangulation vertex {}", i + 1), @@ -295,7 +296,7 @@ fn test_point_containment(dt: &DelaunayTriangulation, (), () } /// Test containment for a single point and display results -fn test_point_containment_single( +fn test_contains_point( hull: &ConvexHull, (), (), 3>, point: &Point, description: &str, @@ -443,8 +444,7 @@ fn performance_analysis(dt: &DelaunayTriangulation, (), (), .collect(); let len_u32 = u32::try_from(extraction_times.len()).unwrap_or(1u32); - let avg_extraction_time: std::time::Duration = - extraction_times.iter().sum::() / len_u32; + let avg_extraction_time: Duration = extraction_times.iter().sum::() / len_u32; let min_extraction_time = *extraction_times.iter().min().unwrap(); let max_extraction_time = *extraction_times.iter().max().unwrap(); @@ -465,8 +465,7 @@ fn performance_analysis(dt: &DelaunayTriangulation, (), (), .collect(); let len_u32 = u32::try_from(containment_times.len()).unwrap_or(1u32); - let avg_containment_time: std::time::Duration = - containment_times.iter().sum::() / len_u32; + let avg_containment_time: Duration = containment_times.iter().sum::() / len_u32; println!("\n Point Containment Queries (10 runs):"); println!(" • Average time: {avg_containment_time:?}"); @@ -483,8 +482,7 @@ fn performance_analysis(dt: &DelaunayTriangulation, (), (), .collect(); let len_u32 = u32::try_from(visibility_times.len()).unwrap_or(1u32); - let avg_visibility_time: std::time::Duration = - visibility_times.iter().sum::() / len_u32; + let avg_visibility_time: Duration = visibility_times.iter().sum::() / len_u32; println!("\n Visible Facet Queries (5 runs):"); println!(" • Average time: {avg_visibility_time:?}"); @@ -507,9 +505,9 @@ fn performance_analysis(dt: &DelaunayTriangulation, (), (), } // Memory usage estimation - let hull_size = std::mem::size_of::, (), (), 3>>(); + let hull_size = mem::size_of::, (), (), 3>>(); // Phase 3C: Facets are now lightweight (CellKey, u8) tuples - let facet_handle_size = std::mem::size_of::<(delaunay::core::CellKey, u8)>(); + let facet_handle_size = mem::size_of::<(CellKey, u8)>(); let estimated_hull_memory = hull_size + (facet_count * facet_handle_size); println!("\n Memory Usage Estimation:"); diff --git a/examples/pachner_roundtrip_4d.rs b/examples/pachner_roundtrip_4d.rs index b114645e..aa8cf042 100644 --- a/examples/pachner_roundtrip_4d.rs +++ b/examples/pachner_roundtrip_4d.rs @@ -14,10 +14,9 @@ //! ``` use ::uuid::Uuid; -use delaunay::geometry::kernel::RobustKernel; -use delaunay::prelude::triangulation::Vertex; +use delaunay::prelude::geometry::RobustKernel; use delaunay::prelude::triangulation::flips::*; -use delaunay::triangulation::delaunay::{ConstructionOptions, InsertionOrderStrategy}; +use delaunay::prelude::triangulation::{ConstructionOptions, InsertionOrderStrategy, Vertex}; use std::time::Instant; type Dt4 = DelaunayTriangulation, (), (), 4>; diff --git a/justfile b/justfile index 8dc09a21..d6c1b1f7 100644 --- a/justfile +++ b/justfile @@ -127,13 +127,6 @@ bench-compare: _ensure-uv bench-compile: cargo bench --workspace --no-run -# Compile benchmarks and integration tests without running. This catches -# release-profile-only warnings (e.g. cfg-gated unused-mut) that debug-mode -# clippy/test won't see. 
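The comment above on `bench-test-compile` refers to warnings that only surface in release-profile compiles. A minimal illustration of the cfg-gated `unused_mut` case it mentions (an illustrative function, not taken from the crate):

```rust
/// In debug builds the cfg-gated sort uses the `mut` binding; in release builds
/// that statement is compiled out, so only a release/bench compile reports unused_mut.
fn collect_ids(count: usize) -> Vec<usize> {
    let mut ids: Vec<usize> = (0..count).collect();
    #[cfg(debug_assertions)]
    ids.sort_unstable();
    ids
}

fn main() {
    assert_eq!(collect_ids(3), vec![0, 1, 2]);
}
```

Hence the recipe compiles benchmarks and release-mode tests with `--no-run` instead of relying on debug-mode clippy or tests to catch such lints.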
-bench-test-compile: - cargo bench --workspace --no-run - cargo test --tests --release --no-run - # Development benchmark comparison: perf profile with reduced sample sizes. bench-dev: _ensure-uv CRIT_SAMPLE_SIZE=10 CRIT_MEASUREMENT_MS=1000 CRIT_WARMUP_MS=500 uv run benchmark-utils compare --baseline baseline-artifact/baseline_results.txt --dev @@ -147,6 +140,13 @@ bench-perf-summary: _ensure-uv bench-smoke: CRIT_SAMPLE_SIZE=10 CRIT_MEASUREMENT_MS=500 CRIT_WARMUP_MS=200 cargo bench --workspace --profile perf +# Compile benchmarks and integration tests without running. This catches +# release-profile-only warnings (e.g. cfg-gated unused-mut) that debug-mode +# clippy/test won't see. +bench-test-compile: + cargo bench --workspace --no-run + cargo test --tests --release --no-run + # Build commands build: cargo build @@ -174,6 +174,11 @@ changelog-update: changelog check: lint @echo "✅ Checks complete!" +# Optional SlotMap compatibility canary. DenseSlotMap is the default production +# backend; run this when changing storage abstractions or before releases. +check-storage-backends: + cargo clippy --workspace --all-targets --no-default-features -- -D warnings -W clippy::pedantic -W clippy::nursery -W clippy::cargo + # CI simulation: comprehensive validation (matches .github/workflows/ci.yml) # Runs: checks + test workflow + examples ci: check test examples @@ -197,9 +202,6 @@ clean: # Code quality and formatting clippy: - # SlotMap backend (disabled default DenseSlotMap) - cargo clippy --workspace --all-targets --no-default-features -- -D warnings -W clippy::pedantic -W clippy::nursery -W clippy::cargo - # DenseSlotMap backend (default) cargo clippy --workspace --all-targets -- -D warnings -W clippy::pedantic -W clippy::nursery -W clippy::cargo @@ -277,7 +279,7 @@ help-workflows: @echo " just debug-large-scale-3d [n] # Issue #341: 3D scalability (default n=10000)" @echo " just debug-large-scale-5d [n] # Issue #342: 5D feasibility (default n=1000)" @echo "" - @echo "Benchmark workflows (explicit perf-profile runs):" + @echo "Benchmark workflows:" @echo " just bench-smoke # Smoke-test benchmark harnesses (minimal samples)" @echo " just bench # Run all benchmarks with perf profile (ThinLTO)" @echo " just bench-baseline # Generate perf-profile performance baseline" @@ -285,10 +287,12 @@ help-workflows: @echo " just bench-compare # Compare against baseline with perf profile" @echo " just bench-dev # Reduced-sample perf-profile comparison (~1-2 min)" @echo " just bench-perf-summary # Generate perf-profile release summary (~30-45 min)" + @echo " just profile [toolchain] [code_ref] # Run ci_performance_suite for a compiler/code pair" @echo "" @echo "Larger/optional workflows:" @echo " just ci-slow # CI + slow tests (100+ vertices)" @echo " just ci-baseline # CI + save performance baseline" + @echo " just check-storage-backends # Optional SlotMap compatibility canary" @echo " just coverage # Generate coverage report (HTML)" @echo " just semgrep # Run repository-owned Semgrep rules" @echo " just compare-storage # Compare SlotMap vs DenseSlotMap (~4-6 hours)" @@ -371,9 +375,11 @@ perf-help: @echo " just bench-smoke # Smoke-test benchmark harnesses" @echo "" @echo "Profiling Commands:" - @echo " just profile # Profile full triangulation_scaling benchmark" - @echo " just profile-dev # Profile 3D dev mode (faster iteration)" - @echo " just profile-mem # Profile memory allocations (with count-allocations feature)" + @echo " just profile # Run ci_performance_suite for the current tree/toolchain" + @echo " just 
profile [toolchain] [code_ref]" + @echo " # Run ci_performance_suite for a compiler/code pair" + @echo " just profile-dev # Samply profile 3D dev mode (faster iteration)" + @echo " just profile-mem # Samply profile memory allocations (with count-allocations feature)" @echo "" @echo "Benchmark System (Delaunay-specific):" @echo " just bench-baseline # Generate baseline via benchmark-utils" @@ -395,10 +401,98 @@ perf-help: @echo " just bench-dev # Reduced-sample benchmark iteration" @echo " CRIT_SAMPLE_SIZE=100 just bench # Custom sample size" @echo " just bench-ci # Final optimized CI-suite benchmark run" + @echo " just profile v0.7.5 # v0.7.5 code on its declared Rust toolchain" + @echo " just profile 1.95 # Current tree on Rust 1.95" + @echo " just profile 1.95 v0.7.5 # v0.7.5 code on Rust 1.95" -# Profiling -profile: - samply record cargo bench --profile perf --bench profiling_suite -- triangulation_scaling +# Run the selected CI benchmark suite for one compiler/code pair. +profile toolchain="" code_ref="current": + #!/usr/bin/env bash + set -euo pipefail + + command -v rustup >/dev/null || { echo "❌ 'rustup' not found. Install Rust via https://rustup.rs"; exit 1; } + + repo_root="$(pwd)" + requested_toolchain="{{toolchain}}" + requested_ref="{{code_ref}}" + workdir="$repo_root" + cleanup_worktree=0 + + cleanup() { + if [[ "$cleanup_worktree" -eq 1 ]]; then + git worktree remove --force "$workdir" >/dev/null 2>&1 || true + rm -rf "$(dirname "$workdir")" + fi + } + + if [[ "$requested_ref" == "current" && -n "$requested_toolchain" ]]; then + if [[ ! "$requested_toolchain" =~ ^([0-9]+(\.[0-9]+){0,2}|stable|beta|nightly)([-+].*)?$ ]]; then + requested_ref="$requested_toolchain" + requested_toolchain="" + fi + fi + + if [[ "$requested_ref" != "current" && "$requested_ref" != "." ]]; then + tmp_parent="$(mktemp -d "${TMPDIR:-/tmp}/delaunay-profile.XXXXXX")" + workdir="$tmp_parent/worktree" + cleanup_worktree=1 + trap cleanup EXIT + git worktree add --detach "$workdir" "$requested_ref" + fi + + if [[ -z "$requested_toolchain" ]]; then + requested_toolchain="$( + grep -E '^[[:space:]]*channel[[:space:]]*=' "$workdir/rust-toolchain.toml" \ + | head -n 1 \ + | cut -d '=' -f 2 \ + | tr -d ' "' \ + || true + )" + fi + + if [[ -z "$requested_toolchain" ]]; then + echo "❌ No toolchain argument provided and no rust-toolchain.toml channel found." + exit 1 + fi + + safe_ref="$( + if [[ "$requested_ref" == "current" || "$requested_ref" == "." 
]]; then + printf 'current' + else + printf '%s' "$requested_ref" + fi | tr -c 'A-Za-z0-9._-' '_' + )" + safe_toolchain="$(printf '%s' "$requested_toolchain" | tr -c 'A-Za-z0-9._-' '_')" + run_dir="$repo_root/target/profile-runs/${safe_ref}-${safe_toolchain}" + mkdir -p "$run_dir" + + echo "📌 Code ref: $requested_ref" + echo "🦀 Rust toolchain: $requested_toolchain" + echo "📊 Benchmark: ci_performance_suite" + echo "📁 Results: $run_dir" + + rustup toolchain install "$requested_toolchain" --profile minimal + + { + echo "# Profile Run" + echo + echo "- Code ref: $requested_ref" + echo "- Workdir: $workdir" + echo "- Commit: $(git -C "$workdir" rev-parse HEAD)" + echo "- Dirty tree: $(if [[ "$workdir" == "$repo_root" && -n "$(git status --short)" ]]; then echo yes; else echo no; fi)" + echo "- Requested toolchain: $requested_toolchain" + echo "- rustc: $(rustup run "$requested_toolchain" rustc --version)" + echo "- cargo: $(rustup run "$requested_toolchain" cargo --version)" + echo "- Cargo profile: cargo bench --profile perf" + echo "- Benchmark harness: ci_performance_suite" + } > "$run_dir/profile_metadata.md" + + ( + cd "$workdir" + CARGO_TARGET_DIR="$run_dir/target" \ + rustup run "$requested_toolchain" cargo bench --profile perf --bench ci_performance_suite \ + 2>&1 | tee "$run_dir/ci_performance_suite.log" + ) profile-dev: PROFILING_DEV_MODE=1 samply record cargo bench --profile perf --bench profiling_suite -- "triangulation_scaling_3d/tds_new/random_3d" @@ -715,11 +809,6 @@ tag-force version: python-sync test: bench-test-compile test-all @echo "✅ Test workflow passed!" -# test-unit: runs lib and doc tests. -test-unit: - cargo test --lib --verbose - cargo test --doc --verbose - # test-all: runs lib, doc, integration, and Python tests (comprehensive) test-all: test-unit test-integration test-python @echo "✅ All tests passed!" @@ -759,6 +848,11 @@ test-slow: test-slow-release: cargo test --release --features slow-tests +# test-unit: runs lib and doc tests. +test-unit: + cargo test --lib --verbose + cargo test --doc --verbose + toml-fmt: _ensure-taplo #!/usr/bin/env bash set -euo pipefail diff --git a/pyproject.toml b/pyproject.toml index e8533ed3..a07be6f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,6 +67,10 @@ select = [ "E", "F", "W", + "ANN201", + "ANN202", + "ANN204", + "C90", "I", "N", "UP", @@ -138,7 +142,7 @@ ignore = [ "PLR2004", # Magic value used in comparison - acceptable for CLI constants and thresholds "FBT001", # Boolean-typed positional argument - appropriate for CLI flag arguments "FBT002", # Boolean default positional argument - standard CLI pattern - "BLE001", # Do not catch blind exception - intentional defensive programming for CLI robustness + # "BLE001" - Re-enabled: broad exception catches must name recoverable error families # "S603" - Re-enabled: subprocess call: check for execution of untrusted input - now using secure subprocess wrappers # "S607" - Re-enabled: Starting a process with a partial executable path - now using full paths "T201", # print found - appropriate for CLI output and user feedback @@ -161,6 +165,9 @@ ignore = [ # docstrings for each pytest case while still checking production scripts. 
"**/tests/test_*.py" = [ "S101", "SLF001", "D101", "D102", "D103" ] +[tool.ruff.lint.mccabe] +max-complexity = 10 + # Import sorting and organization configuration [tool.ruff.lint.isort] known-first-party = [ diff --git a/scripts/README.md b/scripts/README.md index e870176b..92f9ab9d 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -682,7 +682,8 @@ gh release create vX.Y.Z --notes-from-tag # 1. Run benchmarks directly (CI performance suite) cargo bench --profile perf --bench ci_performance_suite -# Generate release performance summary with fresh perf-profile data +# Generate release performance summary with fresh perf-profile public API +# and circumsphere predicate data uv run benchmark-utils generate-summary --run-benchmarks --profile perf # 2. Generate new baseline @@ -692,12 +693,16 @@ uv run benchmark-utils generate-baseline uv run benchmark-utils compare --baseline baseline-artifact/baseline_results.txt ``` -**CI Performance Suite**: The benchmark utilities now use `benches/ci_performance_suite.rs` for CI/CD-optimized performance testing: +**CI Performance Suite**: The benchmark utilities use `benches/ci_performance_suite.rs` for CI/CD-optimized +performance testing and as the primary generated performance-summary source: - **Dimensions**: 2D, 3D, 4D, and 5D triangulations. - **Point counts**: [10, 25, 50]. - **Runtime**: ~5–10 minutes. -- **Coverage**: Core triangulation performance across all supported dimensions. +- **Coverage**: Public construction, hull, validation, insertion, boundary, and bistellar-flip workflows across supported dimensions. + +Circumsphere predicate benchmarks remain part of `generate-summary` as a +dedicated subsection because they track `la-stack`-backed predicate performance. **Migration Notes**: diff --git a/scripts/benchmark_models.py b/scripts/benchmark_models.py index 98039ca6..ebec8197 100644 --- a/scripts/benchmark_models.py +++ b/scripts/benchmark_models.py @@ -14,7 +14,7 @@ class BenchmarkData: """Represents benchmark data for a single test case.""" - points: int + points: int | None dimension: str time_low: float = 0.0 time_mean: float = 0.0 @@ -24,6 +24,28 @@ class BenchmarkData: throughput_mean: float | None = None throughput_high: float | None = None throughput_unit: str | None = None + benchmark_id: str = "" + + @property + def comparison_key(self) -> str: + """Return the stable key used for baseline/regression matching.""" + if self.benchmark_id: + return self.benchmark_id + if self.points is None: + msg = "Unsized benchmarks require benchmark_id for comparison matching" + raise ValueError(msg) + return f"{self.points}_{self.dimension}" + + @property + def points_label(self) -> str: + """Return a display label for the benchmark input size.""" + return str(self.points) if self.points is not None else "n/a" + + def header_line(self) -> str: + """Return the baseline/comparison section header for this benchmark.""" + if self.points is None: + return f"=== Unsized Workload ({self.dimension}) ===" + return f"=== {self.points} Points ({self.dimension}) ===" def with_timing(self, low: float, mean: float, high: float, unit: str) -> "BenchmarkData": """Set timing data (fluent interface).""" @@ -44,9 +66,11 @@ def with_throughput(self, low: float, mean: float, high: float, unit: str) -> "B def to_baseline_format(self) -> str: """Convert to baseline file format.""" lines = [ - f"=== {self.points} Points ({self.dimension}) ===", - f"Time: [{self.time_low}, {self.time_mean}, {self.time_high}] {self.time_unit}", + self.header_line(), ] + if 
self.benchmark_id: + lines.append(f"Benchmark ID: {self.benchmark_id}") + lines.append(f"Time: [{self.time_low}, {self.time_mean}, {self.time_high}] {self.time_unit}") if self.throughput_low is not None and self.throughput_mean is not None and self.throughput_high is not None and self.throughput_unit: lines.append(f"Throughput: [{self.throughput_low}, {self.throughput_mean}, {self.throughput_high}] {self.throughput_unit}") @@ -134,10 +158,10 @@ def parse_benchmark_header(line: str) -> BenchmarkData | None: Returns: BenchmarkData object or None if no match """ - # Match pattern like "=== 1000 Points (2D) ===" - match = re.match(r"^=== (\d+) Points \((.+)\) ===$", line.strip()) + # Match patterns like "=== 1000 Points (2D) ===" or "=== Unsized Workload (4D) ===" + match = re.match(r"^=== (?:(\d+) Points|Unsized Workload) \((.+)\) ===$", line.strip()) if match: - points = int(match.group(1)) + points = int(match.group(1)) if match.group(1) is not None else None dimension = match.group(2) return BenchmarkData(points=points, dimension=dimension) return None @@ -179,6 +203,15 @@ def parse_time_data(benchmark: BenchmarkData, line: str) -> bool: return False +def _parse_benchmark_id_data(benchmark: BenchmarkData, line: str) -> bool: + """Parse optional baseline benchmark identifier metadata.""" + match = re.match(r"^Benchmark ID:\s*(.+)$", line.strip()) + if match: + benchmark.benchmark_id = match.group(1).strip() + return True + return False + + def parse_throughput_data(benchmark: BenchmarkData, line: str) -> bool: """ Parse throughput data lines to extract throughput information. @@ -235,6 +268,9 @@ def extract_benchmark_data(baseline_content: str) -> list[BenchmarkData]: continue if current_benchmark: + if _parse_benchmark_id_data(current_benchmark, line): + continue + # Try to parse time data if parse_time_data(current_benchmark, line): continue @@ -331,19 +367,32 @@ def _dim_key(d: str) -> tuple[int, str]: return (int(m.group(1)) if m else 1_000_000, d) for dimension in sorted(by_dimension.keys(), key=_dim_key): - dim_benchmarks = sorted(by_dimension[dimension], key=lambda b: b.points) - - lines.extend( - [ - f"### {dimension} Triangulation Performance", - "", - "| Points | Time (mean) | Throughput (mean) | Scaling |", - "|--------|-------------|-------------------|----------|", - ], + dim_benchmarks = sorted( + by_dimension[dimension], + key=lambda b: (b.points is None, b.points or 0, b.comparison_key), ) + include_benchmark_id = any(bench.benchmark_id for bench in dim_benchmarks) + + lines.extend([f"### {dimension} Triangulation Performance", ""]) + if include_benchmark_id: + lines.extend( + [ + "| Benchmark ID | Points | Time (mean) | Throughput (mean) | Scaling |", + "|--------------|--------|-------------|-------------------|----------|", + ], + ) + else: + lines.extend( + [ + "| Points | Time (mean) | Throughput (mean) | Scaling |", + "|--------|-------------|-------------------|----------|", + ], + ) - # Calculate scaling relative to smallest benchmark - first_nonzero = next((b for b in dim_benchmarks if b.time_mean and b.time_mean > 0), None) + # Calculate scaling relative to the smallest numeric workload only for + # legacy homogeneous tables. Expanded benchmark IDs mix different API + # surfaces, so a single per-dimension scaling baseline is misleading. 
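+        # Illustrative example (hypothetical IDs): a 3D table may hold both
+        # `tds_new_3d/50` (construction) and `convex_hull/3d/50` (hull
+        # extraction); dividing one mean by the other would not be a
+        # meaningful scaling factor, so the scaling column is omitted.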
+ first_nonzero = None if include_benchmark_id else next((b for b in dim_benchmarks if b.time_mean and b.time_mean > 0), None) baseline_time = first_nonzero.time_mean if first_nonzero else None for bench in dim_benchmarks: @@ -362,7 +411,12 @@ def _dim_key(d: str) -> tuple[int, str]: else: scaling_str = "N/A" - lines.append(f"| {bench.points} | {time_str} | {throughput_str} | {scaling_str} |") + if include_benchmark_id: + lines.append( + f"| `{bench.comparison_key}` | {bench.points_label} | {time_str} | {throughput_str} | {scaling_str} |", + ) + else: + lines.append(f"| {bench.points_label} | {time_str} | {throughput_str} | {scaling_str} |") lines.append("") # Empty line between tables diff --git a/scripts/benchmark_utils.py b/scripts/benchmark_utils.py index 87c4c7e1..4a10e097 100755 --- a/scripts/benchmark_utils.py +++ b/scripts/benchmark_utils.py @@ -24,13 +24,14 @@ from collections.abc import Mapping from dataclasses import dataclass from datetime import UTC, datetime +from itertools import product from pathlib import Path from shutil import copy2 as copyfile # NOTE: Use copy2 (metadata-preserving) under the 'copyfile' alias for tests/patching convenience. from typing import TYPE_CHECKING, TextIO from urllib.parse import urlparse from uuid import uuid4 -from packaging.version import Version +from packaging.version import InvalidVersion, Version logger = logging.getLogger(__name__) @@ -97,15 +98,164 @@ run_safe_command, ) +_RECOVERABLE_CLI_ERRORS: tuple[type[BaseException], ...] = ( + ExecutableNotFoundError, + ProjectRootNotFoundError, + OSError, + RuntimeError, + TypeError, + ValueError, + KeyError, + subprocess.SubprocessError, +) + # Trusted benchmark commands use this Cargo profile so local, CI, and release # numbers are generated with the same ThinLTO/codegen-units settings. 
TRUSTED_BENCH_PROFILE = "perf" +CI_PERFORMANCE_SUITE_GROUPS = { + "construction": ( + "Construction", + "DelaunayTriangulation::new_with_options", + ), + "boundary_facets": ( + "Boundary facets", + "DelaunayTriangulation::boundary_facets", + ), + "convex_hull": ( + "Convex hull", + "ConvexHull::from_triangulation", + ), + "validation": ( + "Validation", + "DelaunayTriangulation::validate", + ), + "incremental_insert": ( + "Incremental insert", + "DelaunayTriangulation::insert", + ), + "bistellar_flips": ( + "Bistellar flips", + "BistellarFlips", + ), +} + +CI_PERFORMANCE_SUITE_GROUP_ORDER = tuple(CI_PERFORMANCE_SUITE_GROUPS) +_CI_PERFORMANCE_SUITE_MANIFEST_IDS_FILE = "ci_performance_suite_manifest_ids.txt" + + +def ci_suite_group_key(first_path_part: str) -> str | None: + """Map a Criterion path prefix to a ci_performance_suite group key.""" + if first_path_part.startswith("tds_new_"): + return "construction" + if first_path_part.startswith("bistellar_flips"): + return "bistellar_flips" + if first_path_part in CI_PERFORMANCE_SUITE_GROUPS: + return first_path_part + return None + + +def ci_suite_dimension(benchmark_id: str) -> str: + """Extract the dimension label from a ci_performance_suite benchmark ID.""" + match = re.search(r"(?:^|_|/)(\d+)d(?:_|/|$)", benchmark_id) + if match: + return f"{match.group(1)}D" + return "n/a" + + +def _expand_ci_benchmark_id_pattern(pattern: str) -> set[str]: + """Expand the simple brace patterns emitted by ci_performance_suite.""" + segments = [] + for segment in pattern.split("/"): + if segment.startswith("{") and segment.endswith("}"): + segments.append([option for option in segment[1:-1].split(",") if option]) + else: + segments.append([segment]) + return {"/".join(parts) for parts in product(*segments)} + + +def _parse_ci_performance_manifest_ids(stdout: str) -> set[str]: + """Parse benchmark IDs from ci_performance_suite manifest stdout lines.""" + manifest_ids: set[str] = set() + for line in stdout.splitlines(): + if not line.startswith("api_benchmark "): + continue + fields = dict(token.split("=", 1) for token in line.split()[1:] if "=" in token) + benchmark_ids = fields.get("benchmark_ids", "") + for pattern in benchmark_ids.split(";"): + if pattern: + manifest_ids.update(_expand_ci_benchmark_id_pattern(pattern)) + return manifest_ids + + +def _ci_performance_manifest_ids_path(criterion_dir: Path) -> Path: + """Return the sidecar manifest path used to filter ci_performance_suite results.""" + return criterion_dir / _CI_PERFORMANCE_SUITE_MANIFEST_IDS_FILE + + +def _write_ci_performance_manifest_ids(project_root: Path, stdout: str) -> None: + """Persist the runtime ci_performance_suite manifest beside Criterion results.""" + if not isinstance(stdout, str): + msg = "ci_performance_suite completed but stdout was not text; cannot extract api_benchmark manifest" + raise TypeError(msg) + criterion_dir = project_root / "target" / "criterion" + manifest_path = _ci_performance_manifest_ids_path(criterion_dir) + manifest_ids = _parse_ci_performance_manifest_ids(stdout) + if not manifest_ids: + msg = f"ci_performance_suite completed but emitted no api_benchmark manifest in stdout: {stdout!r}" + raise RuntimeError(msg) + criterion_dir.mkdir(parents=True, exist_ok=True) + manifest_path.write_text( + "\n".join(sorted(manifest_ids)) + "\n", + encoding="utf-8", + ) + + +def _load_ci_performance_manifest_ids(criterion_dir: Path) -> set[str] | None: + """Load ci_performance_suite benchmark IDs when a runtime manifest exists.""" + manifest_path = 
_ci_performance_manifest_ids_path(criterion_dir) + if not manifest_path.exists(): + return None + try: + manifest_ids = {line.strip() for line in manifest_path.read_text(encoding="utf-8").splitlines() if line.strip()} + except OSError: + return None + return manifest_ids or None + + +def _collect_ci_suite_estimates(criterion_dir: Path) -> list[tuple[tuple[str, ...], Path]]: + """Collect deduplicated ci_performance_suite estimates, preferring new over base.""" + manifest_ids = _load_ci_performance_manifest_ids(criterion_dir) + estimates_by_id: dict[tuple[str, ...], tuple[str, Path]] = {} + + for estimates_path in sorted(criterion_dir.glob("**/estimates.json")): + if estimates_path.parent.name not in {"base", "new"}: + continue + + try: + path_parts = estimates_path.relative_to(criterion_dir).parts[:-2] + except ValueError: + continue + + if not path_parts or ci_suite_group_key(path_parts[0]) is None: + continue + + benchmark_id = "/".join(path_parts) + if manifest_ids is not None and benchmark_id not in manifest_ids: + continue + + existing = estimates_by_id.get(path_parts) + if existing is None or (existing[0] == "base" and estimates_path.parent.name == "new"): + estimates_by_id[path_parts] = (estimates_path.parent.name, estimates_path) + + return [(path_parts, estimates_path) for path_parts, (_, estimates_path) in estimates_by_id.items()] + + # Development mode arguments - centralized to keep baseline generation and comparison in sync # Reduces samples for faster iteration during development (10x faster than full benchmarks) # -# Note: These are Criterion CLI arguments. Alternatively, benchmarks can be configured via -# environment variables (see benches/microbenchmarks.rs bench_config()): +# Note: These are Criterion CLI arguments. Some benchmarks can also be configured via +# environment variables documented in benches/README.md: # CRIT_SAMPLE_SIZE=10 CRIT_MEASUREMENT_MS=2000 CRIT_WARMUP_MS=1000 # The CLI arguments take precedence over env vars when both are present. 
DEV_MODE_BENCH_ARGS = [ @@ -119,6 +269,26 @@ ] +@dataclass(frozen=True) +class CiPerformanceResult: + """Parsed Criterion result for one ci_performance_suite benchmark ID.""" + + group_key: str + benchmark_id: str + dimension: str + input_size: str + mean_ns: float + low_ns: float + high_ns: float + + @property + def variant(self) -> str: + """Return the geometry/input variant label for this benchmark.""" + if "adversarial" in self.benchmark_id: + return "adversarial" + return "well-conditioned" + + def _criterion_arg_value(args: list[str], flag: str) -> str: """Return the Criterion value that follows flag in args.""" try: @@ -166,7 +336,7 @@ def _sampling_metadata(dev_mode: bool) -> dict[str, str]: class PerformanceSummaryGenerator: """Generate performance summary markdown from benchmark results.""" - def __init__(self, project_root: Path): + def __init__(self, project_root: Path) -> None: """Initialize with project root directory.""" self.project_root = project_root # Prefer CI artifact location; fall back to benches/ for local runs @@ -174,7 +344,7 @@ def __init__(self, project_root: Path): self._baseline_fallback = project_root / "benches" / "baseline_results.txt" self.comparison_file = project_root / "benches" / "compare_results.txt" - # Path for storing circumsphere benchmark results + # Path for storing Criterion benchmark results self.circumsphere_results_dir = project_root / "target" / "criterion" # Storage for numerical accuracy data from benchmarks @@ -196,7 +366,7 @@ def generate_summary( Args: output_path: Output file path (defaults to benches/PERFORMANCE_RESULTS.md) - run_benchmarks: Whether to run fresh circumsphere benchmarks + run_benchmarks: Whether to run fresh public API and circumsphere benchmarks generator_name: Name of the tool generating the summary (for attribution) cargo_profile: Optional Cargo profile for fresh benchmark runs. When ``run_benchmarks`` is True and no profile is specified, defaults @@ -219,10 +389,11 @@ def generate_summary( # comparable with baseline/compare output. 
if cargo_profile is None: cargo_profile = TRUSTED_BENCH_PROFILE - success, accuracy_data = self._run_circumsphere_benchmarks(cargo_profile=cargo_profile) - if success: + ci_success = self._run_ci_performance_suite(cargo_profile=cargo_profile) + circumsphere_success, accuracy_data = self._run_circumsphere_benchmarks(cargo_profile=cargo_profile) + if circumsphere_success: self.numerical_accuracy_data = accuracy_data - else: + if not ci_success or not circumsphere_success: print("⚠️ Benchmark run failed, using existing/fallback data") # Generate markdown content @@ -235,7 +406,7 @@ def generate_summary( print(f"📊 Generated performance summary: {output_path}") return True - except Exception as e: + except _RECOVERABLE_CLI_ERRORS as e: print(f"❌ Failed to generate performance summary: {e}", file=sys.stderr) return False @@ -268,7 +439,7 @@ def _generate_markdown_content(self, generator_name: str | None = None) -> str: commit_hash = get_git_commit_hash(cwd=self.project_root) if commit_hash and commit_hash != "unknown": lines.append(f"**Git Commit**: {commit_hash}") - except Exception as e: + except _RECOVERABLE_CLI_ERRORS as e: logger.debug("Could not get git commit hash: %s", e) # Add hardware information @@ -283,7 +454,7 @@ def _generate_markdown_content(self, generator_name: str | None = None) -> str: f"**Rust**: {hw_info['RUST']}", ], ) - except Exception as e: + except _RECOVERABLE_CLI_ERRORS as e: logger.debug("Could not get hardware info: %s", e) lines.append("**Hardware**: Unknown") @@ -295,7 +466,12 @@ def _generate_markdown_content(self, generator_name: str | None = None) -> str: ], ) - # Add circumsphere performance results from actual benchmark data + # Add public API performance results from the CI suite first. This is + # the versioned benchmark contract used by baseline/comparison tooling. + lines.extend(self._get_ci_performance_suite_results()) + + # Add circumsphere predicate results as a focused subsection. These + # remain important because they exercise la-stack-backed predicates. 
lines.extend(self._get_circumsphere_performance_results()) # Add baseline results if available @@ -334,7 +510,7 @@ def _get_current_version(self) -> str: if result.startswith("v"): return result[1:] # Remove 'v' prefix return "unknown" - except Exception: + except _RECOVERABLE_CLI_ERRORS: # Fallback: try to get any recent tag try: cp = run_git_command(["tag", "-l", "--sort=-version:refname"], cwd=self.project_root) @@ -345,7 +521,7 @@ def _get_current_version(self) -> str: if tag.startswith("v") and len(tag) > 1: return tag[1:] return "unknown" - except Exception: + except _RECOVERABLE_CLI_ERRORS: return "unknown" def _get_version_date(self) -> str: @@ -366,7 +542,7 @@ def _get_version_date(self) -> str: # Fallback to current date return datetime.now(UTC).strftime("%Y-%m-%d") - except Exception: + except _RECOVERABLE_CLI_ERRORS: return datetime.now(UTC).strftime("%Y-%m-%d") def _run_circumsphere_benchmarks(self, cargo_profile: str | None = None) -> tuple[bool, dict[str, str] | None]: @@ -402,10 +578,58 @@ def _run_circumsphere_benchmarks(self, cargo_profile: str | None = None) -> tupl print("✅ Circumsphere benchmarks completed successfully") return True, numerical_accuracy_data - except Exception as e: + except _RECOVERABLE_CLI_ERRORS as e: print(f"❌ Error running circumsphere benchmarks: {e}") return False, None + def _run_ci_performance_suite(self, cargo_profile: str | None = None, *, use_dev_mode: bool = False) -> bool: + """ + Run the public API CI performance suite to generate fresh Criterion data. + + Args: + cargo_profile: Cargo profile for the fresh run. Defaults to + :data:`TRUSTED_BENCH_PROFILE` so summary, baseline, and + comparison measurements use the same optimized profile. + use_dev_mode: When true, pass reduced Criterion sampling arguments + for local development feedback. Full sampling is used by + default. + + Returns: + True if the benchmark completed successfully, False otherwise. + """ + try: + print("🔄 Running ci_performance_suite benchmarks...") + + profile = cargo_profile if cargo_profile is not None else TRUSTED_BENCH_PROFILE + cargo_args = ["bench", "--profile", profile, "--bench", "ci_performance_suite"] + if use_dev_mode: + cargo_args.extend(["--", *DEV_MODE_BENCH_ARGS]) + + result = run_cargo_command( + cargo_args, + cwd=self.project_root, + timeout=900, + capture_output=True, + check=False, + ) + if result.returncode != 0: + print(f"❌ Error running ci_performance_suite benchmarks: cargo exited with status {result.returncode}") + return False + + _write_ci_performance_manifest_ids(self.project_root, result.stdout) + print("✅ ci_performance_suite benchmarks completed successfully") + return True + + except ExecutableNotFoundError as e: + print(f"❌ Error running ci_performance_suite benchmarks: {e}") + return False + except subprocess.TimeoutExpired as e: + print(f"❌ Error running ci_performance_suite benchmarks: {e}") + return False + except OSError as e: + print(f"❌ Error running ci_performance_suite benchmarks: {e}") + return False + def _parse_numerical_accuracy_output(self, stdout: str) -> dict[str, str] | None: """ Parse numerical accuracy data from circumsphere benchmark stdout. 
@@ -444,7 +668,7 @@ def _parse_numerical_accuracy_output(self, stdout: str) -> dict[str, str] | None return accuracy_data or None - except Exception: + except (IndexError, TypeError, ValueError): return None def _get_numerical_accuracy_analysis(self) -> list[str]: @@ -656,7 +880,7 @@ def _parse_single_method_result(self, criterion_path: Path, method_name: str) -> mean_ns = estimates["mean"]["point_estimate"] return CircumspherePerformanceData(method=method_name, time_ns=mean_ns) - except Exception as e: + except (OSError, KeyError, TypeError, ValueError, json.JSONDecodeError) as e: print(f"⚠️ Could not parse {estimates_file}: {e}") return None @@ -787,6 +1011,151 @@ def _get_fallback_circumsphere_data(self) -> list[CircumsphereTestCase]: ), ] + @staticmethod + def _format_duration_ns(time_ns: float) -> str: + """Format nanosecond Criterion timings with readable units.""" + if time_ns >= 1_000_000_000: + return f"{time_ns / 1_000_000_000:.3f} s" + if time_ns >= 1_000_000: + return f"{time_ns / 1_000_000:.3f} ms" + if time_ns >= 1_000: + return f"{time_ns / 1_000:.1f} µs" + return f"{time_ns:.0f} ns" + + @staticmethod + def _ci_suite_input_size(path_parts: tuple[str, ...]) -> str: + """Extract a human-readable input size from Criterion benchmark path parts.""" + if path_parts and path_parts[-1].isdigit(): + return path_parts[-1] + return "roundtrip" + + @staticmethod + def _load_criterion_estimate(estimates_path: Path) -> tuple[float, float, float] | None: + """Load mean and confidence interval values from a Criterion estimates file.""" + try: + with estimates_path.open("r", encoding="utf-8") as f: + data = json.load(f) + + mean_data = data.get("mean", {}) + mean_ns = float(mean_data["point_estimate"]) + confidence_interval = mean_data.get("confidence_interval", {}) + low_ns = float(confidence_interval.get("lower_bound", mean_ns)) + high_ns = float(confidence_interval.get("upper_bound", mean_ns)) + if mean_ns <= 0: + return None + return mean_ns, low_ns, high_ns + except (OSError, KeyError, TypeError, ValueError, json.JSONDecodeError): + return None + + def _parse_ci_performance_suite_results(self) -> list[CiPerformanceResult]: + """ + Parse Criterion data for the versioned ci_performance_suite benchmark IDs. + + Criterion stores each benchmark under a path derived from its group and + benchmark ID. This parser keeps those IDs intact so the generated + summary can compare API surfaces side-by-side as the suite grows. 
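+
+        Illustrative layout (hypothetical path, not a guaranteed Criterion
+        directory structure): ``target/criterion/tds_new_3d/50/new/estimates.json``
+        would yield benchmark ID ``tds_new_3d/50``, group ``construction``,
+        dimension ``3D``, and input size ``50``.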
+ """ + criterion_dir = self.circumsphere_results_dir + if not criterion_dir.exists(): + return [] + + results = [] + for path_parts, estimates_path in _collect_ci_suite_estimates(criterion_dir): + estimates = self._load_criterion_estimate(estimates_path) + if estimates is None: + continue + + benchmark_id = "/".join(path_parts) + group_key = ci_suite_group_key(path_parts[0]) + if group_key is None: + continue + + mean_ns, low_ns, high_ns = estimates + results.append( + CiPerformanceResult( + group_key=group_key, + benchmark_id=benchmark_id, + dimension=ci_suite_dimension(benchmark_id), + input_size=self._ci_suite_input_size(path_parts), + mean_ns=mean_ns, + low_ns=low_ns, + high_ns=high_ns, + ), + ) + + group_order = {group: index for index, group in enumerate(CI_PERFORMANCE_SUITE_GROUP_ORDER)} + results.sort( + key=lambda result: ( + group_order.get(result.group_key, sys.maxsize), + int(result.dimension.removesuffix("D")) if result.dimension.removesuffix("D").isdigit() else sys.maxsize, + int(result.input_size) if result.input_size.isdigit() else sys.maxsize, + result.benchmark_id, + ), + ) + return results + + def _get_ci_performance_suite_results(self) -> list[str]: + """ + Generate the public API performance summary from ci_performance_suite data. + + Returns: + List of markdown lines with ci_performance_suite benchmark data. + """ + results = self._parse_ci_performance_suite_results() + + lines = [ + "### Public API Performance Contract (`ci_performance_suite`)", + "", + "This suite is the versioned benchmark contract for public Delaunay workflows.", + "It covers construction, hull extraction, validation, incremental insertion,", + "boundary traversal, and explicit bistellar flip roundtrips.", + "", + ] + + if not results: + lines.extend( + [ + "⚠️ No `ci_performance_suite` Criterion results available. Run:", + "```bash", + f"cargo bench --profile {TRUSTED_BENCH_PROFILE} --bench ci_performance_suite", + "```", + "", + ], + ) + return lines + + results_by_group: dict[str, list[CiPerformanceResult]] = {} + for result in results: + results_by_group.setdefault(result.group_key, []).append(result) + + for group_key in CI_PERFORMANCE_SUITE_GROUP_ORDER: + group_results = results_by_group.get(group_key) + if not group_results: + continue + + group_label, public_api = CI_PERFORMANCE_SUITE_GROUPS[group_key] + lines.extend( + [ + f"#### {group_label}", + "", + f"Public API: `{public_api}`", + "", + "| Benchmark ID | Dimension | Input | Variant | Mean | 95% CI |", + "|--------------|-----------|-------|---------|------|--------|", + ], + ) + + for result in group_results: + confidence_interval = f"{self._format_duration_ns(result.low_ns)} - {self._format_duration_ns(result.high_ns)}" + lines.append( + f"| `{result.benchmark_id}` | {result.dimension} | {result.input_size} | {result.variant} | " + f"{self._format_duration_ns(result.mean_ns)} | {confidence_interval} |", + ) + + lines.append("") + + return lines + def _get_circumsphere_performance_results(self) -> list[str]: """ Generate circumsphere containment performance results section with dynamic data. 
@@ -799,7 +1168,7 @@ def _get_circumsphere_performance_results(self) -> list[str]: if not test_cases: return [ - "### Circumsphere Performance Results", + "### Circumsphere Predicate Performance", "", f"#### Version {self.current_version} Results ({self.current_date})", "", @@ -811,7 +1180,10 @@ def _get_circumsphere_performance_results(self) -> list[str]: ] lines = [ - "### Circumsphere Performance Results", + "### Circumsphere Predicate Performance", + "", + "This focused predicate suite tracks `la-stack`-backed circumsphere and", + "insphere query performance independently from full triangulation workflows.", "", f"#### Version {self.current_version} Results ({self.current_date})", "", @@ -911,7 +1283,7 @@ def _parse_baseline_results(self) -> list[str]: if benchmarks: lines.extend(format_benchmark_tables(benchmarks)) - except Exception as e: + except (OSError, TypeError, ValueError, KeyError) as e: lines.extend( [ "### Baseline Results", @@ -958,7 +1330,7 @@ def _parse_comparison_results(self) -> list[str]: ], ) - except Exception: + except OSError: lines.extend( [ "### Comparison Results", @@ -981,7 +1353,7 @@ def _get_dynamic_analysis_sections(self) -> list[str]: performance_ranking = self._analyze_performance_ranking(test_data) lines = [ - "## Key Findings", + "## Circumsphere Predicate Analysis", "", "### Performance Ranking", "", @@ -996,7 +1368,7 @@ def _get_dynamic_analysis_sections(self) -> list[str]: lines.extend( [ - "## Recommendations", + "### Recommendations", "", ], ) @@ -1009,7 +1381,7 @@ def _get_dynamic_analysis_sections(self) -> list[str]: lines.extend( [ "", - "## Conclusion", + "### Conclusion", "", "All three methods are mathematically correct and produce valid results. Performance characteristics vary by dimension:", "", @@ -1035,23 +1407,13 @@ def _get_dynamic_analysis_sections(self) -> list[str]: return lines - def _analyze_performance_ranking(self, test_data: list[CircumsphereTestCase]) -> list[tuple[str, float, str]]: - """ - Analyze performance data to generate dynamic rankings. 
- - Args: - test_data: List of CircumsphereTestCase objects - - Returns: - List of tuples (method_name, average_performance, description) - """ + @staticmethod + def _collect_method_performance(test_data: list[CircumsphereTestCase]) -> tuple[dict[str, list[float]], dict[str, list[str]]]: + """Collect per-method timings and dimension wins, excluding trivial boundary cases.""" method_totals: dict[str, list[float]] = {"insphere": [], "insphere_distance": [], "insphere_lifted": []} method_wins: dict[str, list[str]] = {"insphere": [], "insphere_distance": [], "insphere_lifted": []} - # Collect performance data from non-boundary test cases only - # Boundary cases are trivial outliers with early-exit optimizations for test_case in test_data: - # Skip boundary vertex cases as they're trivial outliers (3-4ns) if test_case.is_boundary_case: continue @@ -1062,6 +1424,36 @@ def _analyze_performance_ranking(self, test_data: list[CircumsphereTestCase]) -> for method_name, perf_data in test_case.methods.items(): method_totals[method_name].append(perf_data.time_ns) + return method_totals, method_wins + + @staticmethod + def _ranking_description(method: str, avg_time: float, fastest_time: float, method_wins: dict[str, list[str]]) -> str: + """Describe relative method performance for the dynamic ranking table.""" + if avg_time == float("inf"): + return "No benchmark data available" + + slowdown = (avg_time / fastest_time) if fastest_time > 0 and fastest_time != float("inf") else 1 + wins = method_wins.get(method, []) + if not wins: + return f"~{slowdown:.1f}x slower than fastest on average" + + dims_text = ", ".join(sorted(set(wins))) + if slowdown > 1.01: + return f"(best in {dims_text}) - ~{slowdown:.1f}x average vs fastest" + return f"(best in {dims_text}) - Best average performance" + + def _analyze_performance_ranking(self, test_data: list[CircumsphereTestCase]) -> list[tuple[str, float, str]]: + """ + Analyze performance data to generate dynamic rankings. 
+ + Args: + test_data: List of CircumsphereTestCase objects + + Returns: + List of tuples (method_name, average_performance, description) + """ + method_totals, method_wins = self._collect_method_performance(test_data) + # Calculate averages and determine ranking method_averages = {} for method, times in method_totals.items(): @@ -1073,33 +1465,12 @@ def _analyze_performance_ranking(self, test_data: list[CircumsphereTestCase]) -> # Sort by performance (lowest time first) sorted_methods = sorted(method_averages.items(), key=lambda x: x[1]) - # Generate descriptions with relative performance and dimension wins rankings = [] if sorted_methods: fastest_time = sorted_methods[0][1] for method, avg_time in sorted_methods: - # Handle missing data (float("inf") from no samples) - if avg_time == float("inf"): - desc = "No benchmark data available" - rankings.append((method, avg_time, desc)) - continue - - slowdown = (avg_time / fastest_time) if fastest_time > 0 and fastest_time != float("inf") else 1 - - # Generate description based on actual wins by dimension - wins = method_wins.get(method, []) - if wins: - dims_text = ", ".join(sorted(set(wins))) - desc = ( - f"(best in {dims_text}) - ~{slowdown:.1f}x average vs fastest" - if slowdown > 1.01 - else f"(best in {dims_text}) - Best average performance" - ) - else: - desc = f"~{slowdown:.1f}x slower than fastest on average" - - rankings.append((method, avg_time, desc)) + rankings.append((method, avg_time, self._ranking_description(method, avg_time, fastest_time, method_wins))) return rankings @@ -1117,7 +1488,7 @@ def _generate_dynamic_recommendations(self, performance_ranking: list[tuple[str, return [] lines = [ - "### Method Selection Guide", + "#### Method Selection Guide", "", "**All three methods are mathematically correct** (they produce valid insphere test results).", "Choose based on your specific requirements:", @@ -1125,7 +1496,7 @@ def _generate_dynamic_recommendations(self, performance_ranking: list[tuple[str, ] # Add dimension-specific performance recommendations - lines.append("#### Performance Optimization by Dimension") + lines.append("##### Performance Optimization by Dimension") lines.append("") for method, _avg_time, desc in performance_ranking: @@ -1136,7 +1507,7 @@ def _generate_dynamic_recommendations(self, performance_ranking: list[tuple[str, lines.extend( [ "", - "#### General Recommendations", + "##### General Recommendations", "", "**For maximum performance**: Choose the method that performs best in your target dimension (see above)", "", @@ -1146,7 +1517,7 @@ def _generate_dynamic_recommendations(self, performance_ranking: list[tuple[str, "**For algorithm transparency**: `insphere_distance` explicitly calculates the circumcenter,", "making it excellent for educational purposes, debugging, and algorithm validation", "", - "#### Performance Comparison", + "##### Performance Comparison", "", "Average performance across all non-boundary test cases:", "", @@ -1235,6 +1606,11 @@ def _get_static_sections(self) -> list[str]: "", "## Benchmark Structure", "", + "The `ci_performance_suite.rs` benchmark is the primary regression and", + "release-summary suite. 
It emits a versioned `api_benchmark_manifest` and", + "covers public construction, hull, validation, insertion, boundary, and", + "bistellar-flip workflows across supported dimensions.", + "", "The `circumsphere_containment.rs` benchmark includes:", "", "- **Random queries**: Batch processing performance with 1000 random test points", @@ -1260,7 +1636,7 @@ def _get_update_instructions(self) -> list[str]: "# Generate performance summary with current data", "uv run benchmark-utils generate-summary", "", - "# Run fresh perf-profile benchmarks and generate summary (includes numerical accuracy)", + "# Run fresh perf-profile public API and circumsphere benchmarks", f"uv run benchmark-utils generate-summary --run-benchmarks --profile {TRUSTED_BENCH_PROFILE}", "", "# Generate baseline results for regression testing", @@ -1281,7 +1657,7 @@ class CriterionParser: """Parse Criterion benchmark output and JSON data.""" @staticmethod - def parse_estimates_json(estimates_path: Path, points: int, dimension: str) -> BenchmarkData | None: + def parse_estimates_json(estimates_path: Path, points: int | None, dimension: str) -> BenchmarkData | None: """ Parse Criterion estimates.json file to extract benchmark data. @@ -1310,27 +1686,62 @@ def parse_estimates_json(estimates_path: Path, points: int, dimension: str) -> B low_us = low_ns / 1000 high_us = high_ns / 1000 - # Calculate throughput in Kelem/s - # Throughput = points / time_in_seconds - # For time in microseconds: throughput = points * 1,000,000 / time_us - # For Kelem/s: throughput_kelem = (points * 1,000,000 / time_us) / 1000 = points * 1000 / time_us - # Guard against division by zero for very fast benchmarks - eps = 1e-9 # µs - minimum time to prevent division by zero - thrpt_mean = points * 1000 / max(mean_us, eps) - thrpt_low = points * 1000 / max(high_us, eps) # Lower time = higher throughput - thrpt_high = points * 1000 / max(low_us, eps) # Higher time = lower throughput - - return ( - BenchmarkData(points, dimension) - # Baseline timing values are rounded to 2 decimal places for consistency - # This standardizes storage format and avoids spurious precision differences - .with_timing(round(low_us, 2), round(mean_us, 2), round(high_us, 2), "µs") - .with_throughput(round(thrpt_low, 3), round(thrpt_mean, 3), round(thrpt_high, 3), "Kelem/s") - ) + benchmark = BenchmarkData(points, dimension).with_timing(round(low_us, 2), round(mean_us, 2), round(high_us, 2), "µs") + + if points is not None: + # Calculate throughput in Kelem/s + # Throughput = points / time_in_seconds + # For time in microseconds: throughput = points * 1,000,000 / time_us + # For Kelem/s: throughput_kelem = (points * 1,000,000 / time_us) / 1000 = points * 1000 / time_us + # Guard against division by zero for very fast benchmarks + eps = 1e-9 # µs - minimum time to prevent division by zero + thrpt_mean = points * 1000 / max(mean_us, eps) + thrpt_low = points * 1000 / max(high_us, eps) # Lower time = higher throughput + thrpt_high = points * 1000 / max(low_us, eps) # Higher time = lower throughput + benchmark.with_throughput(round(thrpt_low, 3), round(thrpt_mean, 3), round(thrpt_high, 3), "Kelem/s") + + return benchmark except (FileNotFoundError, json.JSONDecodeError, KeyError, ZeroDivisionError, ValueError): return None + @staticmethod + def _ci_suite_input_points(path_parts: tuple[str, ...]) -> int | None: + """Extract the numeric input size when the Criterion ID has one.""" + if path_parts and path_parts[-1].isdigit(): + return int(path_parts[-1]) + return None + + @staticmethod + 
def _process_ci_performance_suite_results(criterion_dir: Path) -> list[BenchmarkData]: + """Discover ci_performance_suite Criterion results with expanded benchmark IDs.""" + results: list[BenchmarkData] = [] + for path_parts, estimates_path in _collect_ci_suite_estimates(criterion_dir): + benchmark_id = "/".join(path_parts) + dimension = ci_suite_dimension(benchmark_id) + if dimension == "n/a": + continue + + points = CriterionParser._ci_suite_input_points(path_parts) + benchmark_data = CriterionParser.parse_estimates_json(estimates_path, points, dimension) + if benchmark_data is None: + continue + + benchmark_data.benchmark_id = benchmark_id + results.append(benchmark_data) + + group_order = {group: index for index, group in enumerate(CI_PERFORMANCE_SUITE_GROUP_ORDER)} + results.sort( + key=lambda result: ( + group_order.get(ci_suite_group_key(result.benchmark_id.split("/", 1)[0]) or "", sys.maxsize), + int(result.dimension.removesuffix("D")) if result.dimension.removesuffix("D").isdigit() else sys.maxsize, + result.points is None, + result.points or 0, + result.benchmark_id, + ), + ) + return results + @staticmethod def _extract_dimension_from_dir(dim_dir: Path) -> str | None: """Extract dimension string from directory name (e.g., '2d' -> '2').""" @@ -1371,7 +1782,7 @@ def _process_point_directory(point_dir: Path, dim: str) -> BenchmarkData | None: def _process_fallback_discovery(criterion_dir: Path) -> list[BenchmarkData]: """Recursively discover estimates.json files when structured search fails.""" results = [] - seen: set[tuple[int, str]] = set() + seen: set[str] = set() for estimates_file in criterion_dir.rglob("estimates.json"): parent_name = estimates_file.parent.name @@ -1390,7 +1801,7 @@ def _process_fallback_discovery(criterion_dir: Path) -> list[BenchmarkData]: points = int(points_dir.name) dimension = f"{dim_match.group(1)}D" - key = (points, dimension) + key = f"{points}_{dimension}" # Prefer "new" over "base" when duplicates exist if key in seen and parent_name == "base": @@ -1420,6 +1831,10 @@ def find_criterion_results(target_dir: Path) -> list[BenchmarkData]: if not criterion_dir.exists(): return results + results = CriterionParser._process_ci_performance_suite_results(criterion_dir) + if results: + return results + # Look for benchmark results in *d directories (group names can change) for dim_dir in sorted(p for p in criterion_dir.iterdir() if p.is_dir() and re.search(r"\d+[dD]$", p.name)): dim = CriterionParser._extract_dimension_from_dir(dim_dir) @@ -1438,15 +1853,16 @@ def find_criterion_results(target_dir: Path) -> list[BenchmarkData]: if not results: results = CriterionParser._process_fallback_discovery(criterion_dir) - # Sort by dimension, then by point count - results.sort(key=lambda x: (int(x.dimension.rstrip("D")), x.points)) + # Sort by dimension, then by point count. Unsized benchmarks sort after + # numeric workloads within the same dimension. 
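+        # For example, a sized 2D entry keys as (2, False, 10) while an unsized
+        # 2D entry keys as (2, True, 0), so sized rows sort first.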
+ results.sort(key=lambda x: (int(x.dimension.rstrip("D")), x.points is None, x.points or 0)) return results class BaselineGenerator: """Generate performance baselines from benchmark data.""" - def __init__(self, project_root: Path, tag: str | None = None): + def __init__(self, project_root: Path, tag: str | None = None) -> None: """Initialize baseline generation for a project root and optional tag.""" self.project_root = project_root self.hardware = HardwareInfo() @@ -1474,7 +1890,7 @@ def generate_baseline(self, dev_mode: bool = False, output_file: Path | None = N # Run fresh benchmark - using secure subprocess wrapper if dev_mode: - run_cargo_command( + result = run_cargo_command( [ "bench", "--profile", @@ -1489,12 +1905,13 @@ def generate_baseline(self, dev_mode: bool = False, output_file: Path | None = N capture_output=True, ) else: - run_cargo_command( + result = run_cargo_command( ["bench", "--profile", TRUSTED_BENCH_PROFILE, "--bench", "ci_performance_suite"], cwd=self.project_root, timeout=bench_timeout, capture_output=True, ) + _write_ci_performance_manifest_ids(self.project_root, result.stdout) # Parse Criterion results target_dir = self.project_root / "target" @@ -1526,7 +1943,7 @@ def generate_baseline(self, dev_mode: bool = False, output_file: Path | None = N print("=== end stdout ===\n", file=sys.stderr) logger.exception("Error in generate_baseline") return False - except Exception: + except _RECOVERABLE_CLI_ERRORS: logger.exception("Error in generate_baseline") return False @@ -1540,7 +1957,7 @@ def _write_baseline_file(self, benchmark_results: list[BenchmarkData], output_fi try: # Use secure subprocess wrapper for git command git_commit = get_git_commit_hash(cwd=self.project_root) - except Exception: + except _RECOVERABLE_CLI_ERRORS: git_commit = "unknown" hardware_info = self.hardware.format_hardware_info(cwd=self.project_root) @@ -1568,7 +1985,7 @@ def _write_baseline_file(self, benchmark_results: list[BenchmarkData], output_fi class PerformanceComparator: """Compare current performance against baseline.""" - def __init__(self, project_root: Path): + def __init__(self, project_root: Path) -> None: """Initialize comparison state for benchmark results under a project root.""" self.project_root = project_root self.hardware = HardwareInfo() @@ -1608,7 +2025,7 @@ def compare_with_baseline( try: # Run fresh benchmark - using secure subprocess wrapper if dev_mode: - run_cargo_command( + result = run_cargo_command( [ "bench", "--profile", @@ -1623,12 +2040,13 @@ def compare_with_baseline( capture_output=True, ) else: - run_cargo_command( + result = run_cargo_command( ["bench", "--profile", TRUSTED_BENCH_PROFILE, "--bench", "ci_performance_suite"], cwd=self.project_root, timeout=bench_timeout, capture_output=True, ) + _write_ci_performance_manifest_ids(self.project_root, result.stdout) # Parse current results target_dir = self.project_root / "target" @@ -1667,7 +2085,7 @@ def compare_with_baseline( self._write_error_file(output_file, "Benchmark execution error", str(e)) logger.exception("Error in compare_with_baseline") return False, False - except Exception as e: + except _RECOVERABLE_CLI_ERRORS as e: self._write_error_file(output_file, "Benchmark execution error", str(e)) logger.exception("Error in compare_with_baseline") return False, False @@ -1682,14 +2100,22 @@ def _parse_baseline_file(self, baseline_content: str) -> dict[str, BenchmarkData line = lines[i].strip() # Look for benchmark sections - match = re.match(r"=== (\d+) Points \((\d+)D\) ===", line) + match = 
re.match(r"=== (?:(\d+) Points|Unsized Workload) \((\d+)D\) ===", line) if match: - points = int(match.group(1)) + points = int(match.group(1)) if match.group(1) is not None else None dimension = f"{match.group(2)}D" + benchmark_id = "" + next_line_index = i + 1 + + if next_line_index < len(lines): + id_match = re.match(r"Benchmark ID:\s*(.+)", lines[next_line_index].strip()) + if id_match: + benchmark_id = id_match.group(1).strip() + next_line_index += 1 # Parse time line - if i + 1 < len(lines): - time_line = lines[i + 1].strip() + if next_line_index < len(lines): + time_line = lines[next_line_index].strip() time_match = re.match(r"Time: \[([0-9.]+), ([0-9.]+), ([0-9.]+)\] (.+)", time_line) if time_match: time_low = float(time_match.group(1)) @@ -1701,8 +2127,8 @@ def _parse_baseline_file(self, baseline_content: str) -> dict[str, BenchmarkData throughput_low = throughput_mean = throughput_high = None throughput_unit = None - if i + 2 < len(lines): - thrpt_line = lines[i + 2].strip() + if next_line_index + 1 < len(lines): + thrpt_line = lines[next_line_index + 1].strip() thrpt_match = re.match(r"Throughput: \[([0-9.]+), ([0-9.]+), ([0-9.]+)\] (.+)", thrpt_line) if thrpt_match: throughput_low = float(thrpt_match.group(1)) @@ -1710,8 +2136,7 @@ def _parse_baseline_file(self, baseline_content: str) -> dict[str, BenchmarkData throughput_high = float(thrpt_match.group(3)) throughput_unit = thrpt_match.group(4) - key = f"{points}_{dimension}" - benchmark = BenchmarkData(points, dimension).with_timing(time_low, time_mean, time_high, time_unit) + benchmark = BenchmarkData(points, dimension, benchmark_id=benchmark_id).with_timing(time_low, time_mean, time_high, time_unit) if throughput_mean is not None and throughput_low is not None and throughput_high is not None and throughput_unit is not None: benchmark.with_throughput( throughput_low, @@ -1722,13 +2147,13 @@ def _parse_baseline_file(self, baseline_content: str) -> dict[str, BenchmarkData else: logger.debug( "Missing throughput data for %s: low=%s mean=%s high=%s unit=%s", - key, + benchmark.comparison_key, throughput_low, throughput_mean, throughput_high, throughput_unit, ) - results[key] = benchmark + results[benchmark.comparison_key] = benchmark i += 1 @@ -1783,7 +2208,7 @@ def _prepare_comparison_metadata(self, baseline_content: str) -> dict[str, str]: try: git_commit = get_git_commit_hash(cwd=self.project_root) - except Exception: + except _RECOVERABLE_CLI_ERRORS: git_commit = "unknown" # Parse baseline metadata @@ -1871,14 +2296,23 @@ def _write_comparison_header(self, f, metadata: dict[str, str], hardware_report: f.write(f"{sampling_warning}\n\n") f.write(hardware_report) + @staticmethod + def _matching_baseline(current: BenchmarkData, baseline_results: dict[str, BenchmarkData]) -> BenchmarkData | None: + """Return the matching baseline entry, using legacy keys only for legacy current IDs.""" + baseline_benchmark = baseline_results.get(current.comparison_key) + if baseline_benchmark is not None or current.benchmark_id: + return baseline_benchmark + if current.points is None: + return None + return baseline_results.get(f"{current.points}_{current.dimension}") + def _write_performance_comparison(self, f: TextIO, current_results: list[BenchmarkData], baseline_results: dict[str, BenchmarkData]) -> bool: """Write performance comparison section and return whether average regression exceeds threshold.""" time_changes = [] # Track all time changes for average calculation individual_regressions = 0 for current_benchmark in current_results: - key 
= f"{current_benchmark.points}_{current_benchmark.dimension}" - baseline_benchmark = baseline_results.get(key) + baseline_benchmark = self._matching_baseline(current_benchmark, baseline_results) self._write_benchmark_header(f, current_benchmark) self._write_current_benchmark_data(f, current_benchmark) @@ -1968,7 +2402,9 @@ def _write_performance_comparison(self, f: TextIO, current_results: list[Benchma def _write_benchmark_header(self, f, benchmark: BenchmarkData) -> None: """Write benchmark section header.""" - f.write(f"=== {benchmark.points} Points ({benchmark.dimension}) ===\n") + f.write(f"{benchmark.header_line()}\n") + if benchmark.benchmark_id: + f.write(f"Benchmark ID: {benchmark.benchmark_id}\n") def _write_current_benchmark_data(self, f, benchmark: BenchmarkData) -> None: """Write current benchmark data.""" @@ -2072,7 +2508,7 @@ def _write_error_file(self, output_file: Path, error_title: str, error_detail: s f.write(f"Details: {error_detail}\n\n") f.write("This error prevented the benchmark comparison from completing successfully.\n") f.write("Please check the CI logs for more information.\n") - except Exception: + except OSError: logger.exception("Failed to write error file") @@ -2151,7 +2587,7 @@ def create_metadata(tag_name: str, output_dir: Path) -> bool: print(f"📦 Created metadata file: {metadata_file}") return True - except Exception as e: + except (OSError, TypeError, ValueError) as e: print(f"❌ Failed to create metadata: {e}", file=sys.stderr) return False @@ -2187,7 +2623,7 @@ def display_baseline_summary(baseline_file: Path) -> bool: return True - except Exception as e: + except OSError as e: print(f"❌ Failed to display baseline summary: {e}", file=sys.stderr) return False @@ -2357,7 +2793,7 @@ def _version_key(p: Path) -> tuple[int, Version | str, str]: version = Version(version_str) # Valid version: priority 1 (sorts first when reversed) return (1, version, p.name) - except Exception as e: + except InvalidVersion as e: # Invalid version format, treat as non-semver logger.debug("Invalid version format in %s: %s", p.name, e) # Fallback: put non-matching names last (priority 0, sorts after valid versions when reversed) @@ -2493,7 +2929,7 @@ def determine_benchmark_skip(baseline_commit: str, current_commit: str) -> tuple except subprocess.CalledProcessError: return False, "baseline_commit_not_found" - except Exception: + except _RECOVERABLE_CLI_ERRORS: return False, "error_checking_changes" @staticmethod @@ -2560,7 +2996,7 @@ def run_regression_test(baseline_path: Path, bench_timeout: int = 1800, dev_mode print("✅ No significant performance regressions detected") return True - except Exception as e: + except _RECOVERABLE_CLI_ERRORS as e: print(f"❌ Error running regression test: {e}", file=sys.stderr) return False @@ -2735,7 +3171,7 @@ def _parse_baseline_metadata(baseline_content: str) -> dict[str, str]: def _sorted_benchmark_list(results: Mapping[str, "BenchmarkData"]) -> list["BenchmarkData"]: """Return benchmarks sorted by (dimension, point count) for stable output.""" - return sorted(results.values(), key=lambda b: (int(b.dimension.rstrip("D")), b.points)) + return sorted(results.values(), key=lambda b: (int(b.dimension.rstrip("D")), b.points is None, b.points or 0)) def _find_downloaded_baseline_file(download_dir: Path) -> Path: @@ -3068,7 +3504,11 @@ def _add_performance_summary_subcommands(subparsers: "argparse._SubParsersAction """Add performance summary generation subcommands.""" perf_summary_parser = subparsers.add_parser("generate-summary", help="Generate 
performance summary markdown") perf_summary_parser.add_argument("--output", type=Path, help="Output file path (defaults to benches/PERFORMANCE_RESULTS.md)") - perf_summary_parser.add_argument("--run-benchmarks", action="store_true", help="Run fresh circumsphere benchmarks before generating summary") + perf_summary_parser.add_argument( + "--run-benchmarks", + action="store_true", + help="Run fresh ci_performance_suite and circumsphere benchmarks before generating summary", + ) perf_summary_parser.add_argument( "--profile", default=TRUSTED_BENCH_PROFILE, @@ -3348,7 +3788,7 @@ def execute_command(args: argparse.Namespace, project_root: Path) -> None: return -def main(): +def main() -> None: """Command-line interface for benchmark utilities.""" parser = create_argument_parser() args = parser.parse_args() diff --git a/scripts/ci/capture_profiling_metadata.sh b/scripts/ci/capture_profiling_metadata.sh new file mode 100755 index 00000000..c67d5d6f --- /dev/null +++ b/scripts/ci/capture_profiling_metadata.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +set -euo pipefail + +mkdir -p profiling-results + +declared_toolchain="$( + grep -E '^[[:space:]]*channel[[:space:]]*=' rust-toolchain.toml 2>/dev/null | + head -n 1 | + cut -d '=' -f 2 | + tr -d ' "' || + true +)" +rust_version="$( + grep -E '^rust-version[[:space:]]*=' Cargo.toml 2>/dev/null | + head -n 1 | + cut -d '=' -f 2 | + tr -d ' "' || + true +)" + +profiling_mode="${PROFILE_METADATA_MODE:-}" +if [[ -z "$profiling_mode" ]]; then + profiling_mode="production" + if [[ "${PROFILING_DEV_MODE:-}" == "1" ]]; then + profiling_mode="development" + fi +fi + +benchmark_filter="${BENCH_FILTER_VALUE:-${PROFILE_METADATA_FILTER:-All benchmarks}}" +metadata_title="${PROFILE_METADATA_TITLE:-Profiling Environment}" + +{ + echo "# ${metadata_title}" + echo + echo "- Code ref: ${GITHUB_REF_NAME:-local}" + echo "- Commit: $(git rev-parse HEAD)" + echo "- Declared rust-toolchain.toml channel: ${declared_toolchain}" + echo "- Cargo.toml rust-version: ${rust_version}" + echo "- rustc: $(rustc --version)" + echo "- cargo: $(cargo --version)" + echo "- Cargo profile: perf" + echo "- Benchmark filter: ${benchmark_filter}" + echo "- Profiling mode: ${profiling_mode}" + echo "- Runner: ${RUNNER_OS:-$(uname -s)}" +} >profiling-results/environment_metadata.md diff --git a/scripts/compare_storage_backends.py b/scripts/compare_storage_backends.py index 40fed144..9b8207f6 100644 --- a/scripts/compare_storage_backends.py +++ b/scripts/compare_storage_backends.py @@ -29,6 +29,7 @@ import logging import re import shutil +import subprocess import sys from datetime import UTC, datetime from json import loads @@ -36,18 +37,28 @@ try: from benchmark_utils import TRUSTED_BENCH_PROFILE # type: ignore[import-not-found] - from subprocess_utils import find_project_root, run_cargo_command # type: ignore[import-not-found] + from subprocess_utils import ExecutableNotFoundError, find_project_root, run_cargo_command # type: ignore[import-not-found] except ModuleNotFoundError: from scripts.benchmark_utils import TRUSTED_BENCH_PROFILE # type: ignore[no-redef,import-not-found] - from scripts.subprocess_utils import find_project_root, run_cargo_command # type: ignore[no-redef,import-not-found] + from scripts.subprocess_utils import ExecutableNotFoundError, find_project_root, run_cargo_command # type: ignore[no-redef,import-not-found] logger = logging.getLogger(__name__) +_RECOVERABLE_COMPARISON_ERRORS: tuple[type[BaseException], ...] 
= ( + ExecutableNotFoundError, + OSError, + RuntimeError, + TypeError, + ValueError, + KeyError, + subprocess.SubprocessError, +) + class StorageBackendComparator: """Compare performance between SlotMap and DenseSlotMap storage backends.""" - def __init__(self, project_root: Path): + def __init__(self, project_root: Path) -> None: """Initialize with project root directory.""" self.project_root = project_root self.criterion_dir = project_root / "target" / "criterion" @@ -117,7 +128,7 @@ def run_comparison( print(f"\n✅ Comparison report saved: {output_path}") return True - except Exception as e: + except _RECOVERABLE_COMPARISON_ERRORS as e: print(f"❌ Comparison failed: {e}", file=sys.stderr) logger.exception("Comparison failed") return False @@ -199,7 +210,7 @@ def _run_benchmark(self, benchmark_name: str, use_dense_slotmap: bool, dev_mode: return results - except Exception: + except _RECOVERABLE_COMPARISON_ERRORS: logger.exception("Benchmark execution failed") return None @@ -247,10 +258,10 @@ def _parse_criterion_output(self, output: str) -> dict: ) json_found = True logger.debug("Parsed JSON for benchmark: %s", name) - except Exception as e: + except (OSError, KeyError, TypeError, ValueError) as e: logger.debug("Failed to parse JSON from %s: %s", path, e) continue - except Exception: + except OSError: logger.debug("JSON parsing failed, falling back to regex") # Fallback to stdout regex parsing if no JSON found @@ -446,7 +457,7 @@ def _generate_comparison_report( return "\n".join(lines) -def main(): +def main() -> None: """Main entry point for storage backend comparison.""" parser = argparse.ArgumentParser( description="Compare SlotMap vs DenseSlotMap storage backend performance", @@ -510,7 +521,7 @@ def main(): sys.exit(0 if success else 1) - except Exception as e: + except _RECOVERABLE_COMPARISON_ERRORS as e: print(f"❌ Error: {e}", file=sys.stderr) logger.exception("Fatal error") sys.exit(1) diff --git a/scripts/hardware_utils.py b/scripts/hardware_utils.py index 9be2244e..cfc22348 100755 --- a/scripts/hardware_utils.py +++ b/scripts/hardware_utils.py @@ -37,7 +37,7 @@ class HardwareInfo: """Cross-platform hardware information detection.""" - def __init__(self): + def __init__(self) -> None: """Initialize cached platform identifiers for hardware probes.""" self.os_type = platform.system() self.machine = platform.machine() @@ -118,33 +118,29 @@ def _get_linux_cpu_model(self) -> str: return "Unknown" - def _get_linux_cpu_cores(self) -> str: - """ - Get CPU core count on Linux. 
- - Returns: - CPU core count or "Unknown" - """ - if not shutil.which("lscpu"): - # Fallback: parse physical core count from /proc/cpuinfo - try: - physical_cores: set[tuple[str, str]] = set() - with open("/proc/cpuinfo", encoding="utf-8") as f: - physical_id = core_id = None - for line in f: - if line.startswith("physical id"): - physical_id = line.split(":", 1)[1].strip() - elif line.startswith("core id"): - core_id = line.split(":", 1)[1].strip() - if physical_id is not None and core_id is not None: - physical_cores.add((physical_id, core_id)) - physical_id = core_id = None - if physical_cores: - return str(len(physical_cores)) - except (FileNotFoundError, PermissionError, ValueError): - return "Unknown" + def _get_linux_cpu_cores_from_proc(self) -> str: + """Parse physical CPU cores from /proc/cpuinfo when lscpu is unavailable.""" + try: + physical_cores: set[tuple[str, str]] = set() + with open("/proc/cpuinfo", encoding="utf-8") as f: + physical_id = core_id = None + for line in f: + if line.startswith("physical id"): + physical_id = line.split(":", 1)[1].strip() + elif line.startswith("core id"): + core_id = line.split(":", 1)[1].strip() + if physical_id is not None and core_id is not None: + physical_cores.add((physical_id, core_id)) + physical_id = core_id = None + if physical_cores: + return str(len(physical_cores)) + except (FileNotFoundError, PermissionError, ValueError): return "Unknown" + return "Unknown" + + def _get_linux_cpu_cores_from_lscpu(self) -> str: + """Parse physical CPU cores from lscpu output.""" try: lscpu_output = self._run_command(["lscpu"]) cores_per_socket = None @@ -159,10 +155,21 @@ def _get_linux_cpu_cores(self) -> str: if cores_per_socket is not None and sockets is not None: return str(cores_per_socket * sockets) except (subprocess.CalledProcessError, ValueError, IndexError): - pass + return "Unknown" return "Unknown" + def _get_linux_cpu_cores(self) -> str: + """ + Get CPU core count on Linux. + + Returns: + CPU core count or "Unknown" + """ + if not shutil.which("lscpu"): + return self._get_linux_cpu_cores_from_proc() + return self._get_linux_cpu_cores_from_lscpu() + def _get_linux_cpu_threads(self) -> str: """ Get CPU thread count on Linux. 
@@ -353,7 +360,7 @@ def get_rust_info(self, cwd: Path | None = None) -> tuple[str, str]: break except subprocess.CalledProcessError as e: logger.debug("rustc command failed: %s", e) - except Exception as e: + except (OSError, subprocess.SubprocessError) as e: logger.debug("Failed to get Rust info: %s", e) return rust_version, rust_target @@ -638,7 +645,7 @@ def _extract_memory_value(memory_str: str) -> float | None: return None -def main(): +def main() -> None: """Command-line interface for hardware utilities.""" parser = argparse.ArgumentParser(description="Cross-platform hardware information detection and comparison") parser.add_argument("command", choices=["info", "kv", "compare"], help="Command to run") diff --git a/scripts/postprocess_changelog.py b/scripts/postprocess_changelog.py index 8fd91726..1ee4ee3b 100644 --- a/scripts/postprocess_changelog.py +++ b/scripts/postprocess_changelog.py @@ -453,6 +453,42 @@ def _process_code_fence(line: str, result: list[str], in_code_block: bool) -> tu return True, in_code_block +def _update_entry_summary(line: str, current_entry_summary: str | None) -> str | None: + """Track the active changelog entry summary for squash-body cleanup.""" + if line.startswith("- ") and _COMMIT_LINK_RE.search(line): + return _plain_summary(line) + if line.startswith(("### ", "## ", "# ")): + return None + return current_entry_summary + + +def _should_skip_duplicate_heading( + line: str, + result: list[str], + current_entry_summary: str | None, + is_isolated_body_heading: bool, +) -> tuple[bool, bool]: + """Return whether to skip a duplicate squash heading and the following blank.""" + if is_isolated_body_heading and _is_duplicate_squash_heading(line, current_entry_summary): + return True, bool(result and not result[-1].strip()) + return False, False + + +def _normalize_body_line(line: str, lines: list[str], idx: int, result: list[str], current_entry_summary: str | None) -> str: + """Apply markdown hygiene transforms to a non-code line.""" + is_isolated_body_heading = _is_isolated_body_heading(lines, idx) + line = _deindent_orphan(line, lines, idx) + line = _normalize_indented_heading(line) + + if is_isolated_body_heading: + line = _normalize_squash_heading(line, nested=current_entry_summary is not None) + + if _needs_blank_before(line.lstrip(), result): + result.append("") + + return _reflow_line(line) if len(line) > MAX_LINE_WIDTH else line + + def postprocess(path: Path) -> None: """Read *path*, apply hygiene fixes, and write it back.""" text = path.read_text(encoding="utf-8") @@ -486,44 +522,25 @@ def postprocess(path: Path) -> None: # --- MD030: normalise spaces after list marker --- line = _LIST_MARKER_SPACE_RE.sub(r"\1 ", line) - if line.startswith("- ") and _COMMIT_LINK_RE.search(line): - current_entry_summary = _plain_summary(line) - elif line.startswith(("### ", "## ", "# ")): - current_entry_summary = None - + current_entry_summary = _update_entry_summary(line, current_entry_summary) is_isolated_body_heading = _is_isolated_body_heading(lines, idx) # --- GitHub squash bodies: collapse duplicate pseudo-headings --- - if is_isolated_body_heading and _is_duplicate_squash_heading(line, current_entry_summary): - drop_next_blank = bool(result and not result[-1].strip()) + should_skip, next_drop_blank = _should_skip_duplicate_heading( + line, + result, + current_entry_summary, + is_isolated_body_heading, + ) + if should_skip: + drop_next_blank = next_drop_blank continue if drop_next_blank and not line.strip(): drop_next_blank = False continue drop_next_blank = 
False - # --- MD007: de-indent orphaned body list items --- - line = _deindent_orphan(line, lines, idx) - stripped = line.lstrip() - - # --- MD023: headings must start at the beginning of the line --- - line = _normalize_indented_heading(line) - stripped = line.lstrip() - - # --- GitHub squash bodies: render pseudo-headings as prose --- - if is_isolated_body_heading: - line = _normalize_squash_heading(line, nested=current_entry_summary is not None) - stripped = line.lstrip() - - # --- MD032: blank line before a list item that follows prose --- - if _needs_blank_before(stripped, result): - result.append("") - - # --- reflow long lines --- - if len(line) > MAX_LINE_WIDTH: - result.append(_reflow_line(line)) - else: - result.append(line) + result.append(_normalize_body_line(line, lines, idx, result, current_entry_summary)) # 1. Reassemble and strip trailing blank lines. text = "\n".join(result) diff --git a/scripts/tests/conftest.py b/scripts/tests/conftest.py index 35af23eb..3a102336 100644 --- a/scripts/tests/conftest.py +++ b/scripts/tests/conftest.py @@ -5,10 +5,11 @@ """ import os +import subprocess import sys -from contextlib import contextmanager +from collections.abc import Callable, Iterator +from contextlib import AbstractContextManager, contextmanager from pathlib import Path -from unittest.mock import Mock import pytest @@ -20,7 +21,7 @@ @pytest.fixture -def temp_chdir(): +def temp_chdir() -> Callable[[os.PathLike | str], AbstractContextManager[None]]: """ Pytest fixture for temporarily changing working directory. @@ -36,7 +37,7 @@ def test_something(temp_chdir): """ @contextmanager - def _temp_chdir_context(path: os.PathLike | str): + def _temp_chdir_context(path: os.PathLike | str) -> Iterator[None]: """Context manager for temporarily changing working directory.""" original_cwd = Path.cwd() target = Path(path) @@ -52,11 +53,11 @@ def _temp_chdir_context(path: os.PathLike | str): @pytest.fixture -def mock_git_command_result(): +def mock_git_command_result() -> Callable[[str], subprocess.CompletedProcess[str]]: """ Pytest fixture for creating mock CompletedProcess objects for git commands. - Returns a function that creates a mock object with the specified stdout output. + Returns a function that creates a CompletedProcess with the specified stdout output. This standardizes git command mocking across all test files. 
Usage: @@ -65,12 +66,8 @@ def test_something(mock_git_command_result): # mock_result.stdout.strip() will return "v0.4.2" """ - def _create_mock_result(output: str) -> Mock: - """Create a mock CompletedProcess object for git commands.""" - mock_result = Mock() - mock_result.stdout = output # mimic CompletedProcess.stdout (str) - mock_result.returncode = 0 - mock_result.args = ["git"] - return mock_result + def _create_mock_result(output: str) -> subprocess.CompletedProcess[str]: + """Create a typed CompletedProcess object for git commands.""" + return subprocess.CompletedProcess(args=["git"], returncode=0, stdout=output, stderr="") return _create_mock_result diff --git a/scripts/tests/test_benchmark_models.py b/scripts/tests/test_benchmark_models.py index ed5107b9..5b8c529c 100644 --- a/scripts/tests/test_benchmark_models.py +++ b/scripts/tests/test_benchmark_models.py @@ -27,7 +27,7 @@ class TestBenchmarkData: """Test cases for BenchmarkData class.""" - def test_init(self): + def test_init(self) -> None: """Test BenchmarkData initialization.""" data = BenchmarkData(points=1000, dimension="2D") assert data.points == 1000 @@ -35,7 +35,7 @@ def test_init(self): assert data.time_mean == 0.0 assert data.throughput_mean is None - def test_with_timing_fluent_interface(self): + def test_with_timing_fluent_interface(self) -> None: """Test fluent interface for setting timing data.""" data = BenchmarkData(1000, "3D").with_timing(100.0, 110.0, 120.0, "µs") @@ -44,7 +44,7 @@ def test_with_timing_fluent_interface(self): assert data.time_high == 120.0 assert data.time_unit == "µs" - def test_with_throughput_fluent_interface(self): + def test_with_throughput_fluent_interface(self) -> None: """Test fluent interface for setting throughput data.""" data = BenchmarkData(1000, "2D").with_throughput(800.0, 900.0, 1000.0, "Kelem/s") @@ -53,7 +53,7 @@ def test_with_throughput_fluent_interface(self): assert data.throughput_high == 1000.0 assert data.throughput_unit == "Kelem/s" - def test_to_baseline_format_with_timing_only(self): + def test_to_baseline_format_with_timing_only(self) -> None: """Test baseline format output with timing data only.""" data = BenchmarkData(1000, "2D").with_timing(100.0, 110.0, 120.0, "µs") @@ -63,7 +63,7 @@ def test_to_baseline_format_with_timing_only(self): """ assert result == expected - def test_to_baseline_format_with_timing_and_throughput(self): + def test_to_baseline_format_with_timing_and_throughput(self) -> None: """Test baseline format output with both timing and throughput data.""" data = BenchmarkData(1000, "3D").with_timing(100.0, 110.0, 120.0, "µs").with_throughput(800.0, 900.0, 1000.0, "Kelem/s") @@ -74,11 +74,28 @@ def test_to_baseline_format_with_timing_and_throughput(self): """ assert result == expected + def test_to_baseline_format_with_unsized_workload(self) -> None: + """Test baseline format output for workloads without numeric input size.""" + data = BenchmarkData(None, "4D", benchmark_id="bistellar_flips_4d/k2_roundtrip").with_timing(0.8, 0.95, 1.1, "µs") + + result = data.to_baseline_format() + + assert "=== Unsized Workload (4D) ===" in result + assert "Benchmark ID: bistellar_flips_4d/k2_roundtrip" in result + assert "0 Points" not in result + + def test_unsized_comparison_key_requires_benchmark_id(self) -> None: + """Test unsized workloads cannot silently collide on comparison keys.""" + data = BenchmarkData(None, "4D") + + with pytest.raises(ValueError, match="Unsized benchmarks require benchmark_id"): + _ = data.comparison_key + class 
TestCircumspherePerformanceData: """Test cases for CircumspherePerformanceData class.""" - def test_init(self): + def test_init(self) -> None: """Test CircumspherePerformanceData initialization.""" data = CircumspherePerformanceData(method="insphere", time_ns=1000.0) assert data.method == "insphere" @@ -90,7 +107,7 @@ def test_init(self): class TestCircumsphereTestCase: """Test cases for CircumsphereTestCase class.""" - def test_init_and_get_winner(self): + def test_init_and_get_winner(self) -> None: """Test CircumsphereTestCase initialization and winner detection.""" methods = { "insphere": CircumspherePerformanceData("insphere", 1000.0), @@ -103,7 +120,7 @@ def test_init_and_get_winner(self): assert test_case.dimension == "3D" assert test_case.get_winner() == "insphere_lifted" # Lowest time - def test_get_relative_performance(self): + def test_get_relative_performance(self) -> None: """Test relative performance calculation.""" methods = { "insphere": CircumspherePerformanceData("insphere", 1000.0), @@ -117,12 +134,12 @@ def test_get_relative_performance(self): assert test_case.get_relative_performance("insphere") == pytest.approx(1.25) # 1000/800 assert test_case.get_relative_performance("insphere_distance") == pytest.approx(1.5) # 1200/800 - def test_get_winner_empty_methods(self): + def test_get_winner_empty_methods(self) -> None: """Test get_winner with empty methods dict.""" test_case = CircumsphereTestCase("test_empty", "3D", {}) assert test_case.get_winner() is None - def test_get_relative_performance_nonexistent_method(self): + def test_get_relative_performance_nonexistent_method(self) -> None: """Test get_relative_performance with non-existent method returns 0.0.""" methods = { "insphere": CircumspherePerformanceData("insphere", 1000.0), @@ -132,7 +149,7 @@ def test_get_relative_performance_nonexistent_method(self): # Should return 0.0 for non-existent method assert test_case.get_relative_performance("nonexistent_method") == pytest.approx(0.0) - def test_version_comparison_data_division_by_zero_edge_case(self): + def test_version_comparison_data_division_by_zero_edge_case(self) -> None: """Test VersionComparisonData handles edge case gracefully.""" # This doesn't raise an exception but demonstrates pytest usage for edge case testing comparison = VersionComparisonData( @@ -152,7 +169,7 @@ def test_version_comparison_data_division_by_zero_edge_case(self): class TestVersionComparisonData: """Test cases for VersionComparisonData class.""" - def test_improvement_calculation(self): + def test_improvement_calculation(self) -> None: """Test improvement percentage calculation.""" comparison = VersionComparisonData( test_case="Basic 3D", @@ -167,7 +184,7 @@ def test_improvement_calculation(self): expected_improvement = ((808.0 - 805.0) / 808.0) * 100 assert comparison.improvement_pct == pytest.approx(expected_improvement, abs=0.001) - def test_zero_old_value(self): + def test_zero_old_value(self) -> None: """Test improvement calculation with zero old value.""" comparison = VersionComparisonData( test_case="Basic 3D", @@ -185,7 +202,7 @@ def test_zero_old_value(self): class TestParsingFunctions: """Test cases for parsing functions.""" - def test_extract_benchmark_data(self): + def test_extract_benchmark_data(self) -> None: """Test extracting benchmark data from baseline content.""" baseline_content = """Date: 2024-01-15 10:30:00 UTC Git commit: abc123def456 @@ -217,7 +234,7 @@ def test_extract_benchmark_data(self): assert second.dimension == "3D" assert second.time_mean == 550.0 - def 
test_parse_benchmark_header(self): + def test_parse_benchmark_header(self) -> None: """Test parsing benchmark header lines.""" # Valid header result = parse_benchmark_header("=== 1000 Points (2D) ===") @@ -225,11 +242,16 @@ def test_parse_benchmark_header(self): assert result.points == 1000 assert result.dimension == "2D" + result = parse_benchmark_header("=== Unsized Workload (4D) ===") + assert result is not None + assert result.points is None + assert result.dimension == "4D" + # Invalid header result = parse_benchmark_header("Invalid header") assert result is None - def test_parse_time_data(self): + def test_parse_time_data(self) -> None: """Test parsing time data lines.""" benchmark = BenchmarkData(1000, "2D") @@ -244,7 +266,7 @@ def test_parse_time_data(self): success = parse_time_data(benchmark2, "Invalid time data") assert success is False - def test_parse_throughput_data(self): + def test_parse_throughput_data(self) -> None: """Test parsing throughput data lines.""" benchmark = BenchmarkData(1000, "2D") @@ -263,7 +285,7 @@ def test_parse_throughput_data(self): class TestFormattingFunctions: """Test cases for formatting functions.""" - def test_format_benchmark_tables(self): + def test_format_benchmark_tables(self) -> None: """Test formatting benchmark data as markdown tables.""" # Create test benchmarks benchmarks = [ @@ -288,7 +310,45 @@ def test_format_benchmark_tables(self): assert "| 5000 |" in markdown_content # Should contain the 5000 point row assert "4.5x" in markdown_content # Scaling: 500/110 ≈ 4.5 - def test_format_time_value(self): + def test_format_benchmark_tables_includes_benchmark_ids(self) -> None: + """Test expanded benchmark IDs are shown in baseline summary tables.""" + benchmarks = [ + BenchmarkData(50, "3D", benchmark_id="boundary_facets/boundary_facets_3d/50") + .with_timing(9.0, 10.0, 11.0, "µs") + .with_throughput(4.545, 5.0, 5.556, "Kelem/s"), + BenchmarkData(50, "3D", benchmark_id="validation/validate_3d/50").with_timing( + 19.0, + 20.0, + 21.0, + "µs", + ), + ] + + lines = format_benchmark_tables(benchmarks) + markdown_content = "\n".join(lines) + + assert "| Benchmark ID | Points | Time (mean) | Throughput (mean) | Scaling |" in markdown_content + assert "| `boundary_facets/boundary_facets_3d/50` | 50 | 10.00 µs | 5.00 Kelem/s | N/A |" in markdown_content + assert "| `validation/validate_3d/50` | 50 | 20.00 µs | N/A | N/A |" in markdown_content + + def test_format_benchmark_tables_renders_unsized_points(self) -> None: + """Test unsized workloads render without fake numeric point counts.""" + benchmarks = [ + BenchmarkData(None, "4D", benchmark_id="bistellar_flips_4d/k2_roundtrip").with_timing( + 0.8, + 0.95, + 1.1, + "µs", + ), + ] + + lines = format_benchmark_tables(benchmarks) + markdown_content = "\n".join(lines) + + assert "| `bistellar_flips_4d/k2_roundtrip` | n/a | 0.950 µs | N/A | N/A |" in markdown_content + assert "0 Points" not in markdown_content + + def test_format_time_value(self) -> None: """Test formatting time values with appropriate precision.""" # Test zero and negative values (should return N/A) assert format_time_value(0.0, "µs") == "N/A" @@ -301,7 +361,7 @@ def test_format_time_value(self): assert format_time_value(2500.0, "ms") == "2.5000 s" # Converts to s assert format_time_value(50000.0, "ms") == "50.0000 s" # Large values convert to s - def test_format_throughput_value(self): + def test_format_throughput_value(self) -> None: """Test formatting throughput values with appropriate precision.""" # Test different value ranges 
assert format_throughput_value(0.5, "Kelem/s") == "0.500 Kelem/s" @@ -312,7 +372,7 @@ def test_format_throughput_value(self): assert format_throughput_value(None, "Kelem/s") == "N/A" assert format_throughput_value(110.0, None) == "N/A" - def test_format_time_value_with_unit_aliases(self): + def test_format_time_value_with_unit_aliases(self) -> None: """Test time value formatting with microsecond unit aliases.""" # Test microsecond alias normalization assert format_time_value(500.0, "us") == "500.00 µs" # us -> µs @@ -323,7 +383,7 @@ def test_format_time_value_with_unit_aliases(self): assert format_time_value(1500.0, "us") == "1.500 ms" # us -> µs -> ms conversion assert format_time_value(2500.0, "μs") == "2.500 ms" # μs -> µs -> ms conversion - def test_parse_time_data_with_scientific_notation(self): + def test_parse_time_data_with_scientific_notation(self) -> None: """Test parsing time data with scientific notation and flexible formatting.""" benchmark = BenchmarkData(1000, "3D") @@ -346,7 +406,7 @@ def test_parse_time_data_with_scientific_notation(self): assert benchmark3.time_mean == 110.0 assert benchmark3.time_unit == "µs" - def test_parse_throughput_data_with_scientific_notation(self): + def test_parse_throughput_data_with_scientific_notation(self) -> None: """Test parsing throughput data with scientific notation and flexible formatting.""" benchmark = BenchmarkData(1000, "2D") @@ -363,7 +423,7 @@ def test_parse_throughput_data_with_scientific_notation(self): assert benchmark2.throughput_mean == 9090.9 assert benchmark2.throughput_unit == "Kelem/s" - def test_format_benchmark_tables_dimension_sorting(self): + def test_format_benchmark_tables_dimension_sorting(self) -> None: """Test that dimensions are sorted numerically rather than lexically.""" # Create benchmarks with dimensions that would sort incorrectly lexically benchmarks = [ @@ -385,7 +445,7 @@ def test_format_benchmark_tables_dimension_sorting(self): # Verify they appear in numeric order: 1D < 2D < 3D < 10D assert pos_1d < pos_2d < pos_3d < pos_10d - def test_format_benchmark_tables_mixed_dimension_formats(self): + def test_format_benchmark_tables_mixed_dimension_formats(self) -> None: """Test dimension sorting with mixed formats and edge cases.""" benchmarks = [ BenchmarkData(1000, "2D").with_timing(50.0, 55.0, 60.0, "µs"), @@ -406,7 +466,7 @@ def test_format_benchmark_tables_mixed_dimension_formats(self): # Numeric dimensions should come first (1d, 2D, 3D), then non-numeric (custom_format) assert pos_1d < pos_2d < pos_3d < pos_custom - def test_format_benchmark_tables_scaling_baseline_with_zero_first_entry(self): + def test_format_benchmark_tables_scaling_baseline_with_zero_first_entry(self) -> None: """Test scaling baseline calculation when first entry has zero/empty time. 
This tests the fix for the issue where using 1.0 as fallback when the @@ -433,7 +493,7 @@ def test_format_benchmark_tables_scaling_baseline_with_zero_first_entry(self): # Should not contain inflated scaling that would result from 1.0 fallback assert "500.0x" not in markdown_content # This would be 500/1.0 if bug existed - def test_format_benchmark_tables_scaling_baseline_all_zero_times(self): + def test_format_benchmark_tables_scaling_baseline_all_zero_times(self) -> None: """Test scaling baseline calculation when all entries have zero/empty time.""" benchmarks = [ BenchmarkData(1000, "2D").with_timing(0.0, 0.0, 0.0, "µs"), diff --git a/scripts/tests/test_benchmark_utils.py b/scripts/tests/test_benchmark_utils.py index e0037337..a1edbfd8 100644 --- a/scripts/tests/test_benchmark_utils.py +++ b/scripts/tests/test_benchmark_utils.py @@ -20,6 +20,7 @@ import time from io import StringIO from pathlib import Path +from typing import Any from unittest.mock import Mock, patch import pytest @@ -30,6 +31,7 @@ CircumsphereTestCase, ) from benchmark_utils import ( + _CI_PERFORMANCE_SUITE_MANIFEST_IDS_FILE, DEFAULT_REGRESSION_THRESHOLD, DEV_MODE_BENCH_ARGS, TRUSTED_BENCH_PROFILE, @@ -40,6 +42,7 @@ PerformanceSummaryGenerator, ProjectRootNotFoundError, WorkflowHelper, + _expand_ci_benchmark_id_pattern, configure_logging, create_argument_parser, find_project_root, @@ -47,9 +50,55 @@ ) THRESHOLD_PERCENT = f"{DEFAULT_REGRESSION_THRESHOLD:.1f}%" +CI_MANIFEST_STDOUT = ( + "api_benchmark group=boundary_facets public_api=DelaunayTriangulation::boundary_facets " + "dimensions=3 benchmark_ids=boundary_facets/boundary_facets_3d/50 note=test\n" +) +PUBLIC_API_TITLE = "### Public API Performance Contract (`ci_performance_suite`)" +CIRCUMSPHERE_TITLE = "## Circumsphere Predicate Analysis" +PERFORMANCE_RANKING_TITLE = "### Performance Ranking" +RECOMMENDATIONS_TITLE = "### Recommendations" +PERFORMANCE_UPDATES_TITLE = "## Performance Data Updates" + + +def completed_process( + stdout: str = "", + *, + returncode: int = 0, + stderr: str = "", + args: list[str] | None = None, +) -> subprocess.CompletedProcess[str]: + """Return a typed subprocess result for command-wrapper mocks.""" + return subprocess.CompletedProcess(args=args or [], returncode=returncode, stdout=stdout, stderr=stderr) + + +def write_estimate(target_dir: Path, path_parts, mean_ns) -> None: + """Write a minimal Criterion estimates.json fixture.""" + estimates_dir = target_dir / "criterion" / Path(*path_parts) / "base" + estimates_dir.mkdir(parents=True) + estimates = { + "mean": { + "point_estimate": mean_ns, + "confidence_interval": { + "lower_bound": mean_ns * 0.9, + "upper_bound": mean_ns * 1.1, + }, + }, + } + (estimates_dir / "estimates.json").write_text(json.dumps(estimates), encoding="utf-8") -def compute_average_time_change(current_results, baseline_results): +def write_ci_performance_manifest(target_dir: Path, benchmark_ids: list[str]) -> None: + """Write the ci_performance_suite runtime manifest sidecar.""" + criterion_dir = target_dir / "criterion" + criterion_dir.mkdir(parents=True, exist_ok=True) + (criterion_dir / _CI_PERFORMANCE_SUITE_MANIFEST_IDS_FILE).write_text( + "\n".join(benchmark_ids) + "\n", + encoding="utf-8", + ) + + +def compute_average_time_change(current_results, baseline_results) -> float: """Replicate PerformanceComparator's geometric mean logic for tests.""" time_changes = [] for current in current_results: @@ -73,7 +122,7 @@ def compute_average_time_change(current_results, baseline_results): @pytest.fixture -def 
sample_estimates_data(): +def sample_estimates_data() -> dict[str, object]: """Fixture for common estimates.json test data.""" return { "mean": { @@ -84,7 +133,7 @@ def sample_estimates_data(): @pytest.fixture -def sample_benchmark_data(): +def sample_benchmark_data() -> dict[str, BenchmarkData]: """Fixture for common BenchmarkData test objects.""" return { "2d_1000": BenchmarkData(1000, "2D").with_timing(100.0, 110.0, 120.0, "µs"), @@ -96,7 +145,7 @@ def sample_benchmark_data(): class TestCriterionParser: """Test cases for CriterionParser class.""" - def test_parse_estimates_json_valid_data(self, sample_estimates_data): + def test_parse_estimates_json_valid_data(self, sample_estimates_data) -> None: """Test parsing valid estimates.json data.""" with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: json.dump(sample_estimates_data, f) @@ -118,7 +167,35 @@ def test_parse_estimates_json_valid_data(self, sample_estimates_data): finally: estimates_path.unlink() - def test_parse_estimates_json_zero_mean(self): + def test_benchmark_data_positional_timing_compatibility(self) -> None: + """Test legacy positional construction still maps the third argument to time_low.""" + benchmark = BenchmarkData(1000, "2D", 1.0, 2.0, 3.0, "µs") + + assert benchmark.time_low == 1.0 + assert benchmark.time_mean == 2.0 + assert benchmark.time_high == 3.0 + assert benchmark.time_unit == "µs" + assert benchmark.benchmark_id == "" + + def test_parse_estimates_json_preserves_unsized_workload(self, sample_estimates_data) -> None: + """Test Criterion estimates without numeric input size do not get fake throughput.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: + json.dump(sample_estimates_data, f) + f.flush() + estimates_path = Path(f.name) + + try: + result = CriterionParser.parse_estimates_json(estimates_path, None, "4D") + + assert result is not None + assert result.points is None + assert result.dimension == "4D" + assert result.throughput_mean is None + assert "0 Points" not in result.to_baseline_format() + finally: + estimates_path.unlink() + + def test_parse_estimates_json_zero_mean(self) -> None: """Test parsing estimates.json with zero mean time.""" estimates_data = {"mean": {"point_estimate": 0.0, "confidence_interval": {"lower_bound": 0.0, "upper_bound": 0.0}}} @@ -133,7 +210,7 @@ def test_parse_estimates_json_zero_mean(self): finally: estimates_path.unlink() - def test_parse_estimates_json_very_fast_benchmark_division_by_zero_protection(self): + def test_parse_estimates_json_very_fast_benchmark_division_by_zero_protection(self) -> None: """Test division by zero protection for very fast benchmarks with near-zero confidence intervals.""" estimates_data = { "mean": { @@ -170,12 +247,12 @@ def test_parse_estimates_json_very_fast_benchmark_division_by_zero_protection(se finally: estimates_path.unlink() - def test_parse_estimates_json_invalid_file(self): + def test_parse_estimates_json_invalid_file(self) -> None: """Test parsing non-existent estimates.json file.""" result = CriterionParser.parse_estimates_json(Path("nonexistent.json"), 1000, "2D") assert result is None - def test_parse_estimates_json_malformed_json(self): + def test_parse_estimates_json_malformed_json(self) -> None: """Test parsing malformed JSON file.""" with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: f.write("{ invalid json") @@ -190,7 +267,7 @@ def test_parse_estimates_json_malformed_json(self): @patch("benchmark_utils.Path.exists") 
@patch("benchmark_utils.Path.iterdir") - def test_find_criterion_results_no_criterion_dir(self, mock_iterdir, mock_exists): # noqa: ARG002 + def test_find_criterion_results_no_criterion_dir(self, mock_iterdir, mock_exists) -> None: # noqa: ARG002 """Test finding criterion results when criterion directory doesn't exist.""" mock_exists.return_value = False @@ -199,7 +276,7 @@ def test_find_criterion_results_no_criterion_dir(self, mock_iterdir, mock_exists assert results == [] - def test_find_criterion_results_sorting(self): + def test_find_criterion_results_sorting(self) -> None: """Test that results are sorted by dimension and points.""" # Create test data that would be unsorted initially test_results = [ @@ -222,7 +299,7 @@ def test_find_criterion_results_sorting(self): assert test_results[3].dimension == "4D" assert test_results[3].points == 1000 - def test_ci_performance_suite_patterns(self): + def test_ci_performance_suite_patterns(self) -> None: """Test CI performance suite benchmark patterns (2D, 3D, 4D, 5D with 10, 25, 50 points).""" # Test data representing CI performance suite dimensions and point counts ci_suite_results = [ @@ -248,18 +325,78 @@ def test_ci_performance_suite_patterns(self): actual_order = [(b.dimension, b.points) for b in ci_suite_results] assert actual_order == expected_order + def test_ci_benchmark_id_pattern_expands_braced_segments(self) -> None: + """Test ci_performance_suite manifest brace patterns expand to concrete IDs.""" + result = _expand_ci_benchmark_id_pattern("tds_new_2d/{tds_new,tds_new_adversarial}/{10,25}") + + assert result == { + "tds_new_2d/tds_new/10", + "tds_new_2d/tds_new/25", + "tds_new_2d/tds_new_adversarial/10", + "tds_new_2d/tds_new_adversarial/25", + } + + def test_find_criterion_results_preserves_ci_suite_ids(self) -> None: + """Test ci_performance_suite results keep expanded Criterion benchmark IDs.""" + with tempfile.TemporaryDirectory() as temp_dir: + target_dir = Path(temp_dir) / "target" + + write_estimate(target_dir, ("boundary_facets", "boundary_facets_3d", "50"), 10_000.0) + write_estimate(target_dir, ("validation", "validate_3d", "50"), 20_000.0) + write_estimate(target_dir, ("boundary_facets", "boundary_facets_3d_adversarial", "50"), 30_000.0) + write_estimate(target_dir, ("bistellar_flips_4d", "k2_roundtrip"), 40_000.0) + + results = CriterionParser.find_criterion_results(target_dir) + + assert [result.comparison_key for result in results] == [ + "boundary_facets/boundary_facets_3d/50", + "boundary_facets/boundary_facets_3d_adversarial/50", + "validation/validate_3d/50", + "bistellar_flips_4d/k2_roundtrip", + ] + sized_results = [result for result in results if result.comparison_key != "bistellar_flips_4d/k2_roundtrip"] + assert {(result.points, result.dimension) for result in sized_results} == {(50, "3D")} + + roundtrip = next(result for result in results if result.comparison_key == "bistellar_flips_4d/k2_roundtrip") + assert roundtrip.points is None + assert roundtrip.dimension == "4D" + assert roundtrip.throughput_mean is None + + def test_find_criterion_results_filters_stale_ci_suite_ids_with_manifest(self) -> None: + """Test ci_performance_suite parsing ignores stale Criterion files outside the manifest.""" + with tempfile.TemporaryDirectory() as temp_dir: + target_dir = Path(temp_dir) / "target" + + write_estimate(target_dir, ("boundary_facets", "boundary_facets_3d", "50"), 10_000.0) + write_estimate(target_dir, ("validation", "validate_3d", "50"), 20_000.0) + write_estimate(target_dir, ("boundary_facets", 
"old_boundary_facets_3d", "50"), 30_000.0) + write_ci_performance_manifest( + target_dir, + [ + "boundary_facets/boundary_facets_3d/50", + "validation/validate_3d/50", + ], + ) + + results = CriterionParser.find_criterion_results(target_dir) + + assert [result.comparison_key for result in results] == [ + "boundary_facets/boundary_facets_3d/50", + "validation/validate_3d/50", + ] + class TestPerformanceComparator: """Test cases for PerformanceComparator class.""" @pytest.fixture - def comparator(self): + def comparator(self) -> PerformanceComparator: """Fixture for PerformanceComparator instance.""" project_root = Path("/fake/project") return PerformanceComparator(project_root) @pytest.fixture - def sample_baseline_content(self): + def sample_baseline_content(self) -> str: """Fixture for sample baseline content.""" return """Date: 2023-06-15 10:30:00 PDT Git commit: abc123def456 @@ -285,7 +422,7 @@ def sample_baseline_content(self): Throughput: [4.167, 4.545, 5.0] Kelem/s """ - def test_parse_baseline_file(self, comparator, sample_baseline_content): + def test_parse_baseline_file(self, comparator, sample_baseline_content) -> None: """Test parsing baseline file content.""" results = comparator._parse_baseline_file(sample_baseline_content) @@ -301,7 +438,98 @@ def test_parse_baseline_file(self, comparator, sample_baseline_content): assert bench_2d_1000.time_mean == 110.0 assert bench_2d_1000.throughput_mean == 9.091 - def test_write_time_comparison_no_regression(self, comparator): + def test_parse_baseline_file_with_benchmark_ids(self, comparator) -> None: + """Test parsing expanded ci_performance_suite baseline identifiers.""" + baseline_content = """Date: 2023-06-15 10:30:00 PDT +Git commit: abc123def456 + +=== 50 Points (3D) === +Benchmark ID: boundary_facets/boundary_facets_3d/50 +Time: [9.0, 10.0, 11.0] µs +Throughput: [4.545, 5.0, 5.556] Kelem/s + +=== 50 Points (3D) === +Benchmark ID: validation/validate_3d/50 +Time: [19.0, 20.0, 21.0] µs +Throughput: [2.381, 2.5, 2.632] Kelem/s +""" + + results = comparator._parse_baseline_file(baseline_content) + + assert set(results) == { + "boundary_facets/boundary_facets_3d/50", + "validation/validate_3d/50", + } + assert results["boundary_facets/boundary_facets_3d/50"].time_mean == 10.0 + assert results["validation/validate_3d/50"].time_mean == 20.0 + + def test_parse_baseline_file_with_unsized_benchmark_id(self, comparator) -> None: + """Test parsing expanded CI benchmarks without numeric input sizes.""" + baseline_content = """Date: 2023-06-15 10:30:00 PDT +Git commit: abc123def456 + +=== Unsized Workload (4D) === +Benchmark ID: bistellar_flips_4d/k2_roundtrip +Time: [0.8, 0.95, 1.1] µs +""" + + results = comparator._parse_baseline_file(baseline_content) + + benchmark = results["bistellar_flips_4d/k2_roundtrip"] + assert benchmark.points is None + assert benchmark.dimension == "4D" + assert benchmark.throughput_mean is None + + def test_write_performance_comparison_matches_benchmark_ids(self, comparator) -> None: + """Test comparison uses expanded benchmark IDs instead of point/dimension collisions.""" + current_results = [ + BenchmarkData(50, "3D", benchmark_id="boundary_facets/boundary_facets_3d/50").with_timing(9.0, 10.0, 11.0, "µs"), + BenchmarkData(50, "3D", benchmark_id="validation/validate_3d/50").with_timing(19.0, 20.0, 21.0, "µs"), + ] + baseline_results = { + "boundary_facets/boundary_facets_3d/50": BenchmarkData( + 50, + "3D", + benchmark_id="boundary_facets/boundary_facets_3d/50", + ).with_timing(9.0, 10.0, 11.0, "µs"), + 
"validation/validate_3d/50": BenchmarkData( + 50, + "3D", + benchmark_id="validation/validate_3d/50", + ).with_timing(38.0, 40.0, 42.0, "µs"), + } + + output = StringIO() + comparator._write_performance_comparison(output, current_results, baseline_results) + content = output.getvalue() + + assert "Benchmark ID: boundary_facets/boundary_facets_3d/50" in content + assert "Benchmark ID: validation/validate_3d/50" in content + assert "OK: Time change +0.0%" in content + assert "IMPROVEMENT: Time decreased by 50.0%" in content + + def test_write_performance_comparison_no_legacy_fallback_for_benchmark_id(self, comparator) -> None: + """Test expanded IDs do not compare against unrelated collapsed legacy baselines.""" + current_results = [ + BenchmarkData(50, "3D", benchmark_id="validation/validate_3d/50").with_timing( + 19.0, + 20.0, + 21.0, + "µs", + ), + ] + baseline_results = { + "50_3D": BenchmarkData(50, "3D").with_timing(38.0, 40.0, 42.0, "µs"), + } + + output = StringIO() + comparator._write_performance_comparison(output, current_results, baseline_results) + content = output.getvalue() + + assert "Baseline: N/A (no matching entry)" in content + assert "IMPROVEMENT: Time decreased by 50.0%" not in content + + def test_write_time_comparison_no_regression(self, comparator) -> None: """Test time comparison writing with no regression.""" current = BenchmarkData(1000, "2D").with_timing(100.0, 110.0, 120.0, "µs") baseline = BenchmarkData(1000, "2D").with_timing(95.0, 105.0, 115.0, "µs") @@ -317,7 +545,7 @@ def test_write_time_comparison_no_regression(self, comparator): assert "4.8%" in result assert "✅ OK: Time change +4.8% within acceptable range" in result - def test_write_time_comparison_with_regression(self, comparator): + def test_write_time_comparison_with_regression(self, comparator) -> None: """Test time comparison writing with regression.""" current = BenchmarkData(1000, "2D").with_timing(100.0, 115.0, 130.0, "µs") baseline = BenchmarkData(1000, "2D").with_timing(95.0, 100.0, 105.0, "µs") @@ -333,7 +561,7 @@ def test_write_time_comparison_with_regression(self, comparator): assert "15.0%" in result assert "⚠️ REGRESSION" in result - def test_write_time_comparison_with_improvement(self, comparator): + def test_write_time_comparison_with_improvement(self, comparator) -> None: """Test time comparison writing with significant improvement.""" current = BenchmarkData(1000, "2D").with_timing(80.0, 90.0, 100.0, "µs") baseline = BenchmarkData(1000, "2D").with_timing(95.0, 100.0, 105.0, "µs") @@ -349,7 +577,7 @@ def test_write_time_comparison_with_improvement(self, comparator): assert "10.0%" in result assert "✅ IMPROVEMENT: Time decreased by 10.0% (faster performance)" in result - def test_write_time_comparison_zero_baseline(self, comparator): + def test_write_time_comparison_zero_baseline(self, comparator) -> None: """Test time comparison with zero baseline time.""" current = BenchmarkData(1000, "2D").with_timing(100.0, 110.0, 120.0, "µs") baseline = BenchmarkData(1000, "2D").with_timing(0.0, 0.0, 0.0, "µs") @@ -365,7 +593,7 @@ def test_write_time_comparison_zero_baseline(self, comparator): @pytest.mark.parametrize("dev_mode", [False, True]) @patch("benchmark_utils.run_cargo_command") - def test_compare_omits_quiet_flag(self, mock_cargo, dev_mode): + def test_compare_omits_quiet_flag(self, mock_cargo, dev_mode) -> None: """Test that PerformanceComparator invokes cargo without --quiet flag (removed for better error visibility).""" with tempfile.TemporaryDirectory() as temp_dir: temp_path = 
Path(temp_dir) @@ -380,10 +608,7 @@ def test_compare_omits_quiet_flag(self, mock_cargo, dev_mode): baseline_file.write_text(baseline_content) # Mock successful cargo command - mock_result = Mock() - mock_result.returncode = 0 - mock_result.stdout = "" - mock_cargo.return_value = mock_result + mock_cargo.return_value = completed_process(CI_MANIFEST_STDOUT) comparator = PerformanceComparator(temp_path) comparator.compare_with_baseline(baseline_file, dev_mode=dev_mode) @@ -399,7 +624,7 @@ def test_compare_omits_quiet_flag(self, mock_cargo, dev_mode): # And output is captured assert mock_cargo.call_args.kwargs.get("capture_output") is True - def test_write_performance_comparison_no_average_regression(self, comparator): + def test_write_performance_comparison_no_average_regression(self, comparator) -> None: """Test performance comparison with individual regressions but no average regression.""" # Create current results with mixed performance changes current_results = [ @@ -432,7 +657,7 @@ def test_write_performance_comparison_no_average_regression(self, comparator): assert re.search(r"Average time change:\s*-?0\.0%", result) assert "✅ OVERALL OK" in result - def test_write_performance_comparison_with_average_regression(self, comparator): + def test_write_performance_comparison_with_average_regression(self, comparator) -> None: """Test performance comparison with average regression exceeding threshold.""" # Create current results with overall performance degradation current_results = [ @@ -465,7 +690,7 @@ def test_write_performance_comparison_with_average_regression(self, comparator): assert "Average time change: 11.0%" in result assert "🚨 OVERALL REGRESSION" in result - def test_write_performance_comparison_with_average_improvement(self, comparator): + def test_write_performance_comparison_with_average_improvement(self, comparator) -> None: """Test performance comparison with significant average improvement.""" # Create current results with overall performance improvement current_results = [ @@ -500,7 +725,7 @@ def test_write_performance_comparison_with_average_improvement(self, comparator) assert expected_average_line in result assert "✅ OVERALL OK" in result - def test_write_performance_comparison_missing_baseline(self, comparator): + def test_write_performance_comparison_missing_baseline(self, comparator) -> None: """Test performance comparison when some baselines are missing.""" current_results = [ BenchmarkData(1000, "2D").with_timing(105.0, 110.0, 115.0, "µs"), @@ -522,7 +747,7 @@ def test_write_performance_comparison_missing_baseline(self, comparator): assert "Total benchmarks compared: 1" in result assert "3000 Points (2D)" in result # Should still show the benchmark without baseline - def test_write_performance_comparison_no_benchmarks(self, comparator): + def test_write_performance_comparison_no_benchmarks(self, comparator) -> None: """Test performance comparison with no benchmarks.""" output = StringIO() regression_found = comparator._write_performance_comparison(output, [], {}) @@ -532,7 +757,7 @@ def test_write_performance_comparison_no_benchmarks(self, comparator): @patch("benchmark_utils.get_git_commit_hash") @patch("benchmark_utils.datetime") - def test_prepare_comparison_metadata(self, mock_datetime, mock_git, comparator, sample_baseline_content): + def test_prepare_comparison_metadata(self, mock_datetime, mock_git, comparator, sample_baseline_content) -> None: """Test preparation of comparison metadata.""" # Mock current datetime mock_now = Mock() @@ -550,15 +775,15 @@ def 
test_prepare_comparison_metadata(self, mock_datetime, mock_git, comparator, assert metadata["baseline_commit"] == "abc123def456" @patch("benchmark_utils.get_git_commit_hash") - def test_prepare_comparison_metadata_git_failure(self, mock_git, comparator, sample_baseline_content): + def test_prepare_comparison_metadata_git_failure(self, mock_git, comparator, sample_baseline_content) -> None: """Test metadata preparation when git command fails.""" - mock_git.side_effect = Exception("Git not available") + mock_git.side_effect = RuntimeError("Git not available") metadata = comparator._prepare_comparison_metadata(sample_baseline_content) assert metadata["current_commit"] == "unknown" - def test_regression_threshold_configuration(self, comparator): + def test_regression_threshold_configuration(self, comparator) -> None: """Test that regression threshold can be configured.""" # Test default threshold assert comparator.regression_threshold == DEFAULT_REGRESSION_THRESHOLD @@ -576,7 +801,7 @@ def test_regression_threshold_configuration(self, comparator): assert time_change == pytest.approx(7.0, abs=0.001) # Use pytest.approx for floating-point comparison assert not is_regression - def test_write_error_file_baseline_not_found(self, comparator): + def test_write_error_file_baseline_not_found(self, comparator) -> None: """Test writing error file when baseline is not found.""" with tempfile.TemporaryDirectory() as temp_dir: output_file = Path(temp_dir) / "error_results.txt" @@ -591,7 +816,7 @@ def test_write_error_file_baseline_not_found(self, comparator): assert str(baseline_file) in content assert "This error prevented the benchmark comparison from completing successfully" in content - def test_write_error_file_benchmark_error(self, comparator): + def test_write_error_file_benchmark_error(self, comparator) -> None: """Test writing error file when benchmark execution fails.""" with tempfile.TemporaryDirectory() as temp_dir: output_file = Path(temp_dir) / "error_results.txt" @@ -605,7 +830,7 @@ def test_write_error_file_benchmark_error(self, comparator): assert error_message in content assert "Please check the CI logs for more information" in content - def test_write_error_file_creates_parent_directory(self, comparator): + def test_write_error_file_creates_parent_directory(self, comparator) -> None: """Test that _write_error_file creates parent directory if it doesn't exist.""" with tempfile.TemporaryDirectory() as temp_dir: output_file = Path(temp_dir) / "nested" / "path" / "error_results.txt" @@ -617,7 +842,7 @@ def test_write_error_file_creates_parent_directory(self, comparator): content = output_file.read_text() assert "❌ Error: Test error" in content - def test_write_error_file_handles_write_failure(self, comparator): + def test_write_error_file_handles_write_failure(self, comparator) -> None: """Test that _write_error_file handles write failures gracefully.""" with tempfile.TemporaryDirectory() as temp_dir: output_file = Path(temp_dir) / "error_results.txt" @@ -630,7 +855,7 @@ def test_write_error_file_handles_write_failure(self, comparator): # File should not exist due to write failure assert not output_file.exists() - def test_sampling_warning_reports_dev_full_mismatch(self): + def test_sampling_warning_reports_dev_full_mismatch(self) -> None: """Test that comparison warns when baseline and current sampling modes differ.""" with tempfile.TemporaryDirectory() as temp_dir: comparator = PerformanceComparator(Path(temp_dir)) @@ -651,7 +876,7 @@ def 
test_sampling_warning_reports_dev_full_mismatch(self): assert "Criterion measurement time: baseline=2, current=criterion-default" in warning assert "Criterion warm-up time: baseline=1, current=criterion-default" in warning - def test_sampling_warning_reports_missing_baseline_metadata(self, comparator, sample_baseline_content): + def test_sampling_warning_reports_missing_baseline_metadata(self, comparator, sample_baseline_content) -> None: """Test that legacy baselines without sampling metadata produce a warning.""" warning = comparator._sampling_warning(sample_baseline_content, dev_mode=False) @@ -674,9 +899,9 @@ def _sample_benchmark_results() -> list[BenchmarkData]: @patch("benchmark_utils.get_git_commit_hash", return_value="abc123") @patch("benchmark_utils.CriterionParser.find_criterion_results") @patch("benchmark_utils.run_cargo_command") - def test_generate_baseline_uses_perf_profile(self, mock_cargo, mock_find_results, mock_git): + def test_generate_baseline_uses_perf_profile(self, mock_cargo, mock_find_results, mock_git) -> None: """Test that full baseline generation benchmarks with the trusted Cargo profile.""" - mock_cargo.return_value = Mock(stdout="") + mock_cargo.return_value = completed_process(CI_MANIFEST_STDOUT) mock_find_results.return_value = self._sample_benchmark_results() with tempfile.TemporaryDirectory() as temp_dir: @@ -700,9 +925,9 @@ def test_generate_baseline_uses_perf_profile(self, mock_cargo, mock_find_results @patch("benchmark_utils.get_git_commit_hash", return_value="abc123") @patch("benchmark_utils.CriterionParser.find_criterion_results") @patch("benchmark_utils.run_cargo_command") - def test_generate_baseline_dev_mode_keeps_perf_profile(self, mock_cargo, mock_find_results, mock_git): + def test_generate_baseline_dev_mode_keeps_perf_profile(self, mock_cargo, mock_find_results, mock_git) -> None: """Test that dev baseline mode reduces Criterion settings without changing Cargo profile.""" - mock_cargo.return_value = Mock(stdout="") + mock_cargo.return_value = completed_process(CI_MANIFEST_STDOUT) mock_find_results.return_value = self._sample_benchmark_results() with tempfile.TemporaryDirectory() as temp_dir: @@ -731,12 +956,12 @@ class TestIntegrationScenarios: """Integration test scenarios for real-world use cases.""" @pytest.fixture - def comparator(self): + def comparator(self) -> PerformanceComparator: """Fixture for PerformanceComparator instance.""" project_root = Path("/fake/project") return PerformanceComparator(project_root) - def test_realistic_mixed_performance_scenario(self, comparator): + def test_realistic_mixed_performance_scenario(self, comparator) -> None: """Test a realistic scenario with mixed performance changes.""" # Simulate a realistic benchmark run with various performance changes current_results = [ @@ -775,7 +1000,7 @@ def test_realistic_mixed_performance_scenario(self, comparator): assert expected_average_line in result assert "✅ OVERALL OK" in result - def test_gradual_performance_degradation_scenario(self, comparator): + def test_gradual_performance_degradation_scenario(self, comparator) -> None: """Test scenario where performance gradually degrades across all benchmarks.""" # Simulate gradual performance degradation that individually isn't alarming # but collectively indicates a problem @@ -809,7 +1034,7 @@ def test_gradual_performance_degradation_scenario(self, comparator): assert "Average time change: 9.0%" in result assert "🚨 OVERALL REGRESSION" in result - def test_noisy_benchmarks_scenario(self, comparator): + def 
test_noisy_benchmarks_scenario(self, comparator) -> None: """Test scenario with noisy benchmarks that have high individual variance.""" # Simulate noisy benchmarks where individual results vary significantly # but overall trend is acceptable @@ -850,12 +1075,12 @@ class TestEdgeCases: """Test edge cases and error conditions.""" @pytest.fixture - def comparator(self): + def comparator(self) -> PerformanceComparator: """Fixture for PerformanceComparator instance.""" project_root = Path("/fake/project") return PerformanceComparator(project_root) - def test_empty_current_results(self, comparator): + def test_empty_current_results(self, comparator) -> None: """Test comparison with empty current results.""" baseline_results = { "1000_2D": BenchmarkData(1000, "2D").with_timing(95.0, 100.0, 105.0, "µs"), @@ -867,7 +1092,7 @@ def test_empty_current_results(self, comparator): assert not regression_found assert "SUMMARY" not in output.getvalue() - def test_empty_baseline_results(self, comparator): + def test_empty_baseline_results(self, comparator) -> None: """Test comparison with empty baseline results.""" current_results = [ BenchmarkData(1000, "2D").with_timing(105.0, 110.0, 115.0, "µs"), @@ -881,7 +1106,7 @@ def test_empty_baseline_results(self, comparator): assert "1000 Points (2D)" in result assert "SUMMARY" not in result - def test_all_zero_baseline_times(self, comparator): + def test_all_zero_baseline_times(self, comparator) -> None: """Test comparison when all baseline times are zero.""" current_results = [ BenchmarkData(1000, "2D").with_timing(105.0, 110.0, 115.0, "µs"), @@ -901,7 +1126,7 @@ def test_all_zero_baseline_times(self, comparator): assert "N/A (baseline mean is 0)" in result assert "SUMMARY" not in result # No valid comparisons - def test_mixed_valid_invalid_baselines(self, comparator): + def test_mixed_valid_invalid_baselines(self, comparator) -> None: """Test comparison with mix of valid and invalid baseline data.""" current_results = [ BenchmarkData(1000, "2D").with_timing(105.0, 110.0, 115.0, "µs"), @@ -929,14 +1154,14 @@ class TestWorkflowHelper: """Test cases for WorkflowHelper class.""" @patch.dict(os.environ, {"GITHUB_REF": "refs/tags/v1.2.3"}, clear=False) - def test_determine_tag_name_from_github_ref(self): + def test_determine_tag_name_from_github_ref(self) -> None: """Test tag name determination from GITHUB_REF with tag.""" tag_name = WorkflowHelper.determine_tag_name() assert tag_name == "v1.2.3" @patch.dict(os.environ, {"GITHUB_REF": "refs/heads/main"}, clear=False) @patch("benchmark_utils.datetime") - def test_determine_tag_name_generated(self, mock_datetime): + def test_determine_tag_name_generated(self, mock_datetime) -> None: """Test tag name generation when not from a tag push.""" # Mock datetime mock_now = Mock() @@ -947,7 +1172,7 @@ def test_determine_tag_name_generated(self, mock_datetime): assert tag_name == "manual-20231215-143000" @patch.dict(os.environ, {"GITHUB_REF": "refs/tags/v2.0.0"}, clear=False) - def test_determine_tag_name_with_github_output(self): + def test_determine_tag_name_with_github_output(self) -> None: """Test tag name determination with GITHUB_OUTPUT file.""" with tempfile.NamedTemporaryFile(mode="w", delete=False) as f: output_file = f.name @@ -964,7 +1189,7 @@ def test_determine_tag_name_with_github_output(self): finally: Path(output_file).unlink(missing_ok=True) - def test_create_metadata_success(self): + def test_create_metadata_success(self) -> None: """Test successful metadata creation.""" with tempfile.TemporaryDirectory() as 
temp_dir: output_dir = Path(temp_dir) @@ -998,7 +1223,7 @@ def test_create_metadata_success(self): # Check ISO format timestamp assert metadata["generated_at"].endswith("Z") - def test_create_metadata_with_safe_env_vars(self): + def test_create_metadata_with_safe_env_vars(self) -> None: """Test metadata creation with SAFE_ prefixed environment variables.""" with tempfile.TemporaryDirectory() as temp_dir: output_dir = Path(temp_dir) @@ -1027,7 +1252,7 @@ def test_create_metadata_with_safe_env_vars(self): assert metadata["commit"] == "def456abc789" assert metadata["workflow_run_id"] == "987654321" - def test_create_metadata_missing_env_vars(self): + def test_create_metadata_missing_env_vars(self) -> None: """Test metadata creation with missing environment variables.""" with tempfile.TemporaryDirectory() as temp_dir: output_dir = Path(temp_dir) @@ -1048,7 +1273,7 @@ def test_create_metadata_missing_env_vars(self): assert metadata["runner_os"] == "unknown" assert metadata["runner_arch"] == "unknown" - def test_create_metadata_directory_creation(self): + def test_create_metadata_directory_creation(self) -> None: """Test that metadata creation creates directory if it doesn't exist.""" with tempfile.TemporaryDirectory() as temp_dir: output_dir = Path(temp_dir) / "nested" / "path" @@ -1058,7 +1283,7 @@ def test_create_metadata_directory_creation(self): assert output_dir.exists() assert (output_dir / "metadata.json").exists() - def test_display_baseline_summary_success(self, capsys): + def test_display_baseline_summary_success(self, capsys) -> None: """Test successful baseline summary display.""" baseline_content = """Date: 2023-12-15 14:30:00 UTC Git commit: abc123def456 @@ -1095,7 +1320,7 @@ def test_display_baseline_summary_success(self, capsys): finally: baseline_file.unlink() - def test_display_baseline_summary_nonexistent_file(self, capsys): + def test_display_baseline_summary_nonexistent_file(self, capsys) -> None: """Test baseline summary with non-existent file.""" baseline_file = Path("/nonexistent/file.txt") @@ -1106,7 +1331,7 @@ def test_display_baseline_summary_nonexistent_file(self, capsys): captured = capsys.readouterr() assert "❌ Baseline file not found" in captured.err - def test_display_baseline_summary_long_file(self, capsys): + def test_display_baseline_summary_long_file(self, capsys) -> None: """Test baseline summary with file longer than 10 lines.""" baseline_content = "\n".join([f"Line {i}" for i in range(20)]) @@ -1125,17 +1350,17 @@ def test_display_baseline_summary_long_file(self, capsys): finally: baseline_file.unlink() - def test_sanitize_artifact_name_basic(self): + def test_sanitize_artifact_name_basic(self) -> None: """Test basic artifact name sanitization.""" artifact_name = WorkflowHelper.sanitize_artifact_name("v1.2.3") assert artifact_name == "performance-baseline-v1_2_3" - def test_sanitize_artifact_name_with_special_chars(self): + def test_sanitize_artifact_name_with_special_chars(self) -> None: """Test artifact name sanitization with special characters.""" artifact_name = WorkflowHelper.sanitize_artifact_name("manual-2023/12/15-14:30:00") assert artifact_name == "performance-baseline-manual-2023_12_15-14_30_00" - def test_sanitize_artifact_name_with_github_output(self): + def test_sanitize_artifact_name_with_github_output(self) -> None: """Test artifact name sanitization with GITHUB_OUTPUT file.""" with tempfile.NamedTemporaryFile(mode="w", delete=False) as f: output_file = f.name @@ -1160,12 +1385,12 @@ def test_sanitize_artifact_name_with_github_output(self): 
("v1.0.0+build.123", "performance-baseline-v1_0_0_build_123"), ], ) - def test_sanitize_artifact_name_edge_cases(self, input_tag, expected_output): + def test_sanitize_artifact_name_edge_cases(self, input_tag, expected_output) -> None: """Test artifact name sanitization with edge cases.""" result = WorkflowHelper.sanitize_artifact_name(input_tag) assert result == expected_output - def test_sanitize_artifact_name_special_characters(self): + def test_sanitize_artifact_name_special_characters(self) -> None: """Test that special characters are properly replaced in artifact names.""" special_chars_input = "@#$%^&*()[]{}|\\<>?" result = WorkflowHelper.sanitize_artifact_name(special_chars_input) @@ -1176,7 +1401,7 @@ def test_sanitize_artifact_name_special_characters(self): class TestBenchmarkRegressionHelper: """Test cases for BenchmarkRegressionHelper class.""" - def test_prepare_baseline_success(self, capsys): + def test_prepare_baseline_success(self, capsys) -> None: """Test successful baseline preparation.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -1217,7 +1442,7 @@ def test_prepare_baseline_success(self, capsys): finally: Path(env_path).unlink(missing_ok=True) - def test_prepare_baseline_copy_error_handling(self, capsys): + def test_prepare_baseline_copy_error_handling(self, capsys) -> None: """Test error handling when copying baseline file fails.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -1258,7 +1483,7 @@ def test_prepare_baseline_copy_error_handling(self, capsys): finally: Path(env_path).unlink(missing_ok=True) - def test_prepare_baseline_read_summary_error_handling(self, capsys): + def test_prepare_baseline_read_summary_error_handling(self, capsys) -> None: """Test graceful error handling when baseline summary cannot be read.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -1284,7 +1509,7 @@ def test_prepare_baseline_read_summary_error_handling(self, capsys): # Mock Path.open method to fail for read operations on baseline_results.txt original_path_open = Path.open - def mock_path_open(self, mode="r", *args, **kwargs): + def mock_path_open(self, mode="r", *args, **kwargs) -> Any: if self.name == "baseline_results.txt" and "r" in mode: msg = "Read permission denied" raise OSError(msg) @@ -1314,7 +1539,7 @@ def mock_path_open(self, mode="r", *args, **kwargs): finally: Path(env_path).unlink(missing_ok=True) - def test_prepare_baseline_missing_file(self, capsys): + def test_prepare_baseline_missing_file(self, capsys) -> None: """Test baseline preparation when baseline file is missing.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -1341,7 +1566,7 @@ def test_prepare_baseline_missing_file(self, capsys): finally: Path(env_path).unlink(missing_ok=True) - def test_set_no_baseline_status(self, capsys): + def test_set_no_baseline_status(self, capsys) -> None: """Test setting no baseline status.""" with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file: env_path = env_file.name @@ -1363,7 +1588,7 @@ def test_set_no_baseline_status(self, capsys): finally: Path(env_path).unlink(missing_ok=True) - def test_extract_baseline_commit_from_baseline_file(self): + def test_extract_baseline_commit_from_baseline_file(self) -> None: """Test extracting commit SHA from baseline_results.txt.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -1393,7 +1618,7 @@ def 
test_extract_baseline_commit_from_baseline_file(self): finally: Path(env_path).unlink(missing_ok=True) - def test_extract_baseline_commit_from_metadata(self): + def test_extract_baseline_commit_from_metadata(self) -> None: """Test extracting commit SHA from metadata.json when baseline file fails.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -1421,7 +1646,7 @@ def test_extract_baseline_commit_from_metadata(self): finally: Path(env_path).unlink(missing_ok=True) - def test_extract_baseline_commit_unknown(self): + def test_extract_baseline_commit_unknown(self) -> None: """Test extracting commit SHA when no valid SHA is found.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -1444,14 +1669,14 @@ def test_extract_baseline_commit_unknown(self): finally: Path(env_path).unlink(missing_ok=True) - def test_determine_benchmark_skip_unknown_baseline(self): + def test_determine_benchmark_skip_unknown_baseline(self) -> None: """Test skip determination with unknown baseline commit.""" should_skip, reason = BenchmarkRegressionHelper.determine_benchmark_skip("unknown", "def4567") assert not should_skip assert reason == "unknown_baseline" - def test_determine_benchmark_skip_same_commit(self): + def test_determine_benchmark_skip_same_commit(self) -> None: """Test skip determination with same commit.""" should_skip, reason = BenchmarkRegressionHelper.determine_benchmark_skip("abc1234", "abc1234") @@ -1459,7 +1684,7 @@ def test_determine_benchmark_skip_same_commit(self): assert reason == "same_commit" @patch("benchmark_utils.run_git_command") - def test_determine_benchmark_skip_baseline_not_found(self, mock_git): + def test_determine_benchmark_skip_baseline_not_found(self, mock_git) -> None: """Test skip determination when baseline commit not found in history.""" # Simulate git cat-file failing mock_git.side_effect = subprocess.CalledProcessError(1, "git") @@ -1470,12 +1695,12 @@ def test_determine_benchmark_skip_baseline_not_found(self, mock_git): assert reason == "baseline_commit_not_found" @patch("benchmark_utils.run_git_command") - def test_determine_benchmark_skip_no_changes(self, mock_git): + def test_determine_benchmark_skip_no_changes(self, mock_git) -> None: """Test skip determination when no relevant changes found.""" # Mock successful git commands mock_git.side_effect = [ - Mock(returncode=0), # git cat-file succeeds - Mock(returncode=0, stdout="docs/README.md\n.github/workflows/other.yml\n", stderr=""), # git diff + completed_process(), # git cat-file succeeds + completed_process("docs/README.md\n.github/workflows/other.yml\n"), # git diff ] should_skip, reason = BenchmarkRegressionHelper.determine_benchmark_skip("abc1234", "def4567") @@ -1484,12 +1709,12 @@ def test_determine_benchmark_skip_no_changes(self, mock_git): assert reason == "no_relevant_changes" @patch("benchmark_utils.run_git_command") - def test_determine_benchmark_skip_changes_detected(self, mock_git): + def test_determine_benchmark_skip_changes_detected(self, mock_git) -> None: """Test skip determination when relevant changes are detected.""" # Mock successful git commands mock_git.side_effect = [ - Mock(returncode=0), # git cat-file succeeds - Mock(returncode=0, stdout="src/core/mod.rs\nbenches/performance.rs\n", stderr=""), # git diff + completed_process(), # git cat-file succeeds + completed_process("src/core/mod.rs\nbenches/performance.rs\n"), # git diff ] should_skip, reason = BenchmarkRegressionHelper.determine_benchmark_skip("abc1234", "def4567") @@ 
-1497,14 +1722,14 @@ def test_determine_benchmark_skip_changes_detected(self, mock_git): assert not should_skip assert reason == "changes_detected" - def test_display_skip_message(self, capsys): + def test_display_skip_message(self, capsys) -> None: """Test displaying skip messages.""" BenchmarkRegressionHelper.display_skip_message("same_commit", "abc1234") captured = capsys.readouterr() assert "🔍 Current commit matches baseline (abc1234)" in captured.out - def test_display_no_baseline_message(self, capsys): + def test_display_no_baseline_message(self, capsys) -> None: """Test displaying no baseline message.""" BenchmarkRegressionHelper.display_no_baseline_message() @@ -1512,7 +1737,7 @@ def test_display_no_baseline_message(self, capsys): assert "⚠️ No performance baseline available" in captured.out assert "💡 To enable performance regression testing:" in captured.out - def test_run_regression_test_success(self, capsys): + def test_run_regression_test_success(self, capsys) -> None: """Test successful regression test run.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_file = Path(temp_dir) / "baseline.txt" @@ -1531,7 +1756,7 @@ def test_run_regression_test_success(self, capsys): captured = capsys.readouterr() assert "🚀 Running performance regression test" in captured.out - def test_run_regression_test_dev_mode(self, capsys): + def test_run_regression_test_dev_mode(self, capsys) -> None: """Test regression test run with dev mode enabled.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_file = Path(temp_dir) / "baseline.txt" @@ -1550,7 +1775,7 @@ def test_run_regression_test_dev_mode(self, capsys): captured = capsys.readouterr() assert "dev mode (10x faster)" in captured.out - def test_run_regression_test_failure(self): + def test_run_regression_test_failure(self) -> None: """Test regression test run failure.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_file = Path(temp_dir) / "baseline.txt" @@ -1565,7 +1790,7 @@ def test_run_regression_test_failure(self): assert not success - def test_run_regression_test_custom_timeout(self, capsys): + def test_run_regression_test_custom_timeout(self, capsys) -> None: """Test regression test run with custom bench_timeout parameter.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_file = Path(temp_dir) / "baseline.txt" @@ -1584,7 +1809,7 @@ def test_run_regression_test_custom_timeout(self, capsys): captured = capsys.readouterr() assert "🚀 Running performance regression test" in captured.out - def test_display_results_file_exists(self, capsys): + def test_display_results_file_exists(self, capsys) -> None: """Test displaying results when file exists.""" with tempfile.TemporaryDirectory() as temp_dir: results_file = Path(temp_dir) / "results.txt" @@ -1597,7 +1822,7 @@ def test_display_results_file_exists(self, capsys): assert "=== Performance Regression Test Results ===" in captured.out assert "All tests passed" in captured.out - def test_display_results_file_missing(self, capsys): + def test_display_results_file_missing(self, capsys) -> None: """Test displaying results when file is missing.""" missing_file = Path("/nonexistent/results.txt") @@ -1606,7 +1831,7 @@ def test_display_results_file_missing(self, capsys): captured = capsys.readouterr() assert "⚠️ No comparison results file found" in captured.out - def test_generate_summary_with_regression(self, temp_chdir, capsys): + def test_generate_summary_with_regression(self, temp_chdir, capsys) -> None: """Test generating summary when regression is detected.""" 
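Throughout these hunks, hand-rolled `Mock(returncode=0, stdout=..., stderr="")` objects are replaced by a `completed_process(...)` helper whose definition lives earlier in `scripts/tests/test_benchmark_utils.py` and is not shown in this diff. Judging only from the call sites visible here (`completed_process()`, `completed_process("v1.2.3\n")`, `completed_process(returncode=101, stderr="benchmark failed")`), it is presumably a thin factory over `subprocess.CompletedProcess`. A minimal sketch under that assumption:

    import subprocess

    def completed_process(
        stdout: str = "",
        *,
        returncode: int = 0,
        stderr: str = "",
    ) -> subprocess.CompletedProcess[str]:
        # Hypothetical reconstruction: return real stdout/stderr strings so code
        # under test that calls result.stdout.strip() sees genuine str behavior
        # rather than chained Mock attributes.
        return subprocess.CompletedProcess(
            args=["<mocked command>"],  # placeholder; the real helper may record the command
            returncode=returncode,
            stdout=stdout,
            stderr=stderr,
        )

The practical effect shows up in `test_get_current_version_with_tag` below, where `mock_result.stdout.strip.return_value = "v1.2.3"` collapses to `completed_process("v1.2.3\n")`.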
with tempfile.TemporaryDirectory() as temp_dir: results_file = Path(temp_dir) / "benches" / "compare_results.txt" @@ -1631,7 +1856,7 @@ def test_generate_summary_with_regression(self, temp_chdir, capsys): assert "Baseline source: artifact" in captured.out assert "Result: ⚠️ Performance regressions detected" in captured.out - def test_generate_summary_skip_same_commit(self, capsys): + def test_generate_summary_skip_same_commit(self, capsys) -> None: """Test generating summary when benchmarks skipped due to same commit.""" env_vars = { "BASELINE_SOURCE": "artifact", @@ -1647,7 +1872,7 @@ def test_generate_summary_skip_same_commit(self, capsys): captured = capsys.readouterr() assert "Result: ⏭️ Benchmarks skipped (same commit as baseline)" in captured.out - def test_generate_summary_no_baseline(self, capsys): + def test_generate_summary_no_baseline(self, capsys) -> None: """Test generating summary when no baseline available.""" env_vars = { "BASELINE_EXISTS": "false", @@ -1660,7 +1885,7 @@ def test_generate_summary_no_baseline(self, capsys): captured = capsys.readouterr() assert "Result: ⏭️ Benchmarks skipped (no baseline available)" in captured.out - def test_generate_summary_sets_regression_environment_variable(self, temp_chdir, capsys): + def test_generate_summary_sets_regression_environment_variable(self, temp_chdir, capsys) -> None: """Test that generate_summary sets BENCHMARK_REGRESSION_DETECTED environment variable when regressions are found.""" with tempfile.TemporaryDirectory() as temp_dir: results_file = Path(temp_dir) / "benches" / "compare_results.txt" @@ -1686,7 +1911,7 @@ def test_generate_summary_sets_regression_environment_variable(self, temp_chdir, captured = capsys.readouterr() assert "Exported BENCHMARK_REGRESSION_DETECTED=true for downstream CI steps" in captured.out - def test_generate_summary_github_env_export(self, temp_chdir): + def test_generate_summary_github_env_export(self, temp_chdir) -> None: """Test that BENCHMARK_REGRESSION_DETECTED is also exported to GITHUB_ENV when available.""" with tempfile.TemporaryDirectory() as temp_dir: results_file = Path(temp_dir) / "benches" / "compare_results.txt" @@ -1708,7 +1933,7 @@ def test_generate_summary_github_env_export(self, temp_chdir): github_env_content = github_env_file.read_text() assert "BENCHMARK_REGRESSION_DETECTED=true" in github_env_content - def test_generate_summary_with_error_file(self, temp_chdir, capsys): + def test_generate_summary_with_error_file(self, temp_chdir, capsys) -> None: """Test generating summary when comparison failed with error file.""" with tempfile.TemporaryDirectory() as temp_dir: results_file = Path(temp_dir) / "benches" / "compare_results.txt" @@ -1748,7 +1973,7 @@ def test_generate_summary_with_error_file(self, temp_chdir, capsys): class TestProjectRootHandling: """Test cases for find_project_root functionality.""" - def test_find_project_root_success(self, temp_chdir): + def test_find_project_root_success(self, temp_chdir) -> None: """Test finding project root when Cargo.toml exists.""" with tempfile.TemporaryDirectory() as temp_dir: temp_path = Path(temp_dir) @@ -1766,7 +1991,7 @@ def test_find_project_root_success(self, temp_chdir): # Resolve both paths to handle symlinks (macOS /var -> /private/var) assert result.resolve() == temp_path.resolve() - def test_find_project_root_not_found(self, temp_chdir): + def test_find_project_root_not_found(self, temp_chdir) -> None: """Test finding project root when Cargo.toml doesn't exist.""" with tempfile.TemporaryDirectory() as temp_dir: 
temp_path = Path(temp_dir) @@ -1793,7 +2018,7 @@ class TestTimeoutHandling: ), ], ) - def test_timeout_parameter_passed(self, component_class, method_name, setup_func): + def test_timeout_parameter_passed(self, component_class, method_name, setup_func) -> None: """Test that benchmark components accept and use timeout parameter.""" with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -1826,7 +2051,7 @@ def test_timeout_parameter_passed(self, component_class, method_name, setup_func assert mock_cargo.call_count >= 1 assert any(call.kwargs.get("timeout") == 120 for call in mock_cargo.call_args_list) - def test_timeout_error_handling_baseline_generator(self, capsys): + def test_timeout_error_handling_baseline_generator(self, capsys) -> None: """Test proper error handling when benchmark times out in BaselineGenerator.""" with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -1844,7 +2069,7 @@ def test_timeout_error_handling_baseline_generator(self, capsys): assert "timed out after 1800 seconds" in captured.err assert "Consider increasing --bench-timeout" in captured.err - def test_timeout_error_handling_performance_comparator(self, capsys): + def test_timeout_error_handling_performance_comparator(self, capsys) -> None: """Test proper error handling when benchmark times out in PerformanceComparator.""" with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -1874,7 +2099,7 @@ def test_timeout_error_handling_performance_comparator(self, capsys): assert "cargo bench" in error_content # Command from exception assert "timeout after 1800 seconds" in error_content # Explicit timeout value - def test_cli_bench_timeout_validation(self, monkeypatch, temp_chdir): + def test_cli_bench_timeout_validation(self, monkeypatch, temp_chdir) -> None: """Test that CLI validates bench_timeout is positive via main().""" # Create a temporary project with Cargo.toml to satisfy find_project_root with tempfile.TemporaryDirectory() as temp_dir: @@ -1903,7 +2128,7 @@ def test_cli_bench_timeout_validation(self, monkeypatch, temp_chdir): assert hasattr(args, "validate_bench_timeout") assert args.validate_bench_timeout - def test_parser_accepts_verbose_flag(self): + def test_parser_accepts_verbose_flag(self) -> None: """Test that the CLI parser accepts the shared verbose logging flag.""" parser = create_argument_parser() args = parser.parse_args(["--verbose", "generate-summary"]) @@ -1911,7 +2136,7 @@ def test_parser_accepts_verbose_flag(self): assert args.verbose assert args.command == "generate-summary" - def test_configure_logging_uses_debug_when_verbose(self): + def test_configure_logging_uses_debug_when_verbose(self) -> None: """Test that verbose mode configures debug-level CLI logging.""" with patch("benchmark_utils.logging.basicConfig") as mock_basic_config: configure_logging(verbose=True) @@ -1921,7 +2146,7 @@ def test_configure_logging_uses_debug_when_verbose(self): format="%(levelname)s: %(message)s", ) - def test_configure_logging_defaults_to_info(self): + def test_configure_logging_defaults_to_info(self) -> None: """Test that non-verbose mode configures info-level CLI logging.""" with patch("benchmark_utils.logging.basicConfig") as mock_basic_config: configure_logging(verbose=False) @@ -1935,7 +2160,7 @@ def test_configure_logging_defaults_to_info(self): class TestPerformanceSummaryGenerator: """Test cases for PerformanceSummaryGenerator class.""" - def test_init(self): + def test_init(self) -> None: """Test PerformanceSummaryGenerator 
initialization.""" with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -1949,7 +2174,7 @@ def test_init(self): assert isinstance(generator.current_version, str) assert isinstance(generator.current_date, str) - def test_generate_summary_parser_defaults_to_trusted_profile(self): + def test_generate_summary_parser_defaults_to_trusted_profile(self) -> None: """Test that fresh summary benchmarks default to the trusted Cargo profile.""" parser = create_argument_parser() args = parser.parse_args(["generate-summary", "--run-benchmarks"]) @@ -1957,11 +2182,9 @@ def test_generate_summary_parser_defaults_to_trusted_profile(self): assert args.profile == TRUSTED_BENCH_PROFILE @patch("benchmark_utils.run_git_command") - def test_get_current_version_with_tag(self, mock_git_command): + def test_get_current_version_with_tag(self, mock_git_command) -> None: """Test getting current version from git tags.""" - mock_result = Mock() - mock_result.stdout.strip.return_value = "v1.2.3" - mock_git_command.return_value = mock_result + mock_git_command.return_value = completed_process("v1.2.3\n") with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -1972,14 +2195,13 @@ def test_get_current_version_with_tag(self, mock_git_command): mock_git_command.assert_called_with(["describe", "--tags", "--abbrev=0", "--match=v*"], cwd=project_root) @patch("benchmark_utils.run_git_command") - def test_get_current_version_fallback(self, mock_git_command): + def test_get_current_version_fallback(self, mock_git_command) -> None: """Test fallback version detection when describe fails.""" # First call (describe) fails, second call (tag -l) succeeds - mock_result = Mock() - mock_result.stdout.strip.return_value = "v0.1.0\nv0.2.0" + mock_result = completed_process("v0.1.0\nv0.2.0") # The second call is made within the exception handler - def side_effect(*args, **kwargs): + def side_effect(*args, **kwargs) -> subprocess.CompletedProcess[str]: if "describe" in args[0]: raise subprocess.CalledProcessError(1, "git describe", "describe failed") return mock_result @@ -1994,9 +2216,9 @@ def side_effect(*args, **kwargs): assert version == "0.1.0" @patch("benchmark_utils.run_git_command") - def test_get_current_version_no_tags(self, mock_git_command): + def test_get_current_version_no_tags(self, mock_git_command) -> None: """Test version detection when no tags are found.""" - mock_git_command.side_effect = Exception("No tags found") + mock_git_command.side_effect = RuntimeError("No tags found") with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2007,11 +2229,9 @@ def test_get_current_version_no_tags(self, mock_git_command): @patch("benchmark_utils.run_git_command") @patch("benchmark_utils.datetime") - def test_get_version_date_with_tag(self, mock_datetime, mock_git_command): # noqa: ARG002 + def test_get_version_date_with_tag(self, mock_datetime, mock_git_command) -> None: # noqa: ARG002 """Test getting version date from git tag.""" - mock_result = Mock() - mock_result.stdout.strip.return_value = "2024-01-15" - mock_git_command.return_value = mock_result + mock_git_command.return_value = completed_process("2024-01-15\n") with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2024,9 +2244,9 @@ def test_get_version_date_with_tag(self, mock_datetime, mock_git_command): # no @patch("benchmark_utils.run_git_command") @patch("benchmark_utils.datetime") - def test_get_version_date_fallback(self, mock_datetime, mock_git_command): + def 
test_get_version_date_fallback(self, mock_datetime, mock_git_command) -> None: """Test version date fallback to current date.""" - mock_git_command.side_effect = Exception("Git command failed") + mock_git_command.side_effect = RuntimeError("Git command failed") mock_now = Mock() mock_now.strftime.return_value = "2024-01-15" mock_datetime.now.return_value = mock_now @@ -2040,7 +2260,7 @@ def test_get_version_date_fallback(self, mock_datetime, mock_git_command): assert date == "2024-01-15" mock_now.strftime.assert_called_with("%Y-%m-%d") - def test_parse_baseline_results_nonexistent_file(self): + def test_parse_baseline_results_nonexistent_file(self) -> None: """Test parsing baseline results when file doesn't exist.""" with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2052,7 +2272,7 @@ def test_parse_baseline_results_nonexistent_file(self): assert "### Baseline Results" in content assert "Error parsing baseline results" in content - def test_parse_baseline_results_with_data(self): + def test_parse_baseline_results_with_data(self) -> None: """Test parsing baseline results with actual data.""" baseline_content = """Date: 2024-01-15 10:30:00 UTC Git commit: abc123def456 @@ -2089,7 +2309,7 @@ def test_parse_baseline_results_with_data(self): assert "### 3D Triangulation Performance" in markdown_content assert "| Points | Time (mean) | Throughput (mean) | Scaling |" in markdown_content - def test_parse_comparison_results_with_regression(self): + def test_parse_comparison_results_with_regression(self) -> None: """Test parsing comparison results that show regression.""" comparison_content = """Performance Comparison Results ⚠️ REGRESSION: Time increased by 15.2% (slower performance) @@ -2112,7 +2332,7 @@ def test_parse_comparison_results_with_regression(self): assert "REGRESSION: Time increased by 15.2%" in markdown_content assert "IMPROVEMENT: Time decreased by 8.5%" in markdown_content - def test_parse_comparison_results_no_regression(self): + def test_parse_comparison_results_no_regression(self) -> None: """Test parsing comparison results with no regression.""" comparison_content = """Performance Comparison Results ✅ OK: Time change +2.1% within acceptable range @@ -2137,10 +2357,10 @@ def test_parse_comparison_results_no_regression(self): @patch("benchmark_utils.get_git_commit_hash") @patch("benchmark_utils.run_git_command") @patch("benchmark_utils.datetime") - def test_generate_markdown_content(self, mock_datetime, mock_run_git, mock_git_commit): + def test_generate_markdown_content(self, mock_datetime, mock_run_git, mock_git_commit) -> None: """Test generating complete markdown content.""" # Avoid calling actual git in __init__ helpers - mock_run_git.side_effect = Exception("git unavailable in test") + mock_run_git.side_effect = RuntimeError("git unavailable in test") mock_git_commit.return_value = "abc123def456" mock_now = Mock() mock_now.strftime.return_value = "2024-01-15 10:30:00 UTC" @@ -2161,12 +2381,38 @@ def test_generate_markdown_content(self, mock_datetime, mock_run_git, mock_git_c assert "## Performance Results Summary" in content # Check static content sections - assert "## Key Findings" in content - assert "### Performance Ranking" in content - assert "## Recommendations" in content - assert "## Performance Data Updates" in content + assert PUBLIC_API_TITLE in content + assert CIRCUMSPHERE_TITLE in content + assert PERFORMANCE_RANKING_TITLE in content + assert RECOMMENDATIONS_TITLE in content + assert PERFORMANCE_UPDATES_TITLE in content + + def 
test_get_ci_performance_suite_results(self) -> None: + """Test public API summary generation from ci_performance_suite Criterion data.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_root = Path(temp_dir) + + write_estimate(project_root / "target", ("tds_new_2d", "tds_new", "10"), 120_000.0) + write_estimate(project_root / "target", ("boundary_facets", "boundary_facets_3d_adversarial", "50"), 7_500.0) + write_estimate(project_root / "target", ("bistellar_flips_4d", "k2_roundtrip"), 950.0) + + generator = PerformanceSummaryGenerator(project_root) + lines = generator._get_ci_performance_suite_results() + content = "\n".join(lines) - def test_get_circumsphere_performance_results(self): + assert PUBLIC_API_TITLE in content + assert "#### Construction" in content + assert "Public API: `DelaunayTriangulation::new_with_options`" in content + assert "`tds_new_2d/tds_new/10`" in content + assert "well-conditioned" in content + assert "#### Boundary facets" in content + assert "`boundary_facets/boundary_facets_3d_adversarial/50`" in content + assert "| `boundary_facets/boundary_facets_3d_adversarial/50` | 3D | 50 | adversarial |" in content + assert "adversarial" in content + assert "#### Bistellar flips" in content + assert "`bistellar_flips_4d/k2_roundtrip`" in content + + def test_get_circumsphere_performance_results(self) -> None: """Test getting circumsphere performance results.""" with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2175,11 +2421,11 @@ def test_get_circumsphere_performance_results(self): lines = generator._get_circumsphere_performance_results() content = "\n".join(lines) - assert "### Circumsphere Performance Results" in content + assert "### Circumsphere Predicate Performance" in content # Should contain fallback performance data when no criterion results exist assert "Basic 3D" in content or "Version unknown" in content - def test_get_update_instructions(self): + def test_get_update_instructions(self) -> None: """Test getting performance data update instructions.""" with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2188,12 +2434,12 @@ def test_get_update_instructions(self): lines = generator._get_update_instructions() content = "\n".join(lines) - assert "## Performance Data Updates" in content + assert PERFORMANCE_UPDATES_TITLE in content assert "uv run benchmark-utils generate-baseline" in content assert "uv run benchmark-utils generate-summary" in content assert "PerformanceSummaryGenerator" in content - def test_parse_numerical_accuracy_output_success(self): + def test_parse_numerical_accuracy_output_success(self) -> None: """Test parsing numerical accuracy output successfully.""" with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2216,7 +2462,7 @@ def test_parse_numerical_accuracy_output_success(self): assert result["distance_lifted"] == "20.3%" assert result["all_agree"] == "0.8%" - def test_parse_numerical_accuracy_output_no_data(self): + def test_parse_numerical_accuracy_output_no_data(self) -> None: """Test parsing numerical accuracy output with no relevant data.""" with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2230,7 +2476,7 @@ def test_parse_numerical_accuracy_output_no_data(self): assert result is None - def test_parse_numerical_accuracy_output_malformed(self): + def test_parse_numerical_accuracy_output_malformed(self) -> None: """Test parsing numerical accuracy output with malformed data.""" with 
tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2245,9 +2491,9 @@ def test_parse_numerical_accuracy_output_malformed(self): assert result is None @patch("benchmark_utils.run_cargo_command") - def test_run_circumsphere_benchmarks_success(self, mock_cargo): + def test_run_circumsphere_benchmarks_success(self, mock_cargo) -> None: """Test running circumsphere benchmarks successfully.""" - mock_cargo.return_value = Mock(stdout="") + mock_cargo.return_value = completed_process() with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2271,9 +2517,9 @@ def test_run_circumsphere_benchmarks_success(self, mock_cargo): ] @patch("benchmark_utils.run_cargo_command") - def test_run_circumsphere_benchmarks_uses_requested_cargo_profile(self, mock_cargo): + def test_run_circumsphere_benchmarks_uses_requested_cargo_profile(self, mock_cargo) -> None: """Test running circumsphere benchmarks with an explicit Cargo profile.""" - mock_cargo.return_value = Mock(stdout="") + mock_cargo.return_value = completed_process() with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2289,17 +2535,18 @@ def test_run_circumsphere_benchmarks_uses_requested_cargo_profile(self, mock_car assert args[:5] == ["bench", "--profile", requested_profile, "--bench", "circumsphere_containment"] @patch("benchmark_utils.run_cargo_command") - def test_run_circumsphere_benchmarks_with_numerical_data(self, mock_cargo): + def test_run_circumsphere_benchmarks_with_numerical_data(self, mock_cargo) -> None: """Test running circumsphere benchmarks with numerical accuracy data.""" # Mock cargo command to return output with numerical accuracy data - mock_result = Mock() - mock_result.stdout = """Running benchmarks... + mock_result = completed_process( + """Running benchmarks... 
Method Comparisons (1000 total tests): insphere vs insphere_distance: 820/1000 (82.0%) insphere vs insphere_lifted: 5/1000 (0.5%) insphere_distance vs insphere_lifted: 180/1000 (18.0%) All three methods agree: 2/1000 (0.2%) -Benchmark completed.""" +Benchmark completed.""", + ) mock_cargo.return_value = mock_result with tempfile.TemporaryDirectory() as temp_dir: @@ -2328,9 +2575,9 @@ def test_run_circumsphere_benchmarks_with_numerical_data(self, mock_cargo): ] @patch("benchmark_utils.run_cargo_command") - def test_run_circumsphere_benchmarks_failure(self, mock_cargo, capsys): + def test_run_circumsphere_benchmarks_failure(self, mock_cargo, capsys) -> None: """Test handling circumsphere benchmark failures.""" - mock_cargo.side_effect = Exception("Benchmark failed") + mock_cargo.side_effect = RuntimeError("Benchmark failed") with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2345,10 +2592,117 @@ def test_run_circumsphere_benchmarks_failure(self, mock_cargo, capsys): captured = capsys.readouterr() assert "Error running circumsphere benchmarks" in captured.out + @patch("benchmark_utils.run_cargo_command") + def test_run_ci_performance_suite_success(self, mock_cargo) -> None: + """Test running the public API CI performance suite successfully.""" + mock_cargo.return_value = completed_process(CI_MANIFEST_STDOUT) + + with tempfile.TemporaryDirectory() as temp_dir: + project_root = Path(temp_dir) + generator = PerformanceSummaryGenerator(project_root) + + success = generator._run_ci_performance_suite() + + assert success is True + mock_cargo.assert_called_once() + args = mock_cargo.call_args.args[0] + assert args[:5] == [ + "bench", + "--profile", + TRUSTED_BENCH_PROFILE, + "--bench", + "ci_performance_suite", + ] + assert "--" not in args + manifest_path = project_root / "target" / "criterion" / _CI_PERFORMANCE_SUITE_MANIFEST_IDS_FILE + assert manifest_path.read_text(encoding="utf-8") == "boundary_facets/boundary_facets_3d/50\n" + + @patch("benchmark_utils.run_cargo_command") + def test_run_ci_performance_suite_uses_requested_cargo_profile(self, mock_cargo) -> None: + """Test running the public API CI performance suite with an explicit profile.""" + mock_cargo.return_value = completed_process(CI_MANIFEST_STDOUT) + + with tempfile.TemporaryDirectory() as temp_dir: + project_root = Path(temp_dir) + generator = PerformanceSummaryGenerator(project_root) + + requested_profile = "release" + success = generator._run_ci_performance_suite(cargo_profile=requested_profile) + + assert success is True + mock_cargo.assert_called_once() + args = mock_cargo.call_args.args[0] + assert args[:5] == ["bench", "--profile", requested_profile, "--bench", "ci_performance_suite"] + assert "--" not in args + + @patch("benchmark_utils.run_cargo_command") + def test_run_ci_performance_suite_dev_mode_uses_reduced_sampling(self, mock_cargo) -> None: + """Test dev mode appends reduced Criterion sampling args explicitly.""" + mock_cargo.return_value = completed_process(CI_MANIFEST_STDOUT) + + with tempfile.TemporaryDirectory() as temp_dir: + project_root = Path(temp_dir) + generator = PerformanceSummaryGenerator(project_root) + + success = generator._run_ci_performance_suite(use_dev_mode=True) + + assert success is True + args = mock_cargo.call_args.args[0] + assert "--" in args + for arg in DEV_MODE_BENCH_ARGS: + assert arg in args + + @patch("benchmark_utils.run_cargo_command") + def test_run_ci_performance_suite_requires_manifest(self, mock_cargo) -> None: + """Test successful 
ci_performance_suite runs must emit the manifest.""" + mock_cargo.return_value = completed_process() + + with tempfile.TemporaryDirectory() as temp_dir: + project_root = Path(temp_dir) + stale_manifest_path = project_root / "target" / "criterion" / _CI_PERFORMANCE_SUITE_MANIFEST_IDS_FILE + stale_manifest_path.parent.mkdir(parents=True) + stale_manifest_path.write_text("stale/benchmark/id\n", encoding="utf-8") + generator = PerformanceSummaryGenerator(project_root) + + with pytest.raises(RuntimeError, match="emitted no api_benchmark manifest"): + generator._run_ci_performance_suite() + + assert stale_manifest_path.read_text(encoding="utf-8") == "stale/benchmark/id\n" + + @patch("benchmark_utils.run_cargo_command") + def test_run_ci_performance_suite_nonzero_exit(self, mock_cargo, capsys) -> None: + """Test handling ci_performance_suite nonzero process exits.""" + mock_cargo.return_value = completed_process(returncode=101, stderr="benchmark failed") + + with tempfile.TemporaryDirectory() as temp_dir: + project_root = Path(temp_dir) + generator = PerformanceSummaryGenerator(project_root) + + success = generator._run_ci_performance_suite() + + assert success is False + captured = capsys.readouterr() + assert "cargo exited with status 101" in captured.out + + @patch("benchmark_utils.run_cargo_command") + def test_run_ci_performance_suite_failure(self, mock_cargo, capsys) -> None: + """Test handling ci_performance_suite benchmark failures.""" + mock_cargo.side_effect = OSError("Benchmark failed") + + with tempfile.TemporaryDirectory() as temp_dir: + project_root = Path(temp_dir) + generator = PerformanceSummaryGenerator(project_root) + + success = generator._run_ci_performance_suite() + + assert success is False + captured = capsys.readouterr() + assert "Error running ci_performance_suite benchmarks" in captured.out + @patch("benchmark_utils.run_git_command") - def test_generate_summary_success(self, mock_git, capsys): + def test_generate_summary_success(self, mock_git, capsys) -> None: """Test successful generation of performance summary.""" - mock_git.side_effect = Exception("git unavailable in test") + mock_git.side_effect = RuntimeError("git unavailable in test") with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) generator = PerformanceSummaryGenerator(project_root) @@ -2370,8 +2724,10 @@ def test_generate_summary_success(self, mock_git, capsys): assert "Generated performance summary" in captured.out @patch("benchmark_utils.PerformanceSummaryGenerator._run_circumsphere_benchmarks") - def test_generate_summary_with_benchmarks(self, mock_run_benchmarks): + @patch("benchmark_utils.PerformanceSummaryGenerator._run_ci_performance_suite") + def test_generate_summary_with_benchmarks(self, mock_run_ci_suite, mock_run_benchmarks) -> None: """Test generating summary with fresh benchmark run.""" + mock_run_ci_suite.return_value = True mock_run_benchmarks.return_value = (True, None) with tempfile.TemporaryDirectory() as temp_dir: @@ -2385,12 +2741,15 @@ def test_generate_summary_with_benchmarks(self, mock_run_benchmarks): assert success is True # When run_benchmarks=True without an explicit profile, generate_summary # must default to TRUSTED_BENCH_PROFILE. 
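`DEV_MODE_BENCH_ARGS` is exercised by `test_run_ci_performance_suite_dev_mode_uses_reduced_sampling` above and by `test_dev_mode_args_consistency` further down, but its actual value is defined in `benchmark_utils.py` outside this diff. The assertions only pin down a flat list of at least three Criterion flag/value pairs appended after the `--` separator; a plausible shape, with the specific flags and numbers assumed purely for illustration:

    # Assumed illustration only; the real values live in benchmark_utils.py.
    # Criterion bench binaries accept these flags after `--` to shorten dev runs.
    DEV_MODE_BENCH_ARGS: list[str] = [
        "--sample-size", "10",
        "--measurement-time", "2",
        "--warm-up-time", "1",
    ]

    def build_bench_args(base: list[str], *, use_dev_mode: bool) -> list[str]:
        # Mirrors what the tests check: dev mode appends `--` plus the reduced
        # sampling args; trusted-profile runs leave the cargo invocation as-is.
        return [*base, "--", *DEV_MODE_BENCH_ARGS] if use_dev_mode else list(base)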
+ mock_run_ci_suite.assert_called_once_with(cargo_profile=TRUSTED_BENCH_PROFILE) mock_run_benchmarks.assert_called_once_with(cargo_profile=TRUSTED_BENCH_PROFILE) assert output_file.exists() @patch("benchmark_utils.PerformanceSummaryGenerator._run_circumsphere_benchmarks") - def test_generate_summary_passes_cargo_profile_to_benchmarks(self, mock_run_benchmarks): + @patch("benchmark_utils.PerformanceSummaryGenerator._run_ci_performance_suite") + def test_generate_summary_passes_cargo_profile_to_benchmarks(self, mock_run_ci_suite, mock_run_benchmarks) -> None: """Test generating a summary with fresh benchmarks under a specific Cargo profile.""" + mock_run_ci_suite.return_value = True mock_run_benchmarks.return_value = (True, None) with tempfile.TemporaryDirectory() as temp_dir: @@ -2403,12 +2762,15 @@ def test_generate_summary_passes_cargo_profile_to_benchmarks(self, mock_run_benc success = generator.generate_summary(output_path=output_file, run_benchmarks=True, cargo_profile=requested_profile) assert success is True + mock_run_ci_suite.assert_called_once_with(cargo_profile=requested_profile) mock_run_benchmarks.assert_called_once_with(cargo_profile=requested_profile) assert output_file.exists() @patch("benchmark_utils.PerformanceSummaryGenerator._run_circumsphere_benchmarks") - def test_generate_summary_benchmark_failure_continues(self, mock_run_benchmarks, capsys): + @patch("benchmark_utils.PerformanceSummaryGenerator._run_ci_performance_suite") + def test_generate_summary_benchmark_failure_continues(self, mock_run_ci_suite, mock_run_benchmarks, capsys) -> None: """Test that summary generation continues even if benchmark run fails.""" + mock_run_ci_suite.return_value = False mock_run_benchmarks.return_value = (False, None) with tempfile.TemporaryDirectory() as temp_dir: @@ -2426,7 +2788,7 @@ def test_generate_summary_benchmark_failure_continues(self, mock_run_benchmarks, captured = capsys.readouterr() assert "Benchmark run failed" in captured.out - def test_generate_summary_exception_handling(self, capsys): + def test_generate_summary_exception_handling(self, capsys) -> None: """Test exception handling in generate_summary.""" with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2442,7 +2804,7 @@ def test_generate_summary_exception_handling(self, capsys): captured = capsys.readouterr() assert "Failed to generate performance summary" in captured.err - def test_get_static_content(self): + def test_get_static_content(self) -> None: """Test getting static content sections.""" with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2455,7 +2817,7 @@ def test_get_static_content(self): assert "## Implementation Notes" in content assert "## Benchmark Structure" in content - def test_empty_benchmark_results_edge_case(self): + def test_empty_benchmark_results_edge_case(self) -> None: """Test handling of empty benchmark results (edge case).""" with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2465,7 +2827,7 @@ def test_empty_benchmark_results_edge_case(self): results = generator._parse_circumsphere_benchmark_results() assert len(results) > 0 - def test_malformed_estimates_json_edge_case(self): + def test_malformed_estimates_json_edge_case(self) -> None: """Test handling of malformed estimates.json files (edge case).""" with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2483,7 +2845,7 @@ def test_malformed_estimates_json_edge_case(self): results = 
generator._parse_circumsphere_benchmark_results() assert len(results) > 0 - def test_missing_git_info_edge_case(self): + def test_missing_git_info_edge_case(self) -> None: """Test handling when git information is not available (edge case).""" with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2493,8 +2855,8 @@ def test_missing_git_info_edge_case(self): patch("benchmark_utils.run_git_command") as mock_git, patch("benchmark_utils.get_git_commit_hash") as mock_commit, ): - mock_git.side_effect = Exception("Git not available") - mock_commit.side_effect = Exception("Git not available") + mock_git.side_effect = RuntimeError("Git not available") + mock_commit.side_effect = RuntimeError("Git not available") generator = PerformanceSummaryGenerator(project_root) success = generator.generate_summary(output_file) @@ -2505,7 +2867,7 @@ def test_missing_git_info_edge_case(self): content = output_file.read_text() assert "Version unknown" in content - def test_baseline_fallback_behavior_edge_case(self): + def test_baseline_fallback_behavior_edge_case(self) -> None: """Test baseline file fallback from primary to secondary location (edge case).""" with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2551,7 +2913,7 @@ def test_baseline_fallback_behavior_edge_case(self): # Performance data "1000 Points (3D)" would come from benchmark parsing, # not baseline parsing. The important test is that the fallback file is read. - def test_full_generation_workflow_integration(self): + def test_full_generation_workflow_integration(self) -> None: """Test complete summary generation workflow (integration test).""" with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2592,12 +2954,13 @@ def test_full_generation_workflow_integration(self): assert "Single Query Performance (3D)" in content assert "Triangulation Data Structure Performance" in content assert "Performance Status: Good" in content - assert "Key Findings" in content - assert "Performance Ranking" in content - assert "Recommendations" in content - assert "Performance Data Updates" in content + assert PUBLIC_API_TITLE.removeprefix("### ") in content + assert CIRCUMSPHERE_TITLE.removeprefix("## ") in content + assert PERFORMANCE_RANKING_TITLE.removeprefix("### ") in content + assert RECOMMENDATIONS_TITLE.removeprefix("### ") in content + assert PERFORMANCE_UPDATES_TITLE.removeprefix("## ") in content - def test_dimension_sorting_numeric_order(self): + def test_dimension_sorting_numeric_order(self) -> None: """Test that dimensions are sorted numerically, not lexically.""" with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2633,7 +2996,7 @@ def test_dimension_sorting_numeric_order(self): assert "Test9" in content # 9D test case assert "Test10" in content # 10D test case - def test_hardware_metadata_parsing_with_cores(self): + def test_hardware_metadata_parsing_with_cores(self) -> None: """Test that hardware metadata parsing includes cores and guards against IndexError.""" with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2680,7 +3043,7 @@ def test_hardware_metadata_parsing_with_cores(self): assert "Apple M4 Max" in content assert "(" not in content.split("Apple M4 Max")[1].split("\n")[0] if "Apple M4 Max" in content else True - def test_dev_mode_args_consistency(self): + def test_dev_mode_args_consistency(self) -> None: """Test that DEV_MODE_BENCH_ARGS is used consistently.""" # Verify the constant exists and has 
expected structure assert isinstance(DEV_MODE_BENCH_ARGS, list) @@ -2692,7 +3055,7 @@ def test_dev_mode_args_consistency(self): # with pairs of argument name and value assert len(DEV_MODE_BENCH_ARGS) >= 6 # At least 3 arg-value pairs - def test_numerical_accuracy_phrasing_flexibility(self): + def test_numerical_accuracy_phrasing_flexibility(self) -> None: """Test that numerical accuracy section doesn't hardcode sample size.""" with tempfile.TemporaryDirectory() as temp_dir: project_root = Path(temp_dir) @@ -2710,7 +3073,7 @@ def test_numerical_accuracy_phrasing_flexibility(self): class TestTagSpecificBaselineHandling: """Test cases for tag-specific baseline file handling functionality.""" - def test_prepare_baseline_with_tag_specific_file(self, capsys): + def test_prepare_baseline_with_tag_specific_file(self, capsys) -> None: """Test baseline preparation with tag-specific file (baseline-v*.txt).""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -2762,7 +3125,7 @@ def test_prepare_baseline_with_tag_specific_file(self, capsys): finally: Path(env_path).unlink(missing_ok=True) - def test_prepare_baseline_with_generic_baseline_file(self, capsys): + def test_prepare_baseline_with_generic_baseline_file(self, capsys) -> None: """Test baseline preparation with generic baseline*.txt file.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -2809,7 +3172,7 @@ def test_prepare_baseline_with_generic_baseline_file(self, capsys): finally: Path(env_path).unlink(missing_ok=True) - def test_prepare_baseline_prefers_standard_name(self, capsys): + def test_prepare_baseline_prefers_standard_name(self, capsys) -> None: """Test that prepare_baseline prefers baseline_results.txt over tag-specific files.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -2851,7 +3214,7 @@ def test_prepare_baseline_prefers_standard_name(self, capsys): finally: Path(env_path).unlink(missing_ok=True) - def test_prepare_baseline_no_matching_files(self, capsys): + def test_prepare_baseline_no_matching_files(self, capsys) -> None: """Test baseline preparation when no matching baseline files are found.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -2884,7 +3247,7 @@ def test_prepare_baseline_no_matching_files(self, capsys): finally: Path(env_path).unlink(missing_ok=True) - def test_extract_baseline_commit_from_tag_file(self): + def test_extract_baseline_commit_from_tag_file(self) -> None: """Test extracting commit SHA from tag-specific baseline file.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -2916,7 +3279,7 @@ def test_extract_baseline_commit_from_tag_file(self): finally: Path(env_path).unlink(missing_ok=True) - def test_extract_baseline_commit_fallback_to_metadata(self): + def test_extract_baseline_commit_fallback_to_metadata(self) -> None: """Test extracting commit SHA from metadata.json when baseline files have no commit info.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -2954,7 +3317,7 @@ def test_extract_baseline_commit_fallback_to_metadata(self): finally: Path(env_path).unlink(missing_ok=True) - def test_extract_baseline_commit_handles_multiple_tag_files(self): + def test_extract_baseline_commit_handles_multiple_tag_files(self) -> None: """Test that extract_baseline_commit selects the highest semver tag file when multiple exist.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ 
-2988,7 +3351,7 @@ def test_extract_baseline_commit_handles_multiple_tag_files(self): finally: Path(env_path).unlink(missing_ok=True) - def test_semver_prefers_stable_over_prerelease(self): + def test_semver_prefers_stable_over_prerelease(self) -> None: """Test that stable releases are preferred over pre-releases of the same version.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -3007,7 +3370,7 @@ def test_semver_prefers_stable_over_prerelease(self): assert selected is not None assert selected.name == "baseline-v1.2.3.txt" - def test_semver_v043_vs_v043_beta1_preference(self): + def test_semver_v043_vs_v043_beta1_preference(self) -> None: """Test specific case: v0.4.3 is preferred over v0.4.3-beta.1.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -3029,7 +3392,7 @@ def test_semver_v043_vs_v043_beta1_preference(self): assert "stable043" in content assert "Tag: v0.4.3" in content - def test_semver_prefers_higher_prerelease_when_no_stable(self): + def test_semver_prefers_higher_prerelease_when_no_stable(self) -> None: """Test that higher pre-release is selected when only pre-releases exist.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -3049,7 +3412,7 @@ def test_semver_prefers_higher_prerelease_when_no_stable(self): # Current behavior: lexicographic prerelease ordering; expect beta.2 to win assert selected.name == "baseline-v1.2.3-beta.2.txt" - def test_baseline_commit_source_from_baseline_file(self): + def test_baseline_commit_source_from_baseline_file(self) -> None: """Test that BASELINE_COMMIT_SOURCE is 'baseline' when commit is extracted from baseline file.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -3078,7 +3441,7 @@ def test_baseline_commit_source_from_baseline_file(self): finally: Path(env_path).unlink(missing_ok=True) - def test_baseline_commit_source_from_metadata_file(self): + def test_baseline_commit_source_from_metadata_file(self) -> None: """Test that BASELINE_COMMIT_SOURCE is 'metadata' when commit is extracted from metadata.json.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -3109,7 +3472,7 @@ def test_baseline_commit_source_from_metadata_file(self): finally: Path(env_path).unlink(missing_ok=True) - def test_baseline_commit_source_unknown_when_no_commit_found(self): + def test_baseline_commit_source_unknown_when_no_commit_found(self) -> None: """Test that BASELINE_COMMIT_SOURCE is 'unknown' when no commit is found anywhere.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -3132,7 +3495,7 @@ def test_baseline_commit_source_unknown_when_no_commit_found(self): finally: Path(env_path).unlink(missing_ok=True) - def test_env_vars_mirrored_to_current_process(self): + def test_env_vars_mirrored_to_current_process(self) -> None: """Test that write_github_env_vars mirrors variables into current process.""" with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file: env_path = env_file.name @@ -3166,7 +3529,7 @@ def test_env_vars_mirrored_to_current_process(self): for key in ["TEST_BASELINE_EXISTS", "TEST_BASELINE_SOURCE"]: os.environ.pop(key, None) - def test_env_vars_multiline_handling(self): + def test_env_vars_multiline_handling(self) -> None: """Test that write_github_env_vars correctly handles multiline values with heredoc format.""" with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file: env_path = env_file.name @@ -3215,7 +3578,7 @@ 
def test_env_vars_multiline_handling(self): for key in ["TEST_MULTILINE", "TEST_SINGLE_LINE", "TEST_WITH_CR"]: os.environ.pop(key, None) - def test_env_vars_none_value_handling(self): + def test_env_vars_none_value_handling(self) -> None: """Test that write_github_env_vars correctly handles None values without errors.""" with tempfile.NamedTemporaryFile(mode="w", delete=False) as env_file: env_path = env_file.name @@ -3250,7 +3613,7 @@ def test_env_vars_none_value_handling(self): for key in ["TEST_NONE", "TEST_NORMAL"]: os.environ.pop(key, None) - def test_baseline_tag_sanitization(self): + def test_baseline_tag_sanitization(self) -> None: """Test that BASELINE_TAG is sanitized before being exported to GITHUB_ENV.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -3291,7 +3654,7 @@ def test_baseline_tag_sanitization(self): finally: Path(env_path).unlink(missing_ok=True) - def test_baseline_tag_length_capping(self): + def test_baseline_tag_length_capping(self) -> None: """Test that BASELINE_TAG is capped at 64 characters.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -3334,7 +3697,7 @@ def test_baseline_tag_length_capping(self): finally: Path(env_path).unlink(missing_ok=True) - def test_packaging_version_complex_comparisons(self): + def test_packaging_version_complex_comparisons(self) -> None: """Test that packaging.version handles complex version comparisons correctly.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -3360,7 +3723,7 @@ def test_packaging_version_complex_comparisons(self): assert selected is not None assert selected.name == "baseline-v2.0.0.txt" - def test_packaging_version_invalid_versions(self): + def test_packaging_version_invalid_versions(self) -> None: """Test that invalid version formats are handled gracefully.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -3382,7 +3745,7 @@ def test_packaging_version_invalid_versions(self): assert selected.name == "baseline-v1.2.txt" assert "Valid 1.2.0 content" in selected.read_text() - def test_packaging_version_truly_invalid_versions(self): + def test_packaging_version_truly_invalid_versions(self) -> None: """Test that truly invalid version formats fall back to generic baseline selection.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -3404,7 +3767,7 @@ def test_packaging_version_truly_invalid_versions(self): assert selected.name == "baseline_results.txt" assert "Generic baseline content" in selected.read_text() - def test_generic_baseline_prefers_newest_mtime(self): + def test_generic_baseline_prefers_newest_mtime(self) -> None: """Test that generic baseline files are selected by most recent mtime.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -3429,7 +3792,7 @@ def test_generic_baseline_prefers_newest_mtime(self): assert selected.name == "baseline-newer.txt" assert "Newer baseline content" in selected.read_text() - def test_prerelease_detection_fix_validation(self): + def test_prerelease_detection_fix_validation(self) -> None: """Test that prerelease detection correctly identifies stable vs prerelease versions.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) @@ -3447,7 +3810,7 @@ def test_prerelease_detection_fix_validation(self): assert selected.name == "baseline-v1.0.0.txt" assert "Stable content" in selected.read_text() - def 
test_prepare_baseline_and_extract_commit_integration(self): + def test_prepare_baseline_and_extract_commit_integration(self) -> None: """Test the integration between prepare_baseline and extract_baseline_commit.""" with tempfile.TemporaryDirectory() as temp_dir: baseline_dir = Path(temp_dir) diff --git a/scripts/tests/test_compare_storage_backends.py b/scripts/tests/test_compare_storage_backends.py index 3fa981dc..62468ef3 100644 --- a/scripts/tests/test_compare_storage_backends.py +++ b/scripts/tests/test_compare_storage_backends.py @@ -21,7 +21,7 @@ @pytest.fixture -def temp_project_root(tmp_path): +def temp_project_root(tmp_path) -> Path: """Create a temporary project root with necessary directories.""" project_root = tmp_path / "project" project_root.mkdir() @@ -38,13 +38,13 @@ def temp_project_root(tmp_path): @pytest.fixture -def comparator(temp_project_root): +def comparator(temp_project_root) -> StorageBackendComparator: """Create a StorageBackendComparator instance with temp project root.""" return StorageBackendComparator(temp_project_root) @pytest.fixture -def sample_criterion_json(): +def sample_criterion_json() -> dict[str, object]: """Sample Criterion estimates.json data.""" return { "mean": { @@ -58,7 +58,7 @@ def sample_criterion_json(): @pytest.fixture -def sample_criterion_stdout(): +def sample_criterion_stdout() -> str: """Sample Criterion stdout output for regex parsing.""" return """ Running benchmarks... @@ -73,7 +73,7 @@ def sample_criterion_stdout(): @pytest.fixture -def completed_ok(): +def completed_ok() -> CompletedProcess[str]: """Reusable fixture for successful cargo bench results.""" return CompletedProcess( args=["cargo", "bench"], @@ -86,14 +86,14 @@ def completed_ok(): class TestStorageBackendComparator: """Test cases for StorageBackendComparator class.""" - def test_init(self, temp_project_root): + def test_init(self, temp_project_root) -> None: """Test comparator initialization.""" comparator = StorageBackendComparator(temp_project_root) assert comparator.project_root == temp_project_root assert comparator.criterion_dir == temp_project_root / "target" / "criterion" - def test_parse_criterion_output_json_success(self, comparator, sample_criterion_json): + def test_parse_criterion_output_json_success(self, comparator, sample_criterion_json) -> None: """Test parsing Criterion output from JSON files.""" # Create fake criterion directory structure bench_dir = comparator.criterion_dir / "construction" / "2D" / "1000v" @@ -118,7 +118,7 @@ def test_parse_criterion_output_json_success(self, comparator, sample_criterion_ assert bench["lower"] == 145000000.0 assert bench["upper"] == 155000000.0 - def test_parse_criterion_output_regex_fallback(self, comparator, sample_criterion_stdout): + def test_parse_criterion_output_regex_fallback(self, comparator, sample_criterion_stdout) -> None: """Test parsing Criterion output using regex fallback when JSON unavailable.""" results = comparator._parse_criterion_output(sample_criterion_stdout) @@ -145,7 +145,7 @@ def test_parse_criterion_output_regex_fallback(self, comparator, sample_criterio assert bench3["estimate"] == 9.012 assert bench3["unit"] == "ms" - def test_parse_criterion_output_empty(self, comparator): + def test_parse_criterion_output_empty(self, comparator) -> None: """Test parsing empty Criterion output.""" results = comparator._parse_criterion_output("") @@ -153,7 +153,7 @@ def test_parse_criterion_output_empty(self, comparator): assert len(results["benchmarks"]) == 0 assert "raw_output" in results - def 
test_build_comparison_table_basic(self, comparator): + def test_build_comparison_table_basic(self, comparator) -> None: """Test building comparison table with matching benchmarks.""" slotmap_by_name = { "test1": {"estimate": 100.0, "unit": "ms"}, @@ -186,7 +186,7 @@ def test_build_comparison_table_basic(self, comparator): assert "+10.0%" in lines[1] assert "SlotMap" in lines[1] - def test_build_comparison_table_similar_performance(self, comparator): + def test_build_comparison_table_similar_performance(self, comparator) -> None: """Test comparison table with similar performance (< 2% difference).""" slotmap_by_name = { "test": {"estimate": 100.0, "unit": "ms"}, @@ -203,7 +203,7 @@ def test_build_comparison_table_similar_performance(self, comparator): assert len(lines) == 1 assert "~Same" in lines[0] - def test_build_comparison_table_missing_data(self, comparator): + def test_build_comparison_table_missing_data(self, comparator) -> None: """Test comparison table with missing data for one backend.""" slotmap_by_name = { "test1": {"estimate": 100.0, "unit": "ms"}, @@ -226,7 +226,7 @@ def test_build_comparison_table_missing_data(self, comparator): assert "N/A" in lines[1] @patch("compare_storage_backends.run_cargo_command") - def test_run_benchmark_success(self, mock_run_cargo, comparator, completed_ok): + def test_run_benchmark_success(self, mock_run_cargo, comparator, completed_ok) -> None: """Test successful benchmark execution.""" mock_run_cargo.return_value = completed_ok @@ -244,7 +244,7 @@ def test_run_benchmark_success(self, mock_run_cargo, comparator, completed_ok): assert args[5] == "test_bench" @patch("compare_storage_backends.run_cargo_command") - def test_run_benchmark_with_dense_slotmap(self, mock_run_cargo, comparator, completed_ok): + def test_run_benchmark_with_dense_slotmap(self, mock_run_cargo, comparator, completed_ok) -> None: """Test benchmark execution with DenseSlotMap feature.""" mock_run_cargo.return_value = completed_ok @@ -263,7 +263,7 @@ def test_run_benchmark_with_dense_slotmap(self, mock_run_cargo, comparator, comp assert "dense-slotmap" in args @patch("compare_storage_backends.run_cargo_command") - def test_run_benchmark_dev_mode(self, mock_run_cargo, comparator, completed_ok): + def test_run_benchmark_dev_mode(self, mock_run_cargo, comparator, completed_ok) -> None: """Test benchmark execution in development mode.""" mock_run_cargo.return_value = completed_ok @@ -281,7 +281,7 @@ def test_run_benchmark_dev_mode(self, mock_run_cargo, comparator, completed_ok): assert "--noplot" in args @patch("compare_storage_backends.run_cargo_command") - def test_run_benchmark_with_extra_args(self, mock_run_cargo, comparator, completed_ok): + def test_run_benchmark_with_extra_args(self, mock_run_cargo, comparator, completed_ok) -> None: """Test benchmark execution with extra arguments.""" mock_run_cargo.return_value = completed_ok @@ -301,7 +301,7 @@ def test_run_benchmark_with_extra_args(self, mock_run_cargo, comparator, complet assert "construction" in args @patch("compare_storage_backends.run_cargo_command") - def test_run_benchmark_failure(self, mock_run_cargo, comparator, capsys): + def test_run_benchmark_failure(self, mock_run_cargo, comparator, capsys) -> None: """Test benchmark execution failure handling.""" # Mock failed cargo bench run mock_result = CompletedProcess( @@ -321,7 +321,7 @@ def test_run_benchmark_failure(self, mock_run_cargo, comparator, capsys): assert "Benchmark failed" in captured.err @patch("compare_storage_backends.run_cargo_command") - def 
test_run_comparison_success(self, mock_run_cargo, comparator, tmp_path): + def test_run_comparison_success(self, mock_run_cargo, comparator, tmp_path) -> None: """Test full comparison workflow success.""" # Mock successful benchmark runs for both backends mock_result = CompletedProcess( @@ -350,7 +350,7 @@ def test_run_comparison_success(self, mock_run_cargo, comparator, tmp_path): assert "DenseSlotMap" in report @patch("compare_storage_backends.run_cargo_command") - def test_run_comparison_slotmap_failure(self, mock_run_cargo, comparator): + def test_run_comparison_slotmap_failure(self, mock_run_cargo, comparator) -> None: """Test comparison when SlotMap benchmark fails.""" # Mock failed SlotMap run mock_result = CompletedProcess( @@ -366,7 +366,7 @@ def test_run_comparison_slotmap_failure(self, mock_run_cargo, comparator): assert success is False @patch("compare_storage_backends.run_cargo_command") - def test_run_comparison_denseslotmap_failure(self, mock_run_cargo, comparator): + def test_run_comparison_denseslotmap_failure(self, mock_run_cargo, comparator) -> None: """Test comparison when DenseSlotMap benchmark fails.""" # Mock successful SlotMap, failed DenseSlotMap mock_run_cargo.side_effect = [ @@ -390,7 +390,7 @@ def test_run_comparison_denseslotmap_failure(self, mock_run_cargo, comparator): assert success is False - def test_generate_comparison_report_structure(self, comparator): + def test_generate_comparison_report_structure(self, comparator) -> None: """Test comparison report generation structure.""" slotmap_results = { "backend": "SlotMap", @@ -431,7 +431,7 @@ def test_generate_comparison_report_structure(self, comparator): class TestIntegration: """Integration tests for compare_storage_backends module.""" - def test_find_project_root_integration(self, tmp_path): + def test_find_project_root_integration(self, tmp_path) -> None: """Test integration with find_project_root utility.""" # Create a fake project with Cargo.toml project_root = tmp_path / "project" diff --git a/scripts/tests/test_hardware_utils.py b/scripts/tests/test_hardware_utils.py index f36a1e8a..b9f8596f 100644 --- a/scripts/tests/test_hardware_utils.py +++ b/scripts/tests/test_hardware_utils.py @@ -8,7 +8,7 @@ import platform import subprocess -from unittest.mock import Mock, mock_open, patch +from unittest.mock import mock_open, patch import pytest @@ -16,7 +16,7 @@ @pytest.fixture -def hardware(): +def hardware() -> HardwareInfo: """Fixture for HardwareInfo instance.""" return HardwareInfo() @@ -24,29 +24,32 @@ def hardware(): class TestHardwareInfo: """Test cases for HardwareInfo class.""" - def test_init(self, hardware): + def test_init(self, hardware) -> None: """Test HardwareInfo initialization.""" assert hardware.os_type == platform.system() assert hardware.machine == platform.machine() @patch("hardware_utils.platform.system") - def test_init_with_different_os(self, mock_system): + def test_init_with_different_os(self, mock_system) -> None: """Test initialization with different OS types.""" mock_system.return_value = "Linux" hardware = HardwareInfo() assert hardware.os_type == "Linux" - def test_run_command_empty_cmd(self, hardware): + def test_run_command_empty_cmd(self, hardware) -> None: """Test _run_command with empty command list.""" with pytest.raises(ValueError, match="Command list cannot be empty"): hardware._run_command([]) @patch("hardware_utils.run_safe_command") - def test_run_command_success(self, mock_run_safe, hardware): + def test_run_command_success(self, mock_run_safe, hardware) -> None: 
"""Test successful command execution.""" - mock_result = Mock() - mock_result.stdout = "test output\n" - mock_run_safe.return_value = mock_result + mock_run_safe.return_value = subprocess.CompletedProcess( + args=["echo", "test"], + returncode=0, + stdout="test output\n", + stderr="", + ) result = hardware._run_command(["echo", "test"]) @@ -61,7 +64,7 @@ def test_run_command_success(self, mock_run_safe, hardware): ) @patch("hardware_utils.run_safe_command") - def test_run_command_failure(self, mock_run_safe, hardware): + def test_run_command_failure(self, mock_run_safe, hardware) -> None: """Test command execution failure.""" mock_run_safe.side_effect = subprocess.CalledProcessError(1, "cmd") @@ -70,7 +73,7 @@ def test_run_command_failure(self, mock_run_safe, hardware): @patch("hardware_utils.platform.system") @patch.object(HardwareInfo, "_run_command") - def test_get_cpu_info_darwin(self, mock_run_command, mock_system): + def test_get_cpu_info_darwin(self, mock_run_command, mock_system) -> None: """Test CPU info detection on macOS.""" mock_system.return_value = "Darwin" mock_run_command.side_effect = ["Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz", "6", "12"] @@ -85,7 +88,7 @@ def test_get_cpu_info_darwin(self, mock_run_command, mock_system): @patch("hardware_utils.platform.system") @patch("hardware_utils.shutil.which") @patch.object(HardwareInfo, "_run_command") - def test_get_cpu_info_linux_with_lscpu(self, mock_run_command, mock_which, mock_system): + def test_get_cpu_info_linux_with_lscpu(self, mock_run_command, mock_which, mock_system) -> None: """Test CPU info detection on Linux with lscpu available.""" mock_system.return_value = "Linux" mock_which.side_effect = lambda cmd: cmd in ["lscpu", "nproc"] @@ -113,7 +116,7 @@ def test_get_cpu_info_linux_with_lscpu(self, mock_run_command, mock_which, mock_ @patch("hardware_utils.platform.system") @patch("hardware_utils.shutil.which") @patch("builtins.open", new_callable=mock_open, read_data="processor\t: 0\nmodel name\t: AMD Ryzen 5 3600\nprocessor\t: 1\n") - def test_get_cpu_info_linux_fallback_cpuinfo(self, _mock_file, mock_which, mock_system): # noqa: PT019 + def test_get_cpu_info_linux_fallback_cpuinfo(self, _mock_file, mock_which, mock_system) -> None: # noqa: PT019 """Test CPU info detection on Linux using /proc/cpuinfo fallback.""" mock_system.return_value = "Linux" mock_which.return_value = None # No commands available @@ -128,7 +131,7 @@ def test_get_cpu_info_linux_fallback_cpuinfo(self, _mock_file, mock_which, mock_ @patch("hardware_utils.platform.system") @patch("hardware_utils.shutil.which") @patch.object(HardwareInfo, "_run_command") - def test_get_cpu_info_windows(self, mock_run_command, mock_which, mock_system): + def test_get_cpu_info_windows(self, mock_run_command, mock_which, mock_system) -> None: """Test CPU info detection on Windows.""" mock_system.return_value = "Windows" mock_which.side_effect = lambda cmd: cmd == "powershell" @@ -143,7 +146,7 @@ def test_get_cpu_info_windows(self, mock_run_command, mock_which, mock_system): assert cpu_threads == "16" @patch("hardware_utils.platform.system") - def test_get_cpu_info_unknown_os(self, mock_system): + def test_get_cpu_info_unknown_os(self, mock_system) -> None: """Test CPU info detection on unknown OS.""" mock_system.return_value = "UnknownOS" @@ -156,7 +159,7 @@ def test_get_cpu_info_unknown_os(self, mock_system): @patch("hardware_utils.platform.system") @patch.object(HardwareInfo, "_run_command") - def test_get_cpu_info_command_failure(self, mock_run_command, mock_system): + 
def test_get_cpu_info_command_failure(self, mock_run_command, mock_system) -> None: """Test CPU info detection when commands fail.""" mock_system.return_value = "Darwin" mock_run_command.side_effect = subprocess.CalledProcessError(1, "cmd") @@ -170,7 +173,7 @@ def test_get_cpu_info_command_failure(self, mock_run_command, mock_system): @patch("hardware_utils.platform.system") @patch.object(HardwareInfo, "_run_command") - def test_get_memory_info_darwin(self, mock_run_command, mock_system): + def test_get_memory_info_darwin(self, mock_run_command, mock_system) -> None: """Test memory info detection on macOS.""" mock_system.return_value = "Darwin" mock_run_command.return_value = "17179869184" # 16 GB in bytes @@ -182,7 +185,7 @@ def test_get_memory_info_darwin(self, mock_run_command, mock_system): @patch("hardware_utils.platform.system") @patch("builtins.open", new_callable=mock_open, read_data="MemTotal: 16384000 kB\n") - def test_get_memory_info_linux(self, _mock_file, mock_system): # noqa: PT019 + def test_get_memory_info_linux(self, _mock_file, mock_system) -> None: # noqa: PT019 """Test memory info detection on Linux.""" mock_system.return_value = "Linux" @@ -194,7 +197,7 @@ def test_get_memory_info_linux(self, _mock_file, mock_system): # noqa: PT019 @patch("hardware_utils.platform.system") @patch("hardware_utils.shutil.which") @patch.object(HardwareInfo, "_run_command") - def test_get_memory_info_windows(self, mock_run_command, mock_which, mock_system): + def test_get_memory_info_windows(self, mock_run_command, mock_which, mock_system) -> None: """Test memory info detection on Windows.""" mock_system.return_value = "Windows" mock_which.side_effect = lambda cmd: cmd == "powershell" @@ -206,7 +209,7 @@ def test_get_memory_info_windows(self, mock_run_command, mock_which, mock_system assert memory == "32.0 GB" @patch("hardware_utils.platform.system") - def test_get_memory_info_unknown_os(self, mock_system): + def test_get_memory_info_unknown_os(self, mock_system) -> None: """Test memory info detection on unknown OS.""" mock_system.return_value = "UnknownOS" @@ -217,7 +220,7 @@ def test_get_memory_info_unknown_os(self, mock_system): @patch("hardware_utils.shutil.which") @patch.object(HardwareInfo, "_run_command") - def test_get_rust_info_success(self, mock_run_command, mock_which, hardware): + def test_get_rust_info_success(self, mock_run_command, mock_which, hardware) -> None: """Test Rust info detection when rustc is available.""" mock_which.return_value = "/usr/bin/rustc" mock_run_command.side_effect = ["rustc 1.70.0 (90c541806 2023-05-31)", "rustc 1.70.0 (90c541806 2023-05-31)\nhost: x86_64-apple-darwin\n"] @@ -228,7 +231,7 @@ def test_get_rust_info_success(self, mock_run_command, mock_which, hardware): assert rust_target == "x86_64-apple-darwin" @patch("hardware_utils.shutil.which") - def test_get_rust_info_no_rustc(self, mock_which, hardware): + def test_get_rust_info_no_rustc(self, mock_which, hardware) -> None: """Test Rust info detection when rustc is not available.""" mock_which.return_value = None @@ -239,7 +242,7 @@ def test_get_rust_info_no_rustc(self, mock_which, hardware): @patch("hardware_utils.shutil.which") @patch.object(HardwareInfo, "_run_command") - def test_get_rust_info_command_failure(self, mock_run_command, mock_which, hardware): + def test_get_rust_info_command_failure(self, mock_run_command, mock_which, hardware) -> None: """Test Rust info detection when rustc commands fail.""" mock_which.return_value = "/usr/bin/rustc" mock_run_command.side_effect = 
subprocess.CalledProcessError(1, "cmd") @@ -249,7 +252,7 @@ def test_get_rust_info_command_failure(self, mock_run_command, mock_which, hardw assert rust_version == "Unknown" assert rust_target == "Unknown" - def test_get_hardware_info(self, hardware): + def test_get_hardware_info(self, hardware) -> None: """Test comprehensive hardware info collection.""" with ( patch.object(hardware, "get_cpu_info") as mock_cpu, @@ -275,7 +278,7 @@ def test_get_hardware_info(self, hardware): ("system_name", "expected_os"), [("Darwin", "macOS"), ("Linux", "Linux"), ("Windows", "Windows"), ("FreeBSD", "Unknown (FreeBSD)")] ) @patch("hardware_utils.platform.system") - def test_get_hardware_info_os_mapping(self, mock_system, system_name, expected_os): + def test_get_hardware_info_os_mapping(self, mock_system, system_name, expected_os) -> None: """Test OS name mapping in hardware info.""" mock_system.return_value = system_name hardware = HardwareInfo() @@ -292,7 +295,7 @@ def test_get_hardware_info_os_mapping(self, mock_system, system_name, expected_o info = hardware.get_hardware_info() assert info["OS"] == expected_os - def test_format_hardware_info(self, hardware): + def test_format_hardware_info(self, hardware) -> None: """Test hardware info formatting.""" test_info = { "OS": "macOS", @@ -315,7 +318,7 @@ def test_format_hardware_info(self, hardware): assert "Rust: rustc 1.70.0" in formatted assert "Target: x86_64-apple-darwin" in formatted - def test_format_hardware_info_none(self, hardware): + def test_format_hardware_info_none(self, hardware) -> None: """Test hardware info formatting with None input.""" with patch.object(hardware, "get_hardware_info") as mock_get_info: mock_get_info.return_value = { @@ -335,7 +338,7 @@ def test_format_hardware_info_none(self, hardware): class TestHardwareComparator: """Test cases for HardwareComparator class.""" - def test_parse_baseline_hardware_complete(self): + def test_parse_baseline_hardware_complete(self) -> None: """Test parsing complete baseline hardware info.""" baseline_content = """Benchmark Results Generated on: 2023-06-15 10:30:00 @@ -367,7 +370,7 @@ def test_parse_baseline_hardware_complete(self): assert info == expected - def test_parse_baseline_hardware_partial(self): + def test_parse_baseline_hardware_partial(self) -> None: """Test parsing partial baseline hardware info.""" baseline_content = """Hardware Information: OS: Linux @@ -385,7 +388,7 @@ def test_parse_baseline_hardware_partial(self): assert info["CPU_CORES"] == "Unknown" # Not specified assert info["RUST"] == "Unknown" # Not specified - def test_parse_baseline_hardware_empty(self): + def test_parse_baseline_hardware_empty(self) -> None: """Test parsing baseline with no hardware info.""" baseline_content = "No hardware information found" @@ -395,7 +398,7 @@ def test_parse_baseline_hardware_empty(self): for value in info.values(): assert value == "Unknown" - def test_compare_hardware_identical(self): + def test_compare_hardware_identical(self) -> None: """Test hardware comparison with identical configurations.""" current_info = { "OS": "macOS", @@ -414,7 +417,7 @@ def test_compare_hardware_identical(self): assert not has_warnings assert "Hardware configurations are compatible" in report - def test_compare_hardware_different_os(self): + def test_compare_hardware_different_os(self) -> None: """Test hardware comparison with different OS.""" current_info = { "OS": "Linux", @@ -434,7 +437,7 @@ def test_compare_hardware_different_os(self): assert has_warnings assert "OS differs: Linux vs macOS" in report 
- def test_compare_hardware_different_cpu(self): + def test_compare_hardware_different_cpu(self) -> None: """Test hardware comparison with different CPU.""" current_info = { "OS": "Linux", @@ -455,7 +458,7 @@ def test_compare_hardware_different_cpu(self): assert "CPU differs:" in report assert "results may not be directly comparable" in report - def test_compare_hardware_different_cores(self): + def test_compare_hardware_different_cores(self) -> None: """Test hardware comparison with different core counts.""" current_info = { "OS": "Linux", @@ -475,7 +478,7 @@ def test_compare_hardware_different_cores(self): assert has_warnings assert "CPU core count differs: 8 vs 6 cores" in report - def test_compare_hardware_memory_tolerance(self): + def test_compare_hardware_memory_tolerance(self) -> None: """Test memory comparison with numeric (percentage-based) tolerance.""" current_info = { "OS": "Linux", @@ -501,7 +504,7 @@ def test_compare_hardware_memory_tolerance(self): assert has_warnings assert "Memory differs:" in report - def test_compare_hardware_unknown_baseline(self): + def test_compare_hardware_unknown_baseline(self) -> None: """Test hardware comparison with unknown baseline values.""" current_info = { "OS": "Linux", @@ -543,7 +546,7 @@ def test_compare_hardware_unknown_baseline(self): ("", None), ], ) - def test_extract_memory_value(self, memory_str, expected): + def test_extract_memory_value(self, memory_str, expected) -> None: """Test memory value extraction from strings.""" result = HardwareComparator._extract_memory_value(memory_str) if expected is None: @@ -555,7 +558,7 @@ def test_extract_memory_value(self, memory_str, expected): class TestHardwareUtilsIntegration: """Integration tests for hardware_utils functionality.""" - def test_real_hardware_info_structure(self): + def test_real_hardware_info_structure(self) -> None: """Test that real hardware info returns expected structure.""" hardware = HardwareInfo() info = hardware.get_hardware_info() @@ -567,7 +570,7 @@ def test_real_hardware_info_structure(self): for key, value in info.items(): assert isinstance(value, str), f"Key {key} should have string value" - def test_cpu_info_returns_tuples(self): + def test_cpu_info_returns_tuples(self) -> None: """Test that CPU info methods return proper tuple structure.""" hardware = HardwareInfo() @@ -580,14 +583,14 @@ def test_cpu_info_returns_tuples(self): assert isinstance(rust_version, str) assert isinstance(rust_target, str) - def test_memory_info_returns_string(self): + def test_memory_info_returns_string(self) -> None: """Test that memory info returns a string.""" hardware = HardwareInfo() memory = hardware.get_memory_info() assert isinstance(memory, str) - def test_formatted_output_structure(self): + def test_formatted_output_structure(self) -> None: """Test that formatted output has expected structure.""" hardware = HardwareInfo() formatted = hardware.format_hardware_info() diff --git a/scripts/tests/test_subprocess_utils.py b/scripts/tests/test_subprocess_utils.py index 829fec7b..ae24f8b9 100644 --- a/scripts/tests/test_subprocess_utils.py +++ b/scripts/tests/test_subprocess_utils.py @@ -16,6 +16,8 @@ # Add scripts directory to path sys.path.insert(0, str(Path(__file__).parent.parent)) +from typing import Never + from subprocess_utils import ( ExecutableNotFoundError, check_git_history, @@ -34,7 +36,7 @@ class TestGetSafeExecutable: """Test get_safe_executable function.""" @pytest.mark.parametrize("command", ["echo", "git", "ls"]) - def test_finds_existing_executables(self, command): 
+ def test_finds_existing_executables(self, command) -> None: """Test that it finds common executables.""" result = get_safe_executable(command) assert isinstance(result, str) @@ -47,7 +49,7 @@ def test_finds_existing_executables(self, command): pytest.skip(f"{command} may not be an external executable on Windows") @pytest.mark.parametrize("fake_command", ["definitely-nonexistent-command-xyz", "fake-command-for-testing", "nonexistent123"]) - def test_raises_on_nonexistent_executables(self, fake_command): + def test_raises_on_nonexistent_executables(self, fake_command) -> None: """Test that it raises ExecutableNotFoundError for nonexistent commands.""" with pytest.raises(ExecutableNotFoundError, match="not found in PATH") as exc_info: get_safe_executable(fake_command) @@ -58,26 +60,26 @@ def test_raises_on_nonexistent_executables(self, fake_command): class TestRunGitCommand: """Test run_git_command function.""" - def test_git_version(self): + def test_git_version(self) -> None: """Test basic git command execution.""" result = run_git_command(["--version"]) assert result.returncode == 0 assert "git version" in result.stdout.lower() assert isinstance(result.stdout, str) - def test_git_command_with_custom_params(self): + def test_git_command_with_custom_params(self) -> None: """Test git command with custom parameters.""" result = run_git_command(["status", "--porcelain"], check=False) # Should not raise even if there are changes (check=False) assert isinstance(result.returncode, int) assert isinstance(result.stdout, str) - def test_git_command_failure_handling(self): + def test_git_command_failure_handling(self) -> None: """Test that failed git commands raise CalledProcessError when check=True.""" with pytest.raises(subprocess.CalledProcessError): run_git_command(["invalid-git-subcommand-xyz"], check=True) - def test_git_command_no_failure_with_check_false(self): + def test_git_command_no_failure_with_check_false(self) -> None: """Test that failed git commands don't raise when check=False.""" result = run_git_command(["invalid-git-subcommand-xyz"], check=False) assert result.returncode != 0 @@ -88,7 +90,7 @@ class TestRunCargoCommand: """Test run_cargo_command function.""" @pytest.mark.skipif(shutil.which("cargo") is None, reason="cargo not installed in PATH") - def test_cargo_version(self): + def test_cargo_version(self) -> None: """Test basic cargo command execution.""" result = run_cargo_command(["--version"]) assert result.returncode == 0 @@ -96,7 +98,7 @@ def test_cargo_version(self): assert isinstance(result.stdout, str) @pytest.mark.skipif(shutil.which("cargo") is None, reason="cargo not installed in PATH") - def test_cargo_command_with_custom_params(self): + def test_cargo_command_with_custom_params(self) -> None: """Test cargo command with custom parameters.""" result = run_cargo_command(["check", "--dry-run"], check=False) assert isinstance(result.returncode, int) @@ -106,14 +108,14 @@ def test_cargo_command_with_custom_params(self): class TestRunSafeCommand: """Test run_safe_command function with various scenarios.""" - def test_basic_command_execution(self): + def test_basic_command_execution(self) -> None: """Test basic command execution with default parameters.""" result = run_safe_command("echo", ["hello world"]) assert result.returncode == 0 assert result.stdout.strip() == "hello world" assert isinstance(result.stdout, str) - def test_secure_defaults_are_applied(self): + def test_secure_defaults_are_applied(self) -> None: """Test that secure defaults are applied.""" result = 
run_safe_command("echo", ["test"]) # Should use secure defaults: @@ -123,21 +125,21 @@ def test_secure_defaults_are_applied(self): assert isinstance(result.stdout, str) assert result.stdout.strip() == "test" - def test_text_parameter_enforced(self): + def test_text_parameter_enforced(self) -> None: """Test that text parameter is enforced for security/stability.""" # run_safe_command enforces text=True for stable CompletedProcess[str] typing result = run_safe_command("echo", ["test output"], text=False) # text=False is ignored assert isinstance(result.stdout, str) # Should still be string assert "test output" in result.stdout - def test_custom_check_parameter(self): + def test_custom_check_parameter(self) -> None: """Test overriding check parameter.""" # Command that will fail result = run_safe_command("git", ["invalid-git-subcommand-xyz"], check=False) assert result.returncode != 0 # Should not raise because check=False - def test_custom_capture_output_parameter(self): + def test_custom_capture_output_parameter(self) -> None: """Test overriding capture_output parameter.""" if sys.platform.startswith("win"): pytest.skip("echo may not be an external executable on Windows") @@ -145,19 +147,19 @@ def test_custom_capture_output_parameter(self): # When capture_output=False, stdout should be None assert result.stdout is None - def test_multiple_custom_parameters(self): + def test_multiple_custom_parameters(self) -> None: """Test multiple custom parameters at once (text is enforced).""" result = run_safe_command("echo", ["multi param test"], text=False, check=False, capture_output=True) assert isinstance(result.stdout, str) # text=False is ignored, still returns string assert result.returncode == 0 assert "multi param test" in result.stdout - def test_nonexistent_command_raises_error(self): + def test_nonexistent_command_raises_error(self) -> None: """Test that nonexistent commands raise ExecutableNotFoundError.""" with pytest.raises(ExecutableNotFoundError): run_safe_command("definitely-nonexistent-command", ["arg"]) - def test_additional_kwargs_passed_through(self): + def test_additional_kwargs_passed_through(self) -> None: """Test that additional kwargs are passed through to subprocess.run.""" # Test with timeout (a subprocess.run parameter not explicitly handled) result = run_safe_command("echo", ["timeout test"], timeout=10) @@ -168,19 +170,19 @@ def test_additional_kwargs_passed_through(self): class TestGitRepositoryFunctions: """Test git repository detection functions.""" - def test_check_git_repo_in_git_repo(self): + def test_check_git_repo_in_git_repo(self) -> None: """Test check_git_repo returns True when in a git repository.""" if not check_git_repo(): pytest.skip("Not running inside a git repository") assert check_git_repo() is True - def test_check_git_history_with_history(self): + def test_check_git_history_with_history(self) -> None: """Test check_git_history returns True when git history exists.""" if not check_git_history(): pytest.skip("Repository has no commit history") assert check_git_history() is True - def test_get_git_commit_hash_returns_hash(self): + def test_get_git_commit_hash_returns_hash(self) -> None: """Test that get_git_commit_hash returns a valid commit hash.""" commit_hash = get_git_commit_hash() assert isinstance(commit_hash, str) @@ -188,7 +190,7 @@ def test_get_git_commit_hash_returns_hash(self): # Should be hexadecimal assert all(c in "0123456789abcdef" for c in commit_hash.lower()) - def test_get_git_remote_url_returns_url(self): + def 
test_get_git_remote_url_returns_url(self) -> None: """Test that get_git_remote_url returns a valid URL.""" remotes = run_git_command(["remote"]).stdout.split() if "origin" not in remotes: @@ -203,17 +205,17 @@ def test_get_git_remote_url_returns_url(self): class TestErrorHandling: """Test error handling and edge cases.""" - def test_executable_not_found_error_attributes(self): + def test_executable_not_found_error_attributes(self) -> None: """Test ExecutableNotFoundError has proper attributes.""" error = ExecutableNotFoundError("test message") assert str(error) == "test message" assert isinstance(error, Exception) - def test_git_functions_handle_missing_git(self, monkeypatch): + def test_git_functions_handle_missing_git(self, monkeypatch) -> None: """Test git functions handle missing git executable gracefully.""" # Mock get_safe_executable to raise ExecutableNotFoundError for git - def mock_get_safe_executable(command): + def mock_get_safe_executable(command) -> str: if command == "git": raise ExecutableNotFoundError(f"Required executable '{command}' not found in PATH") return "/bin/echo" # Return echo for other commands @@ -235,7 +237,7 @@ def mock_get_safe_executable(command): class TestSecurityFeatures: """Test security-related features of the utilities.""" - def test_uses_full_executable_paths(self): + def test_uses_full_executable_paths(self) -> None: """Test that commands use full executable paths.""" # This is implicitly tested by get_safe_executable tests, # but let's verify the behavior @@ -243,7 +245,7 @@ def test_uses_full_executable_paths(self): assert Path(git_path).is_absolute() # Should be absolute path assert "git" in git_path - def test_no_shell_execution(self): + def test_no_shell_execution(self) -> None: """Test that commands don't use shell=True.""" # The functions should not use shell=True, which would be a security risk # We can't directly test this, but the implementation uses subprocess.run @@ -252,7 +254,7 @@ def test_no_shell_execution(self): # If shell=True was used, this would expand the environment variable assert result.stdout.strip() == "$HOME" - def test_check_parameter_security_default(self): + def test_check_parameter_security_default(self) -> None: """Test that check=True is the default for security.""" # Command that will fail should raise by default with pytest.raises(subprocess.CalledProcessError): @@ -266,11 +268,11 @@ def test_check_parameter_security_default(self): (run_safe_command, ("echo", ["test"]), {"executable": "/malicious/fake/command"}), ], ) - def test_rejects_executable_override(self, function, args, kwargs, monkeypatch): + def test_rejects_executable_override(self, function, args, kwargs, monkeypatch) -> None: """Test that functions reject executable override for security.""" called = {"run": False} - def fake_run(*_a, **_k): + def fake_run(*_a, **_k) -> Never: called["run"] = True # should never be set msg = "subprocess.run should not be called on override" raise AssertionError(msg) @@ -280,7 +282,7 @@ def fake_run(*_a, **_k): function(*args, **kwargs) assert called["run"] is False - def test_run_git_command_with_input_rejects_executable_override(self): + def test_run_git_command_with_input_rejects_executable_override(self) -> None: """Test that run_git_command_with_input raises ValueError when executable is overridden.""" with pytest.raises(ValueError, match="Overriding 'executable' is not allowed"): run_git_command_with_input(["hash-object", "--stdin"], "test content", executable="/malicious/fake/git") diff --git 
a/scripts/tests/test_tag_release.py b/scripts/tests/test_tag_release.py index d579204b..26561200 100644 --- a/scripts/tests/test_tag_release.py +++ b/scripts/tests/test_tag_release.py @@ -14,7 +14,7 @@ # --------------------------------------------------------------------------- -def _fake_remote(url: str): +def _fake_remote(url: str) -> subprocess.CompletedProcess[str]: """Return a mock CompletedProcess whose stdout is *url*.""" return subprocess.CompletedProcess(args=[], returncode=0, stdout=url + "\n") diff --git a/src/lib.rs b/src/lib.rs index ef5238fa..0e3d37a4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -47,6 +47,7 @@ //! | Build a triangulation, insert/remove vertices | `use delaunay::prelude::triangulation::*` | //! | Read-only queries, traversal, convex hull | `use delaunay::prelude::query::*` | //! | Geometry helpers, predicates, points | `use delaunay::prelude::geometry::*` | +//! | Random points / triangulations for examples and tests | `use delaunay::prelude::generators::*` | //! | Bistellar flips (Pachner moves) | `use delaunay::prelude::triangulation::flips::*` | //! | Delaunay repair and flip-based Level 4 validation | `use delaunay::prelude::triangulation::repair::*` | //! | Delaunayize workflow (repair + flip) | `use delaunay::prelude::triangulation::delaunayize::*` | @@ -987,10 +988,6 @@ pub mod prelude { /// This is useful if you want a smaller import surface than `delaunay::prelude::*`, /// while still having access to the key public APIs typically used in docs/tests/examples/benches. /// - /// Note: `query` currently also re-exports a few helpers commonly used in - /// docs/tests/examples/benches (e.g., random generators). If this grows over time, it may be - /// split into more focused modules (e.g., `prelude::generators`). - /// /// Includes: /// - Topology traversal: [`DelaunayTriangulation::edges`], [`DelaunayTriangulation::incident_edges`], /// [`DelaunayTriangulation::cell_neighbors`] @@ -998,7 +995,6 @@ pub mod prelude { /// - Zero-allocation geometry accessors: [`DelaunayTriangulation::vertex_coords`], /// [`DelaunayTriangulation::cell_vertices`] /// - Convex hull extraction: [`ConvexHull::from_triangulation`] - /// - Test/example helpers: [`generate_random_triangulation`], [`generate_random_points_seeded`] pub mod query { // Core read-only traversal / adjacency pub use crate::core::adjacency::{AdjacencyIndex, AdjacencyIndexBuildError}; @@ -1024,7 +1020,8 @@ pub mod prelude { // Read-only algorithms pub use crate::geometry::algorithms::convex_hull::ConvexHull; - // Convenience generators (commonly used in docs/tests/examples/benches) + // Convenience generators kept for compatibility with existing docs/tests/examples/benches. + // Prefer prelude::generators for new code that only needs fixture data. pub use crate::geometry::util::{ generate_random_points_seeded, generate_random_triangulation, }; @@ -1035,6 +1032,33 @@ pub mod prelude { // Convenience macro (commonly used in docs/tests/examples) without importing full `prelude::*`. pub use crate::vertex; } + + /// Focused exports for generating fixture data in doctests, integration tests, + /// examples, and benchmarks. + /// + /// This module is intentionally separate from [`prelude::query`](crate::prelude::query) + /// so read-only traversal imports do not need to imply random data generation. 
+ /// + /// # Examples + /// + /// ```rust + /// use delaunay::prelude::generators::generate_random_points_seeded; + /// use delaunay::prelude::geometry::Point; + /// + /// let points: Vec> = + /// generate_random_points_seeded(4, (0.0, 1.0), 42).unwrap(); + /// + /// assert_eq!(points.len(), 4); + /// ``` + pub mod generators { + pub use crate::geometry::util::{ + RandomPointGenerationError, generate_grid_points, generate_poisson_points, + generate_random_points, generate_random_points_in_ball, + generate_random_points_in_ball_seeded, generate_random_points_periodic, + generate_random_points_seeded, generate_random_triangulation, + generate_random_triangulation_with_topology_guarantee, + }; + } /// Topology validation & analysis utilities. pub mod topology { /// Topology validation utilities. @@ -1057,7 +1081,7 @@ pub mod prelude { /// Traits are checked at compile time, so this function is only used for /// testing. #[must_use] -pub const fn is_normal() -> bool { +pub const fn is_normal() -> bool { true } @@ -1086,9 +1110,13 @@ mod tests { RepairQueueOrder, TopologyGuarantee, verify_delaunay_for_triangulation, verify_delaunay_via_flip_predicates, vertex, }, + prelude::*, triangulation::delaunay::DelaunayTriangulation, }; + #[cfg(feature = "count-allocations")] + use allocation_counter::measure; + // ============================================================================= // TYPE SAFETY TESTS // ============================================================================= @@ -1117,9 +1145,7 @@ mod tests { } #[test] - fn test_prelude_collections_exports() { - use crate::prelude::*; - + fn prelude_collections_exports() { // Test that we can use the collections from the prelude let mut map: FastHashMap = FastHashMap::default(); map.insert(123, 456); @@ -1147,7 +1173,7 @@ mod tests { } #[test] - fn test_prelude_triangulation_repair_exports() { + fn prelude_repair_exports() { let vertices = vec![ vertex!([0.0, 0.0]), vertex!([1.0, 0.0]), @@ -1181,9 +1207,7 @@ mod tests { } #[test] - fn test_prelude_quality_exports() { - use crate::prelude::*; - + fn prelude_quality_exports() { // Test that quality functions are accessible from prelude let vertices = vec![ vertex!([0.0, 0.0]), @@ -1206,8 +1230,6 @@ mod tests { #[test] fn test_prelude_kernel_exports() { - use crate::prelude::*; - // Test that kernel types and predicates are accessible from prelude let fast_kernel = FastKernel::::new(); let robust_kernel = RobustKernel::::new(); @@ -1252,8 +1274,6 @@ mod tests { #[test] fn test_prelude_core_types() { - use crate::prelude::*; - // Test that core types are accessible and work from prelude // Point construction let p1 = Point::new([0.0, 0.0, 0.0]); @@ -1292,8 +1312,6 @@ mod tests { #[test] fn test_prelude_point_location() { - use crate::prelude::*; - // Test that point location algorithms are accessible let vertices = vec![ vertex!([0.0, 0.0]), @@ -1326,8 +1344,6 @@ mod tests { #[test] fn test_prelude_geometry_types() { - use crate::prelude::*; - // Test Point with Coordinate trait let p = Point::new([1.0_f64, 2.0_f64, 3.0_f64]); assert!((p.coords()[0] - 1.0_f64).abs() < f64::EPSILON); @@ -1353,8 +1369,6 @@ mod tests { #[test] fn test_prelude_convex_hull() { - use crate::prelude::*; - // Test that convex hull operations are accessible let vertices = vec![ vertex!([0.0, 0.0, 0.0]), @@ -1390,9 +1404,7 @@ mod tests { /// Run these with `cargo test allocation_counting --features count-allocations` #[cfg(feature = "count-allocations")] #[test] - fn test_basic_allocation_counting() { - use 
allocation_counter::measure;
-
+    fn basic_alloc_counting() {
         // Test a trivial operation that should not allocate
         let result = measure(|| {
             let x = 1 + 1;
@@ -1428,9 +1440,7 @@ mod tests {
     #[cfg(feature = "count-allocations")]
     #[test]
-    fn test_allocation_counting_with_allocating_operation() {
-        use allocation_counter::measure;
-
+    fn alloc_counting_with_vec() {
         // Test an operation that does allocate memory
         let result = measure(|| {
             let _vec: Vec = vec![1, 2, 3, 4, 5];
diff --git a/tests/prelude_exports.rs b/tests/prelude_exports.rs
new file mode 100644
index 00000000..824e7c42
--- /dev/null
+++ b/tests/prelude_exports.rs
@@ -0,0 +1,38 @@
+//! Public prelude smoke tests.
+//!
+//! These tests intentionally use focused preludes instead of module-internal
+//! paths so doctests, integration tests, examples, and benchmarks have a small
+//! import contract to copy from.
+
+use delaunay::prelude::generators::generate_random_points_seeded;
+use delaunay::prelude::geometry::{AdaptiveKernel, Point};
+use delaunay::prelude::query::ConvexHull;
+use delaunay::prelude::triangulation::flips::{BistellarFlips, TopologyGuarantee};
+use delaunay::prelude::triangulation::{
+    ConstructionOptions, DelaunayTriangulation, InsertionOrderStrategy, Vertex,
+};
+use delaunay::vertex;
+
+const fn assert_bistellar_flips(_: &impl BistellarFlips<AdaptiveKernel<f64>, (), (), 3>) {}
+
+#[test]
+fn preludes_cover_bench_apis() {
+    let _generated_points: Vec<Point<f64, 3>> =
+        generate_random_points_seeded(3, (0.0, 1.0), 42).unwrap();
+
+    let vertices: Vec<Vertex<f64, (), 3>> = vec![
+        vertex!([0.0, 0.0, 0.0]),
+        vertex!([1.0, 0.0, 0.0]),
+        vertex!([0.0, 1.0, 0.0]),
+        vertex!([0.0, 0.0, 1.0]),
+    ];
+    let options =
+        ConstructionOptions::default().with_insertion_order(InsertionOrderStrategy::Input);
+    let dt = DelaunayTriangulation::new_with_options(&vertices, options).unwrap();
+
+    assert_eq!(dt.topology_guarantee(), TopologyGuarantee::PLManifold);
+    assert!(dt.boundary_facets().count() > 0);
+    assert!(ConvexHull::from_triangulation(dt.as_triangulation()).is_ok());
+    assert!(dt.validate().is_ok());
+    assert_bistellar_flips(&dt);
+}
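
For downstream code, the practical effect of the new `prelude::generators` module is an import swap: fixture helpers no longer need to come in through `prelude::query`. A minimal sketch of that usage follows; it is illustrative only and not part of this patch, and the `Point<f64, 3>` annotation, the point count, and the `(0.0, 1.0)` coordinate range are assumptions chosen to mirror the seeded calls shown above.

    // Previously, fixture helpers were pulled in via the query prelude:
    //     use delaunay::prelude::query::generate_random_points_seeded;
    // New fixture-only code can import just the generators prelude instead.
    use delaunay::prelude::generators::generate_random_points_seeded;
    use delaunay::prelude::geometry::Point;

    fn main() {
        // Seeded generation keeps doctests, examples, and benches reproducible:
        // arguments are (point count, coordinate range, RNG seed), as used elsewhere
        // in this patch; the 3D f64 point type here is an assumed instantiation.
        let points: Vec<Point<f64, 3>> =
            generate_random_points_seeded(16, (0.0, 1.0), 42).expect("seeded point generation");
        assert_eq!(points.len(), 16);
    }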