-
Notifications
You must be signed in to change notification settings - Fork 1
feat(eval): add ai-tools-roundup-2026 deterministic fixture bundle #23639
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| # Intel-to-Brief (Deterministic Fixture Run) | ||
|
|
||
| - Scope: public source intake + transcript fixture condensation. | ||
| - Output posture: evidence-backed, deterministic, policy-gated. | ||
| - Non-goals: image generation, site building, social scheduling. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| { | ||
| "title": "AI Tools Roundup 2026: Summit Subsumption Brief", | ||
| "sections": [ | ||
| "Signal framing", | ||
| "Target cluster coverage", | ||
| "Evidence-backed outputs", | ||
| "Governed automation handoff" | ||
| ], | ||
| "evidence": ["EVID:ai-tools-roundup-2026:deck-outline:0001"] | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| { | ||
| "item_slug": "ai-tools-roundup-2026", | ||
| "source_type": "social_roundup", | ||
| "source_attribution": "curated_third_party", | ||
| "evidence_prefix": "EVID:ai-tools-roundup-2026", | ||
| "schema_version": "1.0.0", | ||
| "classification": "market-signal" | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| { | ||
| "item_slug": "ai-tools-roundup-2026", | ||
| "fixtures": [ | ||
| {"id": "SRC-0001", "url": "https://example.org/ai-tools-roundup-source-01", "kind": "public_url"}, | ||
| {"id": "SRC-0002", "url": "https://example.org/ai-tools-roundup-source-02", "kind": "public_url"}, | ||
| {"id": "SRC-0003", "url": "https://example.org/ai-tools-roundup-source-03", "kind": "public_url"}, | ||
| {"id": "SRC-0004", "url": "https://example.org/ai-tools-roundup-source-04", "kind": "public_url"}, | ||
| {"id": "SRC-0005", "url": "https://example.org/ai-tools-roundup-source-05", "kind": "public_url"}, | ||
| {"id": "SRC-0006", "url": "https://example.org/ai-tools-roundup-source-06", "kind": "public_url"}, | ||
| {"id": "SRC-0007", "url": "https://example.org/ai-tools-roundup-source-07", "kind": "public_url"}, | ||
| {"id": "SRC-0008", "url": "https://example.org/ai-tools-roundup-source-08", "kind": "public_url"}, | ||
| {"id": "SRC-0009", "url": "https://example.org/ai-tools-roundup-source-09", "kind": "public_url"}, | ||
| {"id": "SRC-0010", "url": "https://example.org/ai-tools-roundup-source-10", "kind": "public_url"}, | ||
| { | ||
| "id": "TRN-0001", | ||
| "kind": "transcript_fixture", | ||
| "path": ".artifacts/subsumption/ai-tools-roundup-2026/fixtures/transcript-fixture.md" | ||
| } | ||
| ] | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| # Transcript Fixture | ||
|
|
||
| Meeting summary fixture for deterministic Intel-to-Brief evaluation. | ||
|
|
||
| - Decision: treat social roundup as market signal. | ||
| - Priority: research, summarization, meeting-intel, chat-over-corpus, briefing/deck, automation-handoff. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| { | ||
| "item_slug": "ai-tools-roundup-2026", | ||
| "allowlisted_exports": ["deck_outline_export", "qa_pack_export"], | ||
| "external_execution": false, | ||
| "mode": "dry-run", | ||
| "evidence": ["EVID:ai-tools-roundup-2026:handoff-manifest:0001"] | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| { | ||
| "item_slug": "ai-tools-roundup-2026", | ||
| "schema_version": "1.0.0", | ||
| "determinism": { | ||
| "byte_stable_report": true, | ||
| "byte_stable_metrics": true | ||
| }, | ||
| "coverage": { | ||
| "target_clusters_covered": 5, | ||
| "target_clusters_total": 6 | ||
| }, | ||
| "performance_budget": { | ||
| "p50_seconds": 90, | ||
| "p95_seconds": 240, | ||
| "max_cost_usd": 2 | ||
| }, | ||
| "evidence": [ | ||
| "EVID:ai-tools-roundup-2026:metrics:0001" | ||
| ] | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| { | ||
| "questions": [ | ||
| "What portion of target clusters are currently covered?", | ||
| "Which outputs are deterministic and replayable?" | ||
| ], | ||
| "answers": [ | ||
| "Current fixture score indicates 5 of 6 target clusters covered.", | ||
| "report.json and metrics.json are flagged byte-stable in deterministic fixture mode." | ||
| ], | ||
| "evidence": ["EVID:ai-tools-roundup-2026:qa-pack:0001"] | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| { | ||
| "item_slug": "ai-tools-roundup-2026", | ||
| "schema_version": "1.0.0", | ||
| "evidence": [ | ||
| "EVID:ai-tools-roundup-2026:report:0001", | ||
| "EVID:ai-tools-roundup-2026:report:0002", | ||
| "EVID:ai-tools-roundup-2026:report:0003", | ||
| "EVID:ai-tools-roundup-2026:report:0004" | ||
| ], | ||
| "clusters": [ | ||
| {"name": "research", "status": "covered"}, | ||
| {"name": "summarization", "status": "covered"}, | ||
| {"name": "meeting-intel", "status": "covered"}, | ||
| {"name": "chat-over-corpus", "status": "covered"}, | ||
| {"name": "brief-and-deck", "status": "covered"}, | ||
| {"name": "automation-handoff", "status": "partial"} | ||
| ] | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| { | ||
| "$schema": "https://json-schema.org/draft/2020-12/schema", | ||
| "title": "AI Tools Roundup Metrics", | ||
| "type": "object", | ||
| "required": ["item_slug", "schema_version", "determinism", "coverage"], | ||
| "properties": { | ||
| "item_slug": {"type": "string", "const": "ai-tools-roundup-2026"}, | ||
| "schema_version": {"type": "string"}, | ||
| "determinism": { | ||
| "type": "object", | ||
| "required": ["byte_stable_report", "byte_stable_metrics"], | ||
| "properties": { | ||
| "byte_stable_report": {"type": "boolean"}, | ||
| "byte_stable_metrics": {"type": "boolean"} | ||
| } | ||
| }, | ||
| "coverage": { | ||
| "type": "object", | ||
| "required": ["target_clusters_covered", "target_clusters_total"], | ||
| "properties": { | ||
| "target_clusters_covered": {"type": "integer", "minimum": 0}, | ||
| "target_clusters_total": {"type": "integer", "minimum": 1} | ||
| } | ||
| } | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| { | ||
| "$schema": "https://json-schema.org/draft/2020-12/schema", | ||
| "title": "AI Tools Roundup Report", | ||
| "type": "object", | ||
| "required": ["item_slug", "schema_version", "evidence", "clusters"], | ||
| "properties": { | ||
| "item_slug": {"type": "string", "const": "ai-tools-roundup-2026"}, | ||
| "schema_version": {"type": "string"}, | ||
| "evidence": { | ||
| "type": "array", | ||
| "items": {"type": "string", "pattern": "^EVID:ai-tools-roundup-2026:[a-z-]+:[0-9]{4}$"} | ||
| }, | ||
| "clusters": { | ||
| "type": "array", | ||
| "items": { | ||
| "type": "object", | ||
| "required": ["name", "status"], | ||
| "properties": { | ||
| "name": {"type": "string"}, | ||
| "status": {"type": "string", "enum": ["covered", "partial", "out_of_scope"]} | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| { | ||
| "$schema": "https://json-schema.org/draft/2020-12/schema", | ||
| "title": "AI Tools Roundup Stamp", | ||
| "type": "object", | ||
| "required": ["item_slug", "fixture_hash", "git_sha", "schema_version"], | ||
| "properties": { | ||
| "item_slug": {"type": "string", "const": "ai-tools-roundup-2026"}, | ||
| "fixture_hash": {"type": "string"}, | ||
| "git_sha": {"type": "string"}, | ||
| "schema_version": {"type": "string"} | ||
| }, | ||
| "additionalProperties": false | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| { | ||
| "item_slug": "ai-tools-roundup-2026", | ||
| "fixture_hash": "sha256:4ef4d8c91595ef89c91281f3f6fd3265d02ac9e6", | ||
| "git_sha": "workspace-placeholder", | ||
| "schema_version": "1.0.0" | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,45 @@ | ||
| import test from 'node:test'; | ||
| import assert from 'node:assert/strict'; | ||
| import { readFileSync } from 'node:fs'; | ||
|
|
||
| const ROOT = '.artifacts/subsumption/ai-tools-roundup-2026'; | ||
|
|
||
/**
 * Serializes a JSON-compatible value into a canonical string whose output
 * does not depend on object key insertion order.
 *
 * Arrays keep their element order. Object keys are sorted by UTF-16 code
 * unit (plain `<` / `>` comparison) rather than `localeCompare`, because
 * locale-aware collation can differ between hosts and would make the
 * "stable" form environment-dependent.
 *
 * Values that `JSON.stringify` cannot represent (`undefined`, functions,
 * symbols) are treated as it treats them: omitted from objects and rendered
 * as `null` inside arrays or at the top level, so the result is always a
 * string.
 *
 * @param value - Value to serialize; intended for data produced by `JSON.parse`.
 * @returns The canonical string form of `value`.
 */
function stable(value: unknown): string {
  if (Array.isArray(value)) {
    return `[${value.map((item) => stable(item)).join(',')}]`;
  }
  if (value !== null && typeof value === 'object') {
    const members = Object.entries(value as Record<string, unknown>)
      // Mirror JSON.stringify: members with non-serializable values are dropped.
      .filter(([, v]) => v !== undefined && typeof v !== 'function' && typeof v !== 'symbol')
      // Code-unit ordering is identical on every host, unlike localeCompare.
      .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));
    return `{${members.map(([k, v]) => `${JSON.stringify(k)}:${stable(v)}`).join(',')}}`;
  }
  // JSON.stringify returns undefined for undefined/function/symbol input.
  const primitive: string | undefined = JSON.stringify(value);
  return primitive === undefined ? 'null' : primitive;
}
|
|
||
/**
 * Reads the file at `path` as UTF-8 text and parses it as JSON.
 *
 * A leading byte-order mark (U+FEFF) is removed before parsing, because
 * `JSON.parse` rejects it. Parse failures are rethrown as a `SyntaxError`
 * whose message names the offending path.
 *
 * @param path - Location of the JSON file to read.
 * @returns The parsed JSON value.
 * @throws SyntaxError when the file content is not valid JSON.
 * @throws Whatever `readFileSync` throws when the file cannot be read.
 */
function loadJson(path: string) {
  const raw = readFileSync(path, 'utf8');
  // readFileSync does not strip a UTF-8 BOM; JSON.parse would fail on it.
  const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
  try {
    return JSON.parse(text);
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new SyntaxError(`Failed to parse JSON from ${path}: ${reason}`);
  }
}
|
|
||
| test('report and metrics are byte-stable for identical fixture input', () => { | ||
| const report = loadJson(`${ROOT}/report.json`); | ||
| const metrics = loadJson(`${ROOT}/metrics.json`); | ||
|
|
||
| const runA = { | ||
| report: stable(report), | ||
| metrics: stable(metrics), | ||
| }; | ||
|
|
||
| const runB = { | ||
| report: stable(loadJson(`${ROOT}/report.json`)), | ||
| metrics: stable(loadJson(`${ROOT}/metrics.json`)), | ||
|
Comment on lines
+34
to
+35
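There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
This test currently compares two reads of the same on-disk fixture files (`report.json` and `metrics.json`), so both sides of each assertion are always identical. Useful? React with 👍 / 👎. |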
||
| }; | ||
|
|
||
| assert.equal(runA.report, runB.report); | ||
| assert.equal(runA.metrics, runB.metrics); | ||
| }); | ||
|
|
||
// Checks that stamp.json exposes exactly the four expected top-level keys.
test('stamp contains deterministic keys only', () => {
  const expectedKeys = ['fixture_hash', 'git_sha', 'item_slug', 'schema_version'];
  const actualKeys = Object.keys(loadJson(`${ROOT}/stamp.json`)).sort();
  assert.deepEqual(actualKeys, expectedKeys);
});
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `fixture_hash` value is labeled as `sha256:` but only contains 40 hex characters, which is SHA-1-length rather than SHA-256-length. Any downstream verifier that expects a valid SHA-256 digest will reject or mis-handle this stamp, undermining the "machine-verifiable" integrity contract for the fixture bundle.
Useful? React with 👍 / 👎.