utooland · elrrrrrrr · May 8, 2026 · May 8, 2026 · May 8, 2026 · May 8, 2026
diff --git a/.github/workflows/pm-e2e-bench.yml b/.github/workflows/pm-e2e-bench.yml
@@ -143,6 +143,43 @@ jobs:
           name: utoo-linux-x64
           path: target/x86_64-unknown-linux-gnu/release/utoo
           retention-days: 1
+      # manifest-bench: standalone HTTP-only fetch sweeper, the network-only baseline
+      # for p1_resolve perf work. Built and uploaded only for PRs labeled `benchmark`
+      # or a dispatch targeting pm-bench-phases / pm-bench-pcap, so plain PR builds
+      # stay fast. The artifact name must match the bench jobs' download steps.
+      - name: Build manifest-bench (p1 baseline)
+        if: >
+          (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'benchmark')) ||
+          (github.event_name == 'workflow_dispatch' && (inputs.target == 'pm-bench-phases' || inputs.target == 'pm-bench-pcap'))
+        run: cargo build --release --target x86_64-unknown-linux-gnu -p manifest-bench
+      - name: Upload manifest-bench binary
+        if: >
+          (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'benchmark')) ||
+          (github.event_name == 'workflow_dispatch' && (inputs.target == 'pm-bench-phases' || inputs.target == 'pm-bench-pcap'))
+        uses: actions/upload-artifact@v4
+        with:
+          name: manifest-bench-linux-x64
+          path: target/x86_64-unknown-linux-gnu/release/manifest-bench
+          retention-days: 1
+      # preload-bench: same HTTP setup as manifest-bench, but it discovers package
+      # names by walking transitive deps from a package.json root. It tests whether
+      # a fully self-contained streaming preload can match standalone manifest-bench's
+      # wall-clock time on the workload that ruborist's path runs at ~2.18s. Both
+      # steps are gated by the same label/dispatch condition as manifest-bench.
+      - name: Build preload-bench
+        if: >
+          (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'benchmark')) ||
+          (github.event_name == 'workflow_dispatch' && (inputs.target == 'pm-bench-phases' || inputs.target == 'pm-bench-pcap'))
+        run: cargo build --release --target x86_64-unknown-linux-gnu -p preload-bench
+      - name: Upload preload-bench binary
+        if: >
+          (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'benchmark')) ||
+          (github.event_name == 'workflow_dispatch' && (inputs.target == 'pm-bench-phases' || inputs.target == 'pm-bench-pcap'))
+        uses: actions/upload-artifact@v4
+        with:
+          name: preload-bench-linux-x64
+          path: target/x86_64-unknown-linux-gnu/release/preload-bench
+          retention-days: 1
       # Piggyback on the already-built target/ from the step above: when the
       # PR is labeled `benchmark`, overlay origin/next's tree onto the current
       # workdir and re-run cargo build. cargo's incremental compile only
@@ -516,6 +553,33 @@ jobs:
           mv /tmp/utoo-next-dist/utoo /tmp/utoo-next
           echo "Baseline utoo (next) version: $(/tmp/utoo-next --version)"
           echo "UTOO_NEXT_BIN=/tmp/utoo-next" >> $GITHUB_ENV
+      # Fetch the manifest-bench binary built by build-linux, restore its exec
+      # bit, and export MANIFEST_BENCH_BIN. It is the network-only baseline for
+      # p1_resolve: no parse, BFS, dedup or lockfile write, just HTTP fetch time.
+      - name: Download manifest-bench binary
+        uses: actions/download-artifact@v4
+        with:
+          name: manifest-bench-linux-x64
+          path: /tmp/manifest-bench-dist
+      - name: Install manifest-bench
+        run: |
+          chmod +x /tmp/manifest-bench-dist/manifest-bench
+          mv /tmp/manifest-bench-dist/manifest-bench /tmp/manifest-bench
+          echo "MANIFEST_BENCH_BIN=/tmp/manifest-bench" >> $GITHUB_ENV
+      # Fetch the self-contained streaming preload bench, make it executable,
+      # and export PRELOAD_BENCH_BIN. Same HTTP setup as manifest-bench, but it
+      # discovers names via a transitive walk from a package.json — used to test
+      # whether a fully isolated path can match its wall time on the same workload.
+      - name: Download preload-bench binary
+        uses: actions/download-artifact@v4
+        with:
+          name: preload-bench-linux-x64
+          path: /tmp/preload-bench-dist
+      - name: Install preload-bench
+        run: |
+          chmod +x /tmp/preload-bench-dist/preload-bench
+          mv /tmp/preload-bench-dist/preload-bench /tmp/preload-bench
+          echo "PRELOAD_BENCH_BIN=/tmp/preload-bench" >> $GITHUB_ENV
       - name: Verify tools
         run: |
           hyperfine --version
@@ -565,6 +629,91 @@ jobs:
         run: |
           mkdir -p /tmp/pm-bench-output
           bash bench/pm-bench-phases.sh 2>&1 | tee /tmp/pm-bench-output/bench-phases-npmmirror.log
+      # Standalone HTTP-only sweep of the network-only ceiling, run against the
+      # same lockfile-derived workload phase-bench just used. Output goes into the
+      # bench logs artifact (no PR comment); failed or skipped runs emit a warning.
+      - name: Standalone manifest-bench (HTTP-only sweep)
+        env:
+          PROJECT: ${{ github.event.inputs.project || 'ant-design' }}
+          REGISTRY: 'https://registry.npmjs.org'
+        run: |
+          set -eu
+          mkdir -p /tmp/pm-bench-output
+          PROJECT_DIR="/tmp/pm-bench/$PROJECT"
+          if [ ! -d "$PROJECT_DIR" ]; then  # fallback clone; NOTE(review): assumes the ant-design org — confirm
+            mkdir -p /tmp/pm-bench
+            git clone --depth 1 "https://github.com/ant-design/$PROJECT" "$PROJECT_DIR"
+          fi
+          cd "$PROJECT_DIR"
+          if [ ! -f package-lock.json ]; then
+            echo "==> generating lockfile via utoo (one-shot, untimed)"
+            utoo deps --registry "$REGISTRY" || true  # best effort; the ls check below skips if no lockfile exists
+          fi
+          ls -la package-lock.json || { echo "::warning::no lockfile; skipping manifest-bench"; exit 0; }
+
+          MB_LOG=/tmp/pm-bench-output/manifest-bench-npmjs.log
+          {
+            echo "============================================================"
+            echo "manifest-bench: HTTP-only fetch (no parse, no resolver)"
+            echo "  Goal: isolate reqwest/rustls/tokio behaviour from"
+            echo "  ruborist's resolver pipeline. Same metric shape as"
+            echo "  ruborist's p1-breakdown line."
+            echo "============================================================"
+            for CAP in 32 64 96 128 192 256; do
+              echo
+              echo "--- concurrency=$CAP, h1, full manifest, default UA ---"
+              "$MANIFEST_BENCH_BIN" --lockfile package-lock.json --registry "$REGISTRY" --concurrency "$CAP" \
+                --reps 2 --http1-only || echo "::warning::manifest-bench failed: h1, concurrency=$CAP"
+            done
+            echo
+            echo "--- concurrency=128, h2 negotiate, full manifest, default UA ---"
+            "$MANIFEST_BENCH_BIN" --lockfile package-lock.json --registry "$REGISTRY" --concurrency 128 \
+              --reps 2 || echo "::warning::manifest-bench failed: h2 negotiate, concurrency=128"
+            echo
+            echo "--- concurrency=128, h1, single-version endpoint ---"
+            "$MANIFEST_BENCH_BIN" --lockfile package-lock.json --registry "$REGISTRY" --concurrency 128 \
+              --reps 2 --http1-only --single-version || echo "::warning::manifest-bench failed: single-version"
+            echo
+            echo "--- concurrency=128, h1, UA=Bun/1.2.21 ---"
+            "$MANIFEST_BENCH_BIN" --lockfile package-lock.json --registry "$REGISTRY" --concurrency 128 \
+              --reps 2 --http1-only --user-agent "Bun/1.2.21" || echo "::warning::manifest-bench failed: UA=Bun/1.2.21"
+          } 2>&1 | tee "$MB_LOG"
+      # Self-contained streaming preload (transitive walk from package.json): same
+      # HTTP setup as manifest-bench, but a streaming FuturesUnordered + per-future
+      # parse. Tests whether a fully ruborist-independent path can hit standalone
+      # manifest-bench's wall time on the same project; never clones, and warns on
+      # skip/failure. NOTE(review): banner says "h1" but no --http1-only is passed.
+      - name: Standalone preload-bench (transitive walk sweep)
+        env:
+          PROJECT: ${{ github.event.inputs.project || 'ant-design' }}
+          REGISTRY: 'https://registry.npmjs.org'
+        run: |
+          set -eu
+          mkdir -p /tmp/pm-bench-output
+          PROJECT_DIR="/tmp/pm-bench/$PROJECT"
+          if [ ! -d "$PROJECT_DIR" ]; then
+            echo "::warning::no project dir; skipping preload-bench"; exit 0
+          fi
+          PJ="$PROJECT_DIR/package.json"
+          if [ ! -f "$PJ" ]; then
+            echo "::warning::no package.json; skipping preload-bench"; exit 0
+          fi
+
+          PB_LOG=/tmp/pm-bench-output/preload-bench-npmjs.log
+          {
+            echo "============================================================"
+            echo "preload-bench: streaming transitive-walk preload"
+            echo "  Self-contained (no ruborist deps). Same HTTP setup as"
+            echo "  manifest-bench, but discovers names by walking transitive"
+            echo "  deps from package.json instead of consuming a flat list."
+            echo "============================================================"
+            for CAP in 64 96 128; do
+              echo
+              echo "--- concurrency=$CAP, h1, transitive walk ---"
+              "$PRELOAD_BENCH_BIN" --package-json "$PJ" --registry "$REGISTRY" \
+                --concurrency "$CAP" --reps 4 || echo "::warning::preload-bench failed: concurrency=$CAP"
+            done
+          } 2>&1 | tee "$PB_LOG"
       - name: Upload bench logs
         if: always()
         uses: actions/upload-artifact@v4
@@ -851,17 +1000,59 @@ jobs:
           mv /tmp/utoo-next-dist/utoo /tmp/utoo-next
           echo "Baseline utoo (next) version: $(/tmp/utoo-next --version)"
           echo "UTOO_NEXT_BIN=/tmp/utoo-next" >> $GITHUB_ENV
+      # manifest-bench + preload-bench binaries for pcap-comparing
+      # utoo's TCP-level behaviour against pure-HTTP and
+      # transitive-walk baselines. The install steps export
+      # MANIFEST_BENCH_BIN / PRELOAD_BENCH_BIN via $GITHUB_ENV,
+      # presumably for bench/pm-bench-pcap.sh below — confirm there.
+      - name: Download manifest-bench binary
+        uses: actions/download-artifact@v4
+        with:
+          name: manifest-bench-linux-x64
+          path: /tmp/manifest-bench-dist
+      - name: Install manifest-bench
+        run: |
+          chmod +x /tmp/manifest-bench-dist/manifest-bench
+          mv /tmp/manifest-bench-dist/manifest-bench /tmp/manifest-bench
+          echo "MANIFEST_BENCH_BIN=/tmp/manifest-bench" >> $GITHUB_ENV
+      - name: Download preload-bench binary
+        uses: actions/download-artifact@v4
+        with:
+          name: preload-bench-linux-x64
+          path: /tmp/preload-bench-dist
+      - name: Install preload-bench
+        run: |
+          chmod +x /tmp/preload-bench-dist/preload-bench
+          mv /tmp/preload-bench-dist/preload-bench /tmp/preload-bench
+          echo "PRELOAD_BENCH_BIN=/tmp/preload-bench" >> $GITHUB_ENV
       - name: Capture pcap
         env:
           PROJECT: ${{ github.event.inputs.project || 'ant-design' }}
           REGISTRY: 'https://registry.npmjs.org'
         run: |
           chmod +x bench/pm-bench-pcap.sh
           bash bench/pm-bench-pcap.sh
+      # Small artifact (KB scale) with just the per-capture +
+      # aggregated metrics — fast to download for diff analysis,
+      # avoids the 2GB pcap-corpus pull when we only need numbers.
+      # Holds the tshark-extracted JSON metrics and the text logs,
+      # but no .pcap binaries. Uploaded exactly once per run:
+      # upload-artifact@v4 rejects a second upload under the same name.
+      - name: Upload pcap summaries
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: pm-bench-pcap-summaries
+          path: |
+            /tmp/pm-bench-pcap/*.json
+            /tmp/pm-bench-pcap/*.log
+            /tmp/pm-bench-pcap/*.iostat.txt
+            /tmp/pm-bench-pcap/dns.txt
+          retention-days: 7
       - name: Upload pcap artifact
         if: always()
         uses: actions/upload-artifact@v4
         with:
           name: pm-bench-pcap
           path: /tmp/pm-bench-pcap
           retention-days: 7