diff --git a/.github/workflows/pm-e2e-bench.yml b/.github/workflows/pm-e2e-bench.yml index 74c90ece5..5b219199a 100644 --- a/.github/workflows/pm-e2e-bench.yml +++ b/.github/workflows/pm-e2e-bench.yml @@ -143,6 +143,43 @@ jobs: name: utoo-linux-x64 path: target/x86_64-unknown-linux-gnu/release/utoo retention-days: 1 + # manifest-bench is a standalone HTTP-only fetch sweeper used as + # the network-only baseline for p1_resolve perf work. Built only + # when phases bench is going to run (label or dispatch), so plain + # PR builds aren't slowed by the extra crate. + - name: Build manifest-bench (p1 baseline) + if: > + (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'benchmark')) || + (github.event_name == 'workflow_dispatch' && (inputs.target == 'pm-bench-phases' || inputs.target == 'pm-bench-pcap')) + run: cargo build --release --target x86_64-unknown-linux-gnu -p manifest-bench + - name: Upload manifest-bench binary + if: > + (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'benchmark')) || + (github.event_name == 'workflow_dispatch' && (inputs.target == 'pm-bench-phases' || inputs.target == 'pm-bench-pcap')) + uses: actions/upload-artifact@v4 + with: + name: manifest-bench-linux-x64 + path: target/x86_64-unknown-linux-gnu/release/manifest-bench + retention-days: 1 + # preload-bench: same HTTP setup as manifest-bench, but discovers + # names by walking transitive deps from a package.json root — + # tests whether a fully self-contained streaming preload can match + # standalone manifest-bench's wall on the same workload that + # ruborist's path runs at ~2.18s. 
+ - name: Build preload-bench + if: > + (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'benchmark')) || + (github.event_name == 'workflow_dispatch' && (inputs.target == 'pm-bench-phases' || inputs.target == 'pm-bench-pcap')) + run: cargo build --release --target x86_64-unknown-linux-gnu -p preload-bench + - name: Upload preload-bench binary + if: > + (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'benchmark')) || + (github.event_name == 'workflow_dispatch' && (inputs.target == 'pm-bench-phases' || inputs.target == 'pm-bench-pcap')) + uses: actions/upload-artifact@v4 + with: + name: preload-bench-linux-x64 + path: target/x86_64-unknown-linux-gnu/release/preload-bench + retention-days: 1 # Piggyback on the already-built target/ from the step above: when the # PR is labeled `benchmark`, overlay origin/next's tree onto the current # workdir and re-run cargo build. cargo's incremental compile only @@ -516,6 +553,33 @@ jobs: mv /tmp/utoo-next-dist/utoo /tmp/utoo-next echo "Baseline utoo (next) version: $(/tmp/utoo-next --version)" echo "UTOO_NEXT_BIN=/tmp/utoo-next" >> $GITHUB_ENV + # Download the manifest-bench binary built by build-linux. Used as + # the network-only baseline for p1_resolve work — strips out parse, + # BFS, dedup, lockfile write so the wall is pure HTTP fetch. + - name: Download manifest-bench binary + uses: actions/download-artifact@v4 + with: + name: manifest-bench-linux-x64 + path: /tmp/manifest-bench-dist + - name: Install manifest-bench + run: | + chmod +x /tmp/manifest-bench-dist/manifest-bench + mv /tmp/manifest-bench-dist/manifest-bench /tmp/manifest-bench + echo "MANIFEST_BENCH_BIN=/tmp/manifest-bench" >> $GITHUB_ENV + # Self-contained streaming preload bench — same HTTP setup as + # manifest-bench but discovers names via transitive walk from a + # package.json. 
Used to test whether a fully-isolated path can + # match standalone manifest-bench's wall on the same workload. + - name: Download preload-bench binary + uses: actions/download-artifact@v4 + with: + name: preload-bench-linux-x64 + path: /tmp/preload-bench-dist + - name: Install preload-bench + run: | + chmod +x /tmp/preload-bench-dist/preload-bench + mv /tmp/preload-bench-dist/preload-bench /tmp/preload-bench + echo "PRELOAD_BENCH_BIN=/tmp/preload-bench" >> $GITHUB_ENV - name: Verify tools run: | hyperfine --version @@ -565,6 +629,91 @@ jobs: run: | mkdir -p /tmp/pm-bench-output bash bench/pm-bench-phases.sh 2>&1 | tee /tmp/pm-bench-output/bench-phases-npmmirror.log + # Standalone HTTP-only sweep — sweeps the network-only ceiling + # against the same lockfile-derived workload phase-bench just used. + # Output goes into the bench logs artifact; no PR comment surface. + - name: Standalone manifest-bench (HTTP-only sweep) + env: + PROJECT: ${{ github.event.inputs.project || 'ant-design' }} + REGISTRY: 'https://registry.npmjs.org' + run: | + set -eu + mkdir -p /tmp/pm-bench-output + PROJECT_DIR="/tmp/pm-bench/$PROJECT" + if [ ! -d "$PROJECT_DIR" ]; then + mkdir -p /tmp/pm-bench + git clone --depth 1 "https://github.com/ant-design/$PROJECT" "$PROJECT_DIR" + fi + cd "$PROJECT_DIR" + if [ ! -f package-lock.json ]; then + echo "==> generating lockfile via utoo (one-shot, untimed)" + utoo deps --registry "$REGISTRY" || true + fi + ls -la package-lock.json || { echo "no lockfile; skipping manifest-bench"; exit 0; } + + MB_LOG=/tmp/pm-bench-output/manifest-bench-npmjs.log + { + echo "============================================================" + echo "manifest-bench: HTTP-only fetch (no parse, no resolver)" + echo " Goal: isolate reqwest/rustls/tokio behaviour from" + echo " ruborist's resolver pipeline. Same metric shape as" + echo " ruborist's p1-breakdown line." 
+ echo "============================================================" + for CAP in 32 64 96 128 192 256; do + echo + echo "--- concurrency=$CAP, h1, full manifest, default UA ---" + "$MANIFEST_BENCH_BIN" --lockfile package-lock.json --registry "$REGISTRY" \ + --concurrency "$CAP" --reps 2 --http1-only || true + done + echo + echo "--- concurrency=128, h2 negotiate, full manifest, default UA ---" + "$MANIFEST_BENCH_BIN" --lockfile package-lock.json --registry "$REGISTRY" \ + --concurrency 128 --reps 2 || true + echo + echo "--- concurrency=128, h1, single-version endpoint ---" + "$MANIFEST_BENCH_BIN" --lockfile package-lock.json --registry "$REGISTRY" \ + --concurrency 128 --reps 2 --http1-only --single-version || true + echo + echo "--- concurrency=128, h1, UA=Bun/1.2.21 ---" + "$MANIFEST_BENCH_BIN" --lockfile package-lock.json --registry "$REGISTRY" \ + --concurrency 128 --reps 2 --http1-only --user-agent "Bun/1.2.21" || true + } 2>&1 | tee "$MB_LOG" + # Self-contained streaming preload (transitive walk from + # package.json) — same HTTP setup as manifest-bench but with a + # streaming FuturesUnordered + per-future parse. This tests + # whether a fully ruborist-independent path can hit standalone + # manifest-bench's wall under the same project workload. + - name: Standalone preload-bench (transitive walk sweep) + env: + PROJECT: ${{ github.event.inputs.project || 'ant-design' }} + REGISTRY: 'https://registry.npmjs.org' + run: | + set -eu + mkdir -p /tmp/pm-bench-output + PROJECT_DIR="/tmp/pm-bench/$PROJECT" + if [ ! -d "$PROJECT_DIR" ]; then + echo "no project dir; skipping preload-bench"; exit 0 + fi + PJ="$PROJECT_DIR/package.json" + if [ ! -f "$PJ" ]; then + echo "no package.json; skipping preload-bench"; exit 0 + fi + + PB_LOG=/tmp/pm-bench-output/preload-bench-npmjs.log + { + echo "============================================================" + echo "preload-bench: streaming transitive-walk preload" + echo " Self-contained (no ruborist deps). 
Same HTTP setup as" + echo " manifest-bench, but discovers names by walking transitive" + echo " deps from package.json instead of consuming a flat list." + echo "============================================================" + for CAP in 64 96 128; do + echo + echo "--- concurrency=$CAP, h1, transitive walk ---" + "$PRELOAD_BENCH_BIN" --package-json "$PJ" --registry "$REGISTRY" \ + --concurrency "$CAP" --reps 4 || true + done + } 2>&1 | tee "$PB_LOG" - name: Upload bench logs if: always() uses: actions/upload-artifact@v4 @@ -851,6 +1000,29 @@ jobs: mv /tmp/utoo-next-dist/utoo /tmp/utoo-next echo "Baseline utoo (next) version: $(/tmp/utoo-next --version)" echo "UTOO_NEXT_BIN=/tmp/utoo-next" >> $GITHUB_ENV + # manifest-bench + preload-bench binaries for pcap-comparing + # utoo's TCP-level behaviour against pure-HTTP and + # transitive-walk baselines. + - name: Download manifest-bench binary + uses: actions/download-artifact@v4 + with: + name: manifest-bench-linux-x64 + path: /tmp/manifest-bench-dist + - name: Install manifest-bench + run: | + chmod +x /tmp/manifest-bench-dist/manifest-bench + mv /tmp/manifest-bench-dist/manifest-bench /tmp/manifest-bench + echo "MANIFEST_BENCH_BIN=/tmp/manifest-bench" >> $GITHUB_ENV + - name: Download preload-bench binary + uses: actions/download-artifact@v4 + with: + name: preload-bench-linux-x64 + path: /tmp/preload-bench-dist + - name: Install preload-bench + run: | + chmod +x /tmp/preload-bench-dist/preload-bench + mv /tmp/preload-bench-dist/preload-bench /tmp/preload-bench + echo "PRELOAD_BENCH_BIN=/tmp/preload-bench" >> $GITHUB_ENV - name: Capture pcap env: PROJECT: ${{ github.event.inputs.project || 'ant-design' }} @@ -858,6 +1030,20 @@ jobs: run: | chmod +x bench/pm-bench-pcap.sh bash bench/pm-bench-pcap.sh + # Small artifact (KB scale) with just the per-capture + + # aggregated metrics — fast to download for diff analysis, + # avoids the 2GB pcap-corpus pull when we only need numbers. 
+ - name: Upload pcap summaries + if: always() + uses: actions/upload-artifact@v4 + with: + name: pm-bench-pcap-summaries + path: | + /tmp/pm-bench-pcap/*.json + /tmp/pm-bench-pcap/*.log + /tmp/pm-bench-pcap/*.iostat.txt + /tmp/pm-bench-pcap/dns.txt + retention-days: 7 - name: Upload pcap artifact if: always() uses: actions/upload-artifact@v4 @@ -865,3 +1051,18 @@ jobs: name: pm-bench-pcap path: /tmp/pm-bench-pcap retention-days: 7 + # Tiny summary-only artifact for quick comparison without + # re-downloading the multi-GB pcap blob. Includes the + # tshark-extracted JSON metrics + the pcap.log files (text, + # tiny) but no .pcap binaries. + - name: Upload pcap summaries (small) + if: always() + uses: actions/upload-artifact@v4 + with: + name: pm-bench-pcap-summaries + path: | + /tmp/pm-bench-pcap/*.json + /tmp/pm-bench-pcap/*.log + /tmp/pm-bench-pcap/*.iostat.txt + /tmp/pm-bench-pcap/dns.txt + retention-days: 7 diff --git a/Cargo.lock b/Cargo.lock index 3a136807b..c4b103915 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -61,6 +61,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + [[package]] name = "allocator-api2" version = "0.2.21" @@ -573,6 +588,27 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "brotli" +version = "8.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "5.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + [[package]] name = "browserslist-data" version = "0.1.4" @@ -1110,6 +1146,7 @@ version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef8a506ec4b81c460798f572caead636d57d3d7e940f998160f52bd254bf2d23" dependencies = [ + "brotli", "compression-core", "flate2", "memchr", @@ -1690,23 +1727,6 @@ dependencies = [ "syn 2.0.106", ] -[[package]] -name = "ctor" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83cf0d42651b16c6dfe68685716d18480d18a9c39c62d76e8cf3eb6ed5d8bcbf" -dependencies = [ - "ctor-proc-macro", - "dtor", - "link-section", -] - -[[package]] -name = "ctor-proc-macro" -version = "0.0.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a949c44fcacbbbb7ada007dc7acb34603dd97cd47de5d054f2b6493ecebb483" - [[package]] name = "cty" version = "0.2.2" @@ -2246,21 +2266,6 @@ dependencies = [ "dtoa", ] -[[package]] -name = "dtor" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edf234dd1594d6dd434a8fb8cada51ddbbc593e40e4a01556a0b31c62da2775b" -dependencies = [ - "dtor-proc-macro", -] - -[[package]] -name = "dtor-proc-macro" -version = "0.0.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2647271c92754afcb174e758003cfd1cbf1e43e5a7853d7b1813e63e19e39a73" - [[package]] name = "dunce" version = "1.0.5" @@ -4824,12 +4829,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "link-section" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b685d66585d646efe09fec763d796c291049c8b6bf84e04954bffc8748341f0d" - [[package]] name = "linked-hash-map" version = "0.5.6" @@ -4944,6 +4943,21 @@ version = "0.8.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "0ca88d725a0a943b096803bd34e73a4437208b6077654cc4ecb2947a5f91618d" +[[package]] +name = "manifest-bench" +version = "0.0.0" +dependencies = [ + "anyhow", + "clap", + "futures", + "reqwest 0.12.24", + "rustls", + "rustls-native-certs", + "serde", + "serde_json", + "tokio", +] + [[package]] name = "markdown" version = "1.0.0" @@ -5380,7 +5394,7 @@ checksum = "55740c4ae1d8696773c78fdafd5d0e5fe9bc9f1b071c7ba493ba5c413a9184f3" dependencies = [ "anyhow", "bitflags 2.9.4", - "ctor 0.2.9", + "ctor", "napi-derive", "napi-sys", "once_cell", @@ -6358,6 +6372,22 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84350ffee5cedfabf9bee3e8825721f651da8ff79d50fe7a37cf0ca015c428ee" +[[package]] +name = "preload-bench" +version = "0.0.0" +dependencies = [ + "anyhow", + "clap", + "futures", + "reqwest 0.12.24", + "rustls", + "rustls-native-certs", + "serde", + "serde_json", + "simd-json", + "tokio", +] + [[package]] name = "preset_env_base" version = "7.0.0" @@ -7208,9 +7238,9 @@ dependencies = [ [[package]] name = "roaring" -version = "0.11.4" +version = "0.10.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dedc5658c6ecb3bdb5ef5f3295bb9253f42dcf3fd1402c03f6b1f7659c3c4a9" +checksum = "19e8d2cfa184d94d0726d650a9f4a1be7f9b76ac9fdb954219878dc00c1c1e7b" dependencies = [ "bytemuck", "byteorder", @@ -8115,9 +8145,9 @@ dependencies = [ [[package]] name = "styled_components" -version = "4.0.0" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72418ea605a423c70ffa8590196c83b04b04636fd25aaceabe0fa7f1e15f66f0" +checksum = "99aeadac58111060ad883c7e7a01917bcecc6572243c06d41315f200cbaa9240" dependencies = [ "Inflector", "once_cell", @@ -8134,9 +8164,9 @@ dependencies = [ [[package]] name = "styled_jsx" -version = "4.0.0" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "98cc5352e19f02be3ba10fb9ecbcd0d72e9b2d9762965712f1cbe737d1f428ec" +checksum = "c3917b257122e7cf3f46f95557af3178edaa9a3fd89fc1469768e05f01901e98" dependencies = [ "anyhow", "lightningcss", @@ -8155,9 +8185,9 @@ dependencies = [ "swc_css_prefixer", "swc_css_visit", "swc_ecma_ast 23.0.0", - "swc_ecma_minifier", - "swc_ecma_parser 39.0.2", - "swc_ecma_transforms_base 42.0.0", + "swc_ecma_minifier 51.1.0", + "swc_ecma_parser 38.0.2", + "swc_ecma_transforms_base 41.0.1", "swc_ecma_utils 29.1.0", "swc_ecma_visit 23.0.0", "swc_plugin_macro", @@ -8170,6 +8200,57 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "swc" +version = "61.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb7d502b72d0b5e059cefe3a55825c43587a2e3c81025862694e52deecddc3de" +dependencies = [ + "anyhow", + "base64 0.22.1", + "bytes-str", + "dashmap 5.5.3", + "either", + "indexmap 2.13.0", + "jsonc-parser", + "once_cell", + "par-core", + "par-iter", + "parking_lot", + "regex", + "rustc-hash 2.1.1", + "serde", + "serde_json", + "swc_atoms", + "swc_common 21.0.1", + "swc_compiler_base 54.0.0", + "swc_config", + "swc_ecma_ast 23.0.0", + "swc_ecma_codegen 26.0.1", + "swc_ecma_ext_transforms", + "swc_ecma_loader", + "swc_ecma_minifier 51.1.0", + "swc_ecma_parser 38.0.2", + "swc_ecma_preset_env 52.0.0", + "swc_ecma_transforms 51.0.0", + "swc_ecma_transforms_base 41.0.1", + "swc_ecma_transforms_compat 47.0.0", + "swc_ecma_transforms_optimization 43.0.0", + "swc_ecma_utils 29.1.0", + "swc_ecma_visit 23.0.0", + "swc_error_reporters", + "swc_node_comments", + "swc_plugin_proxy", + "swc_plugin_runner", + "swc_sourcemap", + "swc_timer", + "swc_transform_common", + "swc_visit", + "tokio", + "tracing", + "url", +] + [[package]] name = "swc" version = "63.0.0" @@ -8193,19 +8274,19 @@ dependencies = [ "serde_json", "swc_atoms", "swc_common 
21.0.1", - "swc_compiler_base", + "swc_compiler_base 55.0.0", "swc_config", "swc_ecma_ast 23.0.0", "swc_ecma_codegen 26.0.1", "swc_ecma_ext_transforms", "swc_ecma_loader", - "swc_ecma_minifier", + "swc_ecma_minifier 52.0.4", "swc_ecma_parser 39.0.2", - "swc_ecma_preset_env", - "swc_ecma_transforms", + "swc_ecma_preset_env 53.0.0", + "swc_ecma_transforms 52.0.0", "swc_ecma_transforms_base 42.0.0", - "swc_ecma_transforms_compat", - "swc_ecma_transforms_optimization", + "swc_ecma_transforms_compat 48.0.0", + "swc_ecma_transforms_optimization 44.0.0", "swc_ecma_utils 29.1.0", "swc_ecma_visit 23.0.0", "swc_error_reporters", @@ -8322,6 +8403,32 @@ dependencies = [ "url", ] +[[package]] +name = "swc_compiler_base" +version = "54.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "386c6121a98d7630ef5a07b79acee964c778568d61d3b76a188be17f19418a9c" +dependencies = [ + "anyhow", + "base64 0.22.1", + "bytes-str", + "once_cell", + "pathdiff", + "rustc-hash 2.1.1", + "serde", + "serde_json", + "swc_atoms", + "swc_common 21.0.1", + "swc_config", + "swc_ecma_ast 23.0.0", + "swc_ecma_codegen 26.0.1", + "swc_ecma_minifier 51.1.0", + "swc_ecma_parser 38.0.2", + "swc_ecma_visit 23.0.0", + "swc_sourcemap", + "swc_timer", +] + [[package]] name = "swc_compiler_base" version = "55.0.0" @@ -8341,7 +8448,7 @@ dependencies = [ "swc_config", "swc_ecma_ast 23.0.0", "swc_ecma_codegen 26.0.1", - "swc_ecma_minifier", + "swc_ecma_minifier 52.0.4", "swc_ecma_parser 39.0.2", "swc_ecma_visit 23.0.0", "swc_sourcemap", @@ -8398,6 +8505,38 @@ dependencies = [ "vergen", ] +[[package]] +name = "swc_core" +version = "63.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb9470306b0d532da617be037de878f64ec0f04cb364d920e8cee05d658d66de" +dependencies = [ + "par-core", + "swc 61.0.0", + "swc_allocator", + "swc_atoms", + "swc_common 21.0.1", + "swc_ecma_ast 23.0.0", + "swc_ecma_codegen 26.0.1", + "swc_ecma_lints", + "swc_ecma_loader", + 
"swc_ecma_minifier 51.1.0", + "swc_ecma_parser 38.0.2", + "swc_ecma_preset_env 52.0.0", + "swc_ecma_quote_macros", + "swc_ecma_transforms_base 41.0.1", + "swc_ecma_transforms_optimization 43.0.0", + "swc_ecma_transforms_proposal 41.0.3", + "swc_ecma_transforms_react 45.0.0", + "swc_ecma_transforms_typescript 45.0.2", + "swc_ecma_utils 29.1.0", + "swc_ecma_visit 23.0.0", + "swc_plugin_proxy", + "swc_plugin_runner", + "testing", + "vergen", +] + [[package]] name = "swc_core" version = "65.0.3" @@ -8405,28 +8544,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "898413141c6d3e1fed24ac3a4c57cc61ef98194df2a7957820d48ad158a318f6" dependencies = [ "par-core", - "swc", + "swc 63.0.0", "swc_allocator", "swc_atoms", "swc_common 21.0.1", "swc_ecma_ast 23.0.0", "swc_ecma_codegen 26.0.1", - "swc_ecma_lints", "swc_ecma_loader", - "swc_ecma_minifier", + "swc_ecma_minifier 52.0.4", "swc_ecma_parser 39.0.2", - "swc_ecma_preset_env", - "swc_ecma_quote_macros", + "swc_ecma_preset_env 53.0.0", "swc_ecma_transforms_base 42.0.0", - "swc_ecma_transforms_optimization", - "swc_ecma_transforms_proposal", - "swc_ecma_transforms_react", - "swc_ecma_transforms_typescript", + "swc_ecma_transforms_optimization 44.0.0", + "swc_ecma_transforms_react 46.0.1", + "swc_ecma_transforms_typescript 46.0.1", "swc_ecma_utils 29.1.0", "swc_ecma_visit 23.0.0", "swc_plugin_proxy", "swc_plugin_runner", - "testing", "vergen", ] @@ -8656,6 +8791,24 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "swc_ecma_compat_bugfixes" +version = "46.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22d4da77f7014b5efd416bb5208ab6e3d005ad5d532df8ced2904e50ca233d44" +dependencies = [ + "rustc-hash 2.1.1", + "swc_atoms", + "swc_common 21.0.1", + "swc_ecma_ast 23.0.0", + "swc_ecma_compat_es2015 45.0.0", + "swc_ecma_transforms_base 41.0.1", + "swc_ecma_utils 29.1.0", + "swc_ecma_visit 23.0.0", + "swc_trace_macro", + "tracing", +] + [[package]] name = 
"swc_ecma_compat_bugfixes" version = "47.0.0" @@ -8666,7 +8819,7 @@ dependencies = [ "swc_atoms", "swc_common 21.0.1", "swc_ecma_ast 23.0.0", - "swc_ecma_compat_es2015", + "swc_ecma_compat_es2015 46.0.0", "swc_ecma_transforms_base 42.0.0", "swc_ecma_utils 29.1.0", "swc_ecma_visit 23.0.0", @@ -8674,6 +8827,18 @@ dependencies = [ "tracing", ] +[[package]] +name = "swc_ecma_compat_common" +version = "37.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d72d7d499e4bd4059ccfe432c1a52111a28fdd2b49b3882f18108fddfa3f6b4f" +dependencies = [ + "swc_common 21.0.1", + "swc_ecma_ast 23.0.0", + "swc_ecma_transformer 13.0.0", + "swc_ecma_utils 29.1.0", +] + [[package]] name = "swc_ecma_compat_common" version = "38.0.0" @@ -8682,8 +8847,36 @@ checksum = "04b936fe418e2bd707298357f560d269c1bdedc86a2325f7163307fe140806bd" dependencies = [ "swc_common 21.0.1", "swc_ecma_ast 23.0.0", - "swc_ecma_transformer", + "swc_ecma_transformer 14.0.0", + "swc_ecma_utils 29.1.0", +] + +[[package]] +name = "swc_ecma_compat_es2015" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5095800ee11e7c37df38a2e0fae2caa9d98b7801121d5f5ce70710ab65e21ec7" +dependencies = [ + "arrayvec", + "indexmap 2.13.0", + "is-macro", + "rustc-hash 2.1.1", + "serde", + "serde_derive", + "smallvec", + "swc_atoms", + "swc_common 21.0.1", + "swc_config", + "swc_ecma_ast 23.0.0", + "swc_ecma_compat_common 37.0.0", + "swc_ecma_transformer 13.0.0", + "swc_ecma_transforms_base 41.0.1", + "swc_ecma_transforms_classes 41.0.1", + "swc_ecma_transforms_macros", "swc_ecma_utils 29.1.0", + "swc_ecma_visit 23.0.0", + "swc_trace_macro", + "tracing", ] [[package]] @@ -8703,10 +8896,10 @@ dependencies = [ "swc_common 21.0.1", "swc_config", "swc_ecma_ast 23.0.0", - "swc_ecma_compat_common", - "swc_ecma_transformer", + "swc_ecma_compat_common 38.0.0", + "swc_ecma_transformer 14.0.0", "swc_ecma_transforms_base 42.0.0", - "swc_ecma_transforms_classes", + 
"swc_ecma_transforms_classes 42.0.0", "swc_ecma_transforms_macros", "swc_ecma_utils 29.1.0", "swc_ecma_visit 23.0.0", @@ -8714,6 +8907,19 @@ dependencies = [ "tracing", ] +[[package]] +name = "swc_ecma_compat_es2016" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1358f912b0b5bdb6509f64dada8dc9ac8dc9233175b1d033c571cd34ad0bbec" +dependencies = [ + "swc_ecma_ast 23.0.0", + "swc_ecma_transformer 13.0.0", + "swc_ecma_transforms_base 41.0.1", + "swc_ecma_utils 29.1.0", + "tracing", +] + [[package]] name = "swc_ecma_compat_es2016" version = "43.0.0" @@ -8721,7 +8927,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4402a84df86ebd3723decdd041743ba8e48c7903bfe7f5c7c712bac46642ac90" dependencies = [ "swc_ecma_ast 23.0.0", - "swc_ecma_transformer", + "swc_ecma_transformer 14.0.0", "swc_ecma_transforms_base 42.0.0", "swc_ecma_utils 29.1.0", "tracing", @@ -8729,77 +8935,170 @@ dependencies = [ [[package]] name = "swc_ecma_compat_es2017" -version = "43.0.0" +version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d5f9f182e397fb69ea1f592770b67b94fe2bf201f3e6695cbeba66ccc1715a" +checksum = "65a437c6a98cbfed7b355e2da721a52b1731537b6debf81cadccc9f196bbdbba" dependencies = [ "serde", "swc_common 21.0.1", "swc_ecma_ast 23.0.0", - "swc_ecma_transformer", - "swc_ecma_transforms_base 42.0.0", + "swc_ecma_transformer 13.0.0", + "swc_ecma_transforms_base 41.0.1", "swc_ecma_utils 29.1.0", "tracing", ] [[package]] -name = "swc_ecma_compat_es2018" -version = "44.0.0" +name = "swc_ecma_compat_es2017" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "757acfefd8ececa3fd3491e7dcbf6da1b7b5fba602b70b8f2b36af30fac35eea" +checksum = "99d5f9f182e397fb69ea1f592770b67b94fe2bf201f3e6695cbeba66ccc1715a" dependencies = [ "serde", + "swc_common 21.0.1", "swc_ecma_ast 23.0.0", - "swc_ecma_transformer", + "swc_ecma_transformer 14.0.0", 
"swc_ecma_transforms_base 42.0.0", "swc_ecma_utils 29.1.0", "tracing", ] [[package]] -name = "swc_ecma_compat_es2019" +name = "swc_ecma_compat_es2018" version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a0f39d1ebadade7d0a0a137cedec958cfd38fe99c5c69c762d879650b5e9848" +checksum = "27ffcf499581d598250e4d93d45ef64fe81b16f83c3bcb8c21d27af2004e6f54" dependencies = [ - "swc_common 21.0.1", + "serde", "swc_ecma_ast 23.0.0", - "swc_ecma_transformer", - "swc_ecma_transforms_base 42.0.0", + "swc_ecma_transformer 13.0.0", + "swc_ecma_transforms_base 41.0.1", "swc_ecma_utils 29.1.0", "tracing", ] [[package]] -name = "swc_ecma_compat_es2020" -version = "45.0.0" +name = "swc_ecma_compat_es2018" +version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "170d1ba05307a49e53a55f13128e991e6d250819ed2f75be267dbd9a4a14b00d" +checksum = "757acfefd8ececa3fd3491e7dcbf6da1b7b5fba602b70b8f2b36af30fac35eea" dependencies = [ "serde", - "swc_common 21.0.1", "swc_ecma_ast 23.0.0", - "swc_ecma_compat_es2022", - "swc_ecma_transformer", + "swc_ecma_transformer 14.0.0", "swc_ecma_transforms_base 42.0.0", "swc_ecma_utils 29.1.0", - "swc_ecma_visit 23.0.0", "tracing", ] [[package]] -name = "swc_ecma_compat_es2021" -version = "43.0.0" +name = "swc_ecma_compat_es2019" +version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfef1313a8410a2229aca737b65bb82c4aa45bdd6cedc0a0083688da0b960b20" +checksum = "5125766d7ca9c4789eefdb68fd9d1bc9eba1119df21ad3d1fd7b0ac2808893d0" dependencies = [ + "swc_common 21.0.1", "swc_ecma_ast 23.0.0", - "swc_ecma_transformer", - "swc_ecma_transforms_base 42.0.0", + "swc_ecma_transformer 13.0.0", + "swc_ecma_transforms_base 41.0.1", "swc_ecma_utils 29.1.0", "tracing", ] +[[package]] +name = "swc_ecma_compat_es2019" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3a0f39d1ebadade7d0a0a137cedec958cfd38fe99c5c69c762d879650b5e9848" +dependencies = [ + "swc_common 21.0.1", + "swc_ecma_ast 23.0.0", + "swc_ecma_transformer 14.0.0", + "swc_ecma_transforms_base 42.0.0", + "swc_ecma_utils 29.1.0", + "tracing", +] + +[[package]] +name = "swc_ecma_compat_es2020" +version = "44.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eba7cf139b36cdf75daf9f1fc9096f566c8034d774ce040f09f0fccd4ffe02e" +dependencies = [ + "serde", + "swc_common 21.0.1", + "swc_ecma_ast 23.0.0", + "swc_ecma_compat_es2022 44.0.0", + "swc_ecma_transformer 13.0.0", + "swc_ecma_transforms_base 41.0.1", + "swc_ecma_utils 29.1.0", + "swc_ecma_visit 23.0.0", + "tracing", +] + +[[package]] +name = "swc_ecma_compat_es2020" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "170d1ba05307a49e53a55f13128e991e6d250819ed2f75be267dbd9a4a14b00d" +dependencies = [ + "serde", + "swc_common 21.0.1", + "swc_ecma_ast 23.0.0", + "swc_ecma_compat_es2022 45.0.0", + "swc_ecma_transformer 14.0.0", + "swc_ecma_transforms_base 42.0.0", + "swc_ecma_utils 29.1.0", + "swc_ecma_visit 23.0.0", + "tracing", +] + +[[package]] +name = "swc_ecma_compat_es2021" +version = "42.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f64ee2ff23cdc2bb9749f3fb730bd4a95cc26cdea84b384b85574a1ab43f78af" +dependencies = [ + "swc_ecma_ast 23.0.0", + "swc_ecma_transformer 13.0.0", + "swc_ecma_transforms_base 41.0.1", + "swc_ecma_utils 29.1.0", + "tracing", +] + +[[package]] +name = "swc_ecma_compat_es2021" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfef1313a8410a2229aca737b65bb82c4aa45bdd6cedc0a0083688da0b960b20" +dependencies = [ + "swc_ecma_ast 23.0.0", + "swc_ecma_transformer 14.0.0", + "swc_ecma_transforms_base 42.0.0", + "swc_ecma_utils 29.1.0", + "tracing", +] + +[[package]] +name = "swc_ecma_compat_es2022" +version = "44.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9e0499dc93f8eb04c88d5cf6aefc4ce34fdcca9dd69155d6882eb011339c9dd" +dependencies = [ + "rustc-hash 2.1.1", + "swc_atoms", + "swc_common 21.0.1", + "swc_ecma_ast 23.0.0", + "swc_ecma_transformer 13.0.0", + "swc_ecma_transforms_base 41.0.1", + "swc_ecma_transforms_classes 41.0.1", + "swc_ecma_transforms_macros", + "swc_ecma_utils 29.1.0", + "swc_ecma_visit 23.0.0", + "swc_trace_macro", + "tracing", +] + [[package]] name = "swc_ecma_compat_es2022" version = "45.0.0" @@ -8810,9 +9109,9 @@ dependencies = [ "swc_atoms", "swc_common 21.0.1", "swc_ecma_ast 23.0.0", - "swc_ecma_transformer", + "swc_ecma_transformer 14.0.0", "swc_ecma_transforms_base 42.0.0", - "swc_ecma_transforms_classes", + "swc_ecma_transforms_classes 42.0.0", "swc_ecma_transforms_macros", "swc_ecma_utils 29.1.0", "swc_ecma_visit 23.0.0", @@ -8899,6 +9198,42 @@ dependencies = [ "tracing", ] +[[package]] +name = "swc_ecma_minifier" +version = "51.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c25a685c2efe2f88ba359dde0a17382b28a206ea21b23bda612f97b2c423b2f2" +dependencies = [ + "arrayvec", + "bitflags 2.9.4", + "indexmap 2.13.0", + "num-bigint", + "num_cpus", + "once_cell", + "par-core", + "par-iter", + "parking_lot", + "phf", + "radix_fmt", + "rustc-hash 2.1.1", + "ryu-js", + "serde", + "serde_json", + "swc_atoms", + "swc_common 21.0.1", + "swc_config", + "swc_ecma_ast 23.0.0", + "swc_ecma_codegen 26.0.1", + "swc_ecma_hooks", + "swc_ecma_parser 38.0.2", + "swc_ecma_transforms_base 41.0.1", + "swc_ecma_transforms_optimization 43.0.0", + "swc_ecma_utils 29.1.0", + "swc_ecma_visit 23.0.0", + "swc_timer", + "tracing", +] + [[package]] name = "swc_ecma_minifier" version = "52.0.4" @@ -8928,7 +9263,7 @@ dependencies = [ "swc_ecma_hooks", "swc_ecma_parser 39.0.2", "swc_ecma_transforms_base 42.0.0", - "swc_ecma_transforms_optimization", + "swc_ecma_transforms_optimization 44.0.0", "swc_ecma_utils 29.1.0", 
"swc_ecma_visit 23.0.0", "swc_timer", @@ -8955,6 +9290,27 @@ dependencies = [ "tracing", ] +[[package]] +name = "swc_ecma_parser" +version = "38.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7c251d44e048647b5335861d1585b3e95fa8bc74f6e7a40570b0ea95d27ba66" +dependencies = [ + "bitflags 2.9.4", + "either", + "num-bigint", + "phf", + "rustc-hash 2.1.1", + "seq-macro", + "serde", + "smartstring", + "stacker", + "swc_atoms", + "swc_common 21.0.1", + "swc_ecma_ast 23.0.0", + "tracing", +] + [[package]] name = "swc_ecma_parser" version = "39.0.2" @@ -8969,13 +9325,37 @@ dependencies = [ "seq-macro", "serde", "smartstring", - "stacker", "swc_atoms", "swc_common 21.0.1", "swc_ecma_ast 23.0.0", "tracing", ] +[[package]] +name = "swc_ecma_preset_env" +version = "52.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5132d5890cddc4e47feb29c3388b4b0ca2251173c2c859c4b48b896794767c54" +dependencies = [ + "anyhow", + "foldhash 0.1.5", + "indexmap 2.13.0", + "once_cell", + "precomputed-map", + "preset_env_base", + "rustc-hash 2.1.1", + "serde", + "serde_json", + "string_enum", + "swc_atoms", + "swc_common 21.0.1", + "swc_ecma_ast 23.0.0", + "swc_ecma_transformer 13.0.0", + "swc_ecma_transforms 51.0.0", + "swc_ecma_utils 29.1.0", + "swc_ecma_visit 23.0.0", +] + [[package]] name = "swc_ecma_preset_env" version = "53.0.0" @@ -8995,17 +9375,17 @@ dependencies = [ "swc_atoms", "swc_common 21.0.1", "swc_ecma_ast 23.0.0", - "swc_ecma_transformer", - "swc_ecma_transforms", + "swc_ecma_transformer 14.0.0", + "swc_ecma_transforms 52.0.0", "swc_ecma_utils 29.1.0", "swc_ecma_visit 23.0.0", ] [[package]] name = "swc_ecma_quote_macros" -version = "39.0.0" +version = "38.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54e4d28106d86d9c45d187687688d03bab7064bd8480d8bc783df9ff2a5d5a9a" +checksum = "16896c184ff6915c85ee4bffd08db32e010b1c1a9628e6c4ee49a233653c20a7" dependencies = [ "anyhow", 
"proc-macro2", @@ -9014,7 +9394,7 @@ dependencies = [ "swc_atoms", "swc_common 21.0.1", "swc_ecma_ast 23.0.0", - "swc_ecma_parser 39.0.2", + "swc_ecma_parser 38.0.2", "swc_macros_common", "syn 2.0.106", ] @@ -9067,6 +9447,25 @@ dependencies = [ "swc_visit", ] +[[package]] +name = "swc_ecma_transformer" +version = "13.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65c334a42d7d8252e5a80dbae85a1230144d29f7ed4aa7feada2a47167f9282e" +dependencies = [ + "rustc-hash 2.1.1", + "swc_atoms", + "swc_common 21.0.1", + "swc_ecma_ast 23.0.0", + "swc_ecma_compat_regexp", + "swc_ecma_hooks", + "swc_ecma_regexp", + "swc_ecma_transforms_base 41.0.1", + "swc_ecma_utils 29.1.0", + "swc_ecma_visit 23.0.0", + "tracing", +] + [[package]] name = "swc_ecma_transformer" version = "14.0.0" @@ -9086,6 +9485,24 @@ dependencies = [ "tracing", ] +[[package]] +name = "swc_ecma_transforms" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b94503bbcd555d82cb33ff0e591e935bb925b79b254e94e706521f15d762b473" +dependencies = [ + "par-core", + "swc_common 21.0.1", + "swc_ecma_ast 23.0.0", + "swc_ecma_transforms_base 41.0.1", + "swc_ecma_transforms_compat 47.0.0", + "swc_ecma_transforms_optimization 43.0.0", + "swc_ecma_transforms_proposal 41.0.3", + "swc_ecma_transforms_react 45.0.0", + "swc_ecma_transforms_typescript 45.0.2", + "swc_ecma_utils 29.1.0", +] + [[package]] name = "swc_ecma_transforms" version = "52.0.0" @@ -9096,11 +9513,11 @@ dependencies = [ "swc_common 21.0.1", "swc_ecma_ast 23.0.0", "swc_ecma_transforms_base 42.0.0", - "swc_ecma_transforms_compat", - "swc_ecma_transforms_optimization", - "swc_ecma_transforms_proposal", - "swc_ecma_transforms_react", - "swc_ecma_transforms_typescript", + "swc_ecma_transforms_compat 48.0.0", + "swc_ecma_transforms_optimization 44.0.0", + "swc_ecma_transforms_proposal 42.0.0", + "swc_ecma_transforms_react 46.0.1", + "swc_ecma_transforms_typescript 46.0.1", "swc_ecma_utils 
29.1.0", ] @@ -9126,6 +9543,28 @@ dependencies = [ "tracing", ] +[[package]] +name = "swc_ecma_transforms_base" +version = "41.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6be824dc326da1f7673d1e241790626e5f39f09e1d896175134143408eeaa081" +dependencies = [ + "better_scoped_tls", + "indexmap 2.13.0", + "once_cell", + "par-core", + "phf", + "rustc-hash 2.1.1", + "serde", + "swc_atoms", + "swc_common 21.0.1", + "swc_ecma_ast 23.0.0", + "swc_ecma_parser 38.0.2", + "swc_ecma_utils 29.1.0", + "swc_ecma_visit 23.0.0", + "tracing", +] + [[package]] name = "swc_ecma_transforms_base" version = "42.0.0" @@ -9148,6 +9587,19 @@ dependencies = [ "tracing", ] +[[package]] +name = "swc_ecma_transforms_classes" +version = "41.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ffae23e996fa1a7b20b77ff599aa0e4997a6eb21369e2e5e906c91b89fdffaa" +dependencies = [ + "swc_common 21.0.1", + "swc_ecma_ast 23.0.0", + "swc_ecma_transforms_base 41.0.1", + "swc_ecma_utils 29.1.0", + "swc_ecma_visit 23.0.0", +] + [[package]] name = "swc_ecma_transforms_classes" version = "42.0.0" @@ -9161,6 +9613,34 @@ dependencies = [ "swc_ecma_visit 23.0.0", ] +[[package]] +name = "swc_ecma_transforms_compat" +version = "47.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd54b7d82f0037f03367b4c9052a4ba2913e044df009fbeac388b2142c3ddd8a" +dependencies = [ + "indexmap 2.13.0", + "par-core", + "serde", + "swc_atoms", + "swc_common 21.0.1", + "swc_ecma_ast 23.0.0", + "swc_ecma_compat_bugfixes 46.0.0", + "swc_ecma_compat_common 37.0.0", + "swc_ecma_compat_es2015 45.0.0", + "swc_ecma_compat_es2016 42.0.0", + "swc_ecma_compat_es2017 42.0.0", + "swc_ecma_compat_es2018 43.0.0", + "swc_ecma_compat_es2019 42.0.0", + "swc_ecma_compat_es2020 44.0.0", + "swc_ecma_compat_es2021 42.0.0", + "swc_ecma_compat_es2022 44.0.0", + "swc_ecma_transforms_base 41.0.1", + "swc_ecma_utils 29.1.0", + "swc_ecma_visit 23.0.0", + 
"tracing", +] + [[package]] name = "swc_ecma_transforms_compat" version = "48.0.0" @@ -9173,16 +9653,16 @@ dependencies = [ "swc_atoms", "swc_common 21.0.1", "swc_ecma_ast 23.0.0", - "swc_ecma_compat_bugfixes", - "swc_ecma_compat_common", - "swc_ecma_compat_es2015", - "swc_ecma_compat_es2016", - "swc_ecma_compat_es2017", - "swc_ecma_compat_es2018", - "swc_ecma_compat_es2019", - "swc_ecma_compat_es2020", - "swc_ecma_compat_es2021", - "swc_ecma_compat_es2022", + "swc_ecma_compat_bugfixes 47.0.0", + "swc_ecma_compat_common 38.0.0", + "swc_ecma_compat_es2015 46.0.0", + "swc_ecma_compat_es2016 43.0.0", + "swc_ecma_compat_es2017 43.0.0", + "swc_ecma_compat_es2018 44.0.0", + "swc_ecma_compat_es2019 43.0.0", + "swc_ecma_compat_es2020 45.0.0", + "swc_ecma_compat_es2021 43.0.0", + "swc_ecma_compat_es2022 45.0.0", "swc_ecma_transforms_base 42.0.0", "swc_ecma_utils 29.1.0", "swc_ecma_visit 23.0.0", @@ -9201,6 +9681,30 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "swc_ecma_transforms_optimization" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae12179c92f0690850bae8932dfac2b7f191b8bfc6bac80dd81abfe6b0c014aa" +dependencies = [ + "bytes-str", + "dashmap 5.5.3", + "indexmap 2.13.0", + "once_cell", + "par-core", + "petgraph 0.7.1", + "rustc-hash 2.1.1", + "serde_json", + "swc_atoms", + "swc_common 21.0.1", + "swc_ecma_ast 23.0.0", + "swc_ecma_parser 38.0.2", + "swc_ecma_transforms_base 41.0.1", + "swc_ecma_utils 29.1.0", + "swc_ecma_visit 23.0.0", + "tracing", +] + [[package]] name = "swc_ecma_transforms_optimization" version = "44.0.0" @@ -9225,6 +9729,24 @@ dependencies = [ "tracing", ] +[[package]] +name = "swc_ecma_transforms_proposal" +version = "41.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02c49fd90ad7ef87cfacb9e15eb939bfecac83fe6638fdd4f94a31eff56b8276" +dependencies = [ + "either", + "rustc-hash 2.1.1", + "serde", + "swc_atoms", + "swc_common 21.0.1", + "swc_ecma_ast 
23.0.0", + "swc_ecma_transforms_base 41.0.1", + "swc_ecma_transforms_classes 41.0.1", + "swc_ecma_utils 29.1.0", + "swc_ecma_visit 23.0.0", +] + [[package]] name = "swc_ecma_transforms_proposal" version = "42.0.0" @@ -9238,7 +9760,32 @@ dependencies = [ "swc_common 21.0.1", "swc_ecma_ast 23.0.0", "swc_ecma_transforms_base 42.0.0", - "swc_ecma_transforms_classes", + "swc_ecma_transforms_classes 42.0.0", + "swc_ecma_utils 29.1.0", + "swc_ecma_visit 23.0.0", +] + +[[package]] +name = "swc_ecma_transforms_react" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b41b35e76a78a01650dcfb92889d37fdebbc3b86932a052259c2a99e7955e699" +dependencies = [ + "base64 0.22.1", + "bytes-str", + "indexmap 2.13.0", + "once_cell", + "rustc-hash 2.1.1", + "serde", + "sha1", + "string_enum", + "swc_atoms", + "swc_common 21.0.1", + "swc_config", + "swc_ecma_ast 23.0.0", + "swc_ecma_hooks", + "swc_ecma_parser 38.0.2", + "swc_ecma_transforms_base 41.0.1", "swc_ecma_utils 29.1.0", "swc_ecma_visit 23.0.0", ] @@ -9268,6 +9815,24 @@ dependencies = [ "swc_ecma_visit 23.0.0", ] +[[package]] +name = "swc_ecma_transforms_typescript" +version = "45.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d25026f22efe873b50c97b3aaca6bfd178f954031effd14394e7b3add1e95fb" +dependencies = [ + "bytes-str", + "rustc-hash 2.1.1", + "serde", + "swc_atoms", + "swc_common 21.0.1", + "swc_ecma_ast 23.0.0", + "swc_ecma_transforms_base 41.0.1", + "swc_ecma_transforms_react 45.0.0", + "swc_ecma_utils 29.1.0", + "swc_ecma_visit 23.0.0", +] + [[package]] name = "swc_ecma_transforms_typescript" version = "46.0.1" @@ -9281,7 +9846,7 @@ dependencies = [ "swc_common 21.0.1", "swc_ecma_ast 23.0.0", "swc_ecma_transforms_base 42.0.0", - "swc_ecma_transforms_react", + "swc_ecma_transforms_react 46.0.1", "swc_ecma_utils 29.1.0", "swc_ecma_visit 23.0.0", ] @@ -9357,9 +9922,9 @@ dependencies = [ [[package]] name = "swc_emotion" -version = "4.0.0" 
+version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7077db4cd3dc9908a860c2e55b40ae6de8d6ce41d919867f2e58eb81b4019718" +checksum = "11d8058e754b05eb672671b71974c4f79673b32bc2a2763706ba6970f8d2c86f" dependencies = [ "base64 0.22.1", "byteorder", @@ -9373,7 +9938,7 @@ dependencies = [ "swc_common 21.0.1", "swc_ecma_ast 23.0.0", "swc_ecma_codegen 26.0.1", - "swc_ecma_transforms", + "swc_ecma_transforms 51.0.0", "swc_ecma_utils 29.1.0", "swc_ecma_visit 23.0.0", "swc_sourcemap", @@ -9506,9 +10071,9 @@ dependencies = [ [[package]] name = "swc_relay" -version = "4.0.0" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b592abba81c24baad593d6130a162beaa50699b5c2ba791a5b0db7be2dff1db4" +checksum = "d1a0e98d0497d914f2a0736be9be050af6c3c0fbb2a9d911dae40379fffcc7c8" dependencies = [ "once_cell", "regex", @@ -10465,7 +11030,6 @@ dependencies = [ "auto-hash-map", "bincode 2.0.1", "concurrent-queue", - "ctor 0.10.1", "dashmap 6.1.0", "either", "erased-serde", @@ -10638,10 +11202,8 @@ dependencies = [ name = "turbo-tasks-malloc" version = "0.1.0" dependencies = [ - "libc", "libmimalloc-sys", "mimalloc", - "windows-sys 0.60.2", ] [[package]] @@ -10741,6 +11303,7 @@ dependencies = [ "either", "indexmap 2.13.0", "num-bigint", + "once_cell", "patricia_tree", "petgraph 0.8.3", "ref-cast", @@ -10750,7 +11313,7 @@ dependencies = [ "serde", "serde_json", "smallvec", - "swc_core 65.0.3", + "swc_core 63.1.3", "swc_sourcemap", "tracing", "turbo-bincode", @@ -10782,7 +11345,7 @@ dependencies = [ "rustc-hash 2.1.1", "serde", "smallvec", - "swc_core 65.0.3", + "swc_core 63.1.3", "tokio", "tracing", "turbo-bincode", @@ -10849,6 +11412,7 @@ dependencies = [ "itertools 0.10.5", "num-bigint", "num-traits", + "once_cell", "parking_lot", "petgraph 0.8.3", "phf", @@ -10858,7 +11422,7 @@ dependencies = [ "serde_json", "smallvec", "strsim 0.11.1", - "swc_core 65.0.3", + "swc_core 63.1.3", "swc_sourcemap", "tokio", 
"tracing", @@ -10873,7 +11437,6 @@ dependencies = [ "turbopack-resolve", "turbopack-swc-utils", "url", - "urlencoding", ] [[package]] @@ -10901,7 +11464,7 @@ dependencies = [ "serde_json", "styled_components", "styled_jsx", - "swc_core 65.0.3", + "swc_core 63.1.3", "swc_emotion", "swc_plugin_backend_wasmtime", "swc_relay", @@ -10951,8 +11514,9 @@ dependencies = [ "bincode 2.0.1", "image", "mime", - "phf", + "once_cell", "regex", + "rustc-hash 2.1.1", "serde", "turbo-bincode", "turbo-rcstr", @@ -10996,6 +11560,7 @@ dependencies = [ "js-sys", "napi", "napi-derive", + "once_cell", "owo-colors", "parking_lot", "regex", @@ -11081,7 +11646,7 @@ version = "0.1.0" dependencies = [ "anyhow", "parking_lot", - "swc_core 65.0.3", + "swc_core 63.1.3", "turbo-rcstr", "turbo-tasks", "turbopack-core", @@ -11093,6 +11658,7 @@ version = "0.1.0" dependencies = [ "anyhow", "bincode 2.0.1", + "once_cell", "regex", "rustc-hash 2.1.1", "serde", @@ -11121,7 +11687,6 @@ dependencies = [ "rustc-hash 2.1.1", "serde", "serde_json", - "smallvec", "tungstenite 0.21.0", "turbo-rcstr", "turbo-tasks-malloc", @@ -11136,6 +11701,7 @@ dependencies = [ "anyhow", "crossbeam-channel", "crossbeam-utils", + "once_cell", "parking_lot", "postcard", "rustc-hash 2.1.1", diff --git a/Cargo.toml b/Cargo.toml index ef4a4f926..4b2836c06 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,8 @@ [workspace] resolver = "2" members = [ + "crates/manifest-bench", + "crates/preload-bench", "crates/pack-api", "crates/pack-cli", "crates/pack-core", diff --git a/bench/pm-bench-pcap.sh b/bench/pm-bench-pcap.sh index 7a0f7c819..7f37fc5db 100755 --- a/bench/pm-bench-pcap.sh +++ b/bench/pm-bench-pcap.sh @@ -139,6 +139,40 @@ fi run_pm_phases bun "$(command -v bun)" "$BUN_CACHE" +# --- standalone bench captures (resolve-only baselines) ---------------- +# After all PM captures, regenerate a fresh package-lock.json via utoo +# deps (untimed) so manifest-bench has a stable name list to consume. 
+# Then pcap-capture each standalone bench at conc=96 — the same conc +# utoo's mb_fetch_with_graph ran with — so the TCP signals are +# directly comparable between the integrated path and the pure-HTTP +# / pure-streaming-walk ceilings. +cd "$PROJECT_DIR" +rm -f package-lock.json bun.lock +rm -rf "$UTOO_CACHE" node_modules +echo "=== regenerating package-lock.json for standalone benches ===" +utoo deps --registry="$REGISTRY" --cache-dir="$UTOO_CACHE" \ + >/dev/null 2>&1 || echo "lock regen failed" + +if [ -f package-lock.json ] && [ -n "${MANIFEST_BENCH_BIN:-}" ] && [ -x "$MANIFEST_BENCH_BIN" ]; then + capture_one "manifest-bench-c96" \ + "$MANIFEST_BENCH_BIN" \ + --lockfile package-lock.json \ + --registry "$REGISTRY" \ + --concurrency 96 --reps 1 --http1-only +else + echo "skip manifest-bench: bin missing or no lockfile" +fi + +if [ -n "${PRELOAD_BENCH_BIN:-}" ] && [ -x "$PRELOAD_BENCH_BIN" ]; then + capture_one "preload-bench-c96" \ + "$PRELOAD_BENCH_BIN" \ + --package-json package.json \ + --registry "$REGISTRY" \ + --concurrency 96 --reps 1 +else + echo "skip preload-bench: bin missing" +fi + # --- post-capture analysis: tshark metrics per pcap --------------------- # Extract TCP-level stress signals to validate the "install greediness # starves download" hypothesis. All of these are pre-TLS so we don't need @@ -333,3 +367,25 @@ fi echo "done. files:" ls -lh "$PCAP_DIR" + +# Print summary table to CI logs so we don't need to download the +# 2 GB pcap artifact just to read the comparison numbers. 
+echo +echo "=== summary table ===" +if command -v jq >/dev/null && [ -f "$PCAP_DIR/summary.json" ]; then + jq -r ' + .captures + | (["name", "wall_s", "packets", "streams", "zwin", "retx", "dup_ack", "gap_p99_us", "gap_max_us"] | @tsv), + (.[] | [ + .name, + (.wall_seconds | tostring), + (.packet_count | tostring), + (.distinct_streams | tostring), + (.zero_windows | tostring), + (.retransmits | tostring), + (.duplicate_acks | tostring), + (.gap_p99_us | tostring), + (.gap_max_us | tostring) + ] | @tsv) + ' "$PCAP_DIR/summary.json" | column -t +fi diff --git a/bench/pm-bench-phases.sh b/bench/pm-bench-phases.sh index 226ffb751..b025ebc6f 100755 --- a/bench/pm-bench-phases.sh +++ b/bench/pm-bench-phases.sh @@ -22,6 +22,13 @@ UTOO_NEXT_CACHE="${UTOO_NEXT_CACHE:-/tmp/utoo-next-bench-cache}" BUN_CACHE="${BUN_CACHE:-/tmp/bun-bench-cache}" export BUN_INSTALL_CACHE_DIR="$BUN_CACHE" +# utoo path defaults to fast_preload (combined-parse) so we have a +# stable baseline to compare against. preload-bench is run as a +# separate standalone tool by the CI workflow — its wall is the +# self-contained-streaming reference, ruborist's utoo p1_resolve +# wall is the integrated path. The gap between them is what +# remains to close. + # Drop optional baselines from the PM list when their binary is not wired # up — UTOO_NPM_BIN is set by CI's "Install utoo@npm" step, UTOO_NEXT_BIN # by the optional "Build next branch utoo" step. Local runs without them diff --git a/crates/manifest-bench/Cargo.toml b/crates/manifest-bench/Cargo.toml new file mode 100644 index 000000000..5b01e57c0 --- /dev/null +++ b/crates/manifest-bench/Cargo.toml @@ -0,0 +1,37 @@ +[package] +name = "manifest-bench" +version = "0.0.0" +edition = "2024" +license = "MIT" +publish = false +description = "Standalone HTTP-only manifest fetch benchmark, isolating network behaviour from ruborist's resolver pipeline." 
+ +[[bin]] +name = "manifest-bench" +path = "src/main.rs" + +# tombi: format.rules.table-keys-order.disabled = true +[dependencies] +anyhow = { workspace = true } +clap = { workspace = true } +futures = "0.3" +serde = { version = "1", features = ["derive"] } +serde_json = { workspace = true } +tokio = { workspace = true, features = ["macros", "rt-multi-thread", "fs", "time"] } + +# Identical TLS / DNS choices to ruborist so we measure the *protocol* +# characteristics of the same stack, not a different implementation. +reqwest = { version = "0.12", default-features = false, features = [ + "brotli", + "gzip", + "http2", + "rustls-tls-native-roots-no-provider", + "socks" +] } +rustls = { version = "0.23", default-features = false, features = [ + "aws-lc-rs", + "logging", + "std", + "tls12" +] } +rustls-native-certs = "0.8" diff --git a/crates/manifest-bench/src/main.rs b/crates/manifest-bench/src/main.rs new file mode 100644 index 000000000..fa70f3fe4 --- /dev/null +++ b/crates/manifest-bench/src/main.rs @@ -0,0 +1,371 @@ +//! Standalone HTTP-only manifest fetch benchmark. +//! +//! Isolates the network behaviour of `reqwest + rustls + tokio` from +//! ruborist's resolver pipeline (BFS, dedup, parse, lockfile, project +//! cache). Reads a list of package names, builds manifest URLs, fires +//! parallel `GET` requests, records `(start, end)` per request, and +//! reports the same diag shape as ruborist's `Preload HTTP diag` line. +//! +//! Two input modes: +//! - `--names-file ` — newline-separated package names +//! - `--lockfile ` — a npm-style package-lock.json; we extract +//! the `packages.*` (v3) or `dependencies.*` (v2) keys +//! +//! Two registry modes: +//! - `/` — full manifest endpoint (default, npmjs) +//! - `//latest` — single-version endpoint +//! (gated behind `--single-version`) +//! +//! Each request reads the body to completion (we only measure I/O, no +//! parse). Output: same fields as preload's HTTP diag for direct +//! comparison. 
+ +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Instant; + +use anyhow::{Context, Result, anyhow}; +use clap::Parser; +use futures::stream::{FuturesUnordered, StreamExt}; + +#[derive(Parser, Debug)] +#[command( + name = "manifest-bench", + about = "HTTP-only manifest fetch bench (no parse, no resolver)" +)] +struct Args { + /// Registry base URL. + #[arg(long, default_value = "https://registry.npmjs.org")] + registry: String, + + /// File of newline-separated package names. Mutually exclusive with `--lockfile`. + #[arg(long, conflicts_with = "lockfile")] + names_file: Option, + + /// `package-lock.json` file. Reads top-level `packages.*.name` keys. + #[arg(long)] + lockfile: Option, + + /// Maximum concurrent in-flight requests. + #[arg(long, default_value_t = 128)] + concurrency: usize, + + /// Number of times to repeat the whole sweep (each iteration is a + /// fresh `reqwest::Client`, so connection pool / TLS handshake + /// costs are paid each time, matching `hyperfine` cold-start). + #[arg(long, default_value_t = 1)] + reps: usize, + + /// Use the single-version endpoint `//latest` instead of the + /// full-manifest endpoint `/`. Smaller bodies, more requests + /// served per byte. + #[arg(long)] + single_version: bool, + + /// Override `Accept` header. Default mimics ruborist's preload + /// (`application/vnd.npm.install-v1+json` — abbreviated metadata). + #[arg(long)] + accept: Option, + + /// Override `User-Agent`. Default uses reqwest's default. Try + /// `Bun/1.x.x` to test whether Cloudflare differentiates by UA. + #[arg(long)] + user_agent: Option, + + /// Force HTTP/1.1 (no H2 negotiation). Default lets ALPN decide. 
+ #[arg(long)] + http1_only: bool, +} + +#[tokio::main] +async fn main() -> Result<()> { + let args = Args::parse(); + + let names = load_names(&args)?; + if names.is_empty() { + return Err(anyhow!("no package names found in input")); + } + + println!( + "manifest-bench: registry={} concurrency={} reps={} names={} h1_only={} single_version={} accept={} ua={}", + args.registry, + args.concurrency, + args.reps, + names.len(), + args.http1_only, + args.single_version, + args.accept.as_deref().unwrap_or(""), + args.user_agent.as_deref().unwrap_or(""), + ); + + for rep in 1..=args.reps { + run_once(&args, &names, rep).await?; + } + + Ok(()) +} + +fn load_names(args: &Args) -> Result> { + if let Some(path) = &args.names_file { + let raw = std::fs::read_to_string(path).with_context(|| format!("read {path:?}"))?; + return Ok(raw + .lines() + .map(str::trim) + .filter(|s| !s.is_empty() && !s.starts_with('#')) + .map(str::to_string) + .collect()); + } + + if let Some(path) = &args.lockfile { + let raw = std::fs::read_to_string(path).with_context(|| format!("read {path:?}"))?; + return extract_lockfile_names(&raw); + } + + Err(anyhow!("provide --names-file or --lockfile")) +} + +/// Pull unique package names from an npm v3 lockfile (`packages.*`) +/// or an older v2 lockfile (`dependencies.*`). +fn extract_lockfile_names(raw: &str) -> Result> { + use std::collections::BTreeSet; + + let v: serde_json::Value = serde_json::from_str(raw).context("parse lockfile JSON")?; + let mut names: BTreeSet = BTreeSet::new(); + + if let Some(packages) = v.get("packages").and_then(|p| p.as_object()) { + for key in packages.keys() { + if key.is_empty() { + continue; + } + // npm v3 packages key like "node_modules/foo" or + // "node_modules/@scope/bar/node_modules/baz" — take the + // last path segment (or @scope/name pair). 
+ let last = last_module_name(key); + if !last.is_empty() { + names.insert(last); + } + } + } else if let Some(deps) = v.get("dependencies").and_then(|d| d.as_object()) { + for key in deps.keys() { + names.insert(key.clone()); + } + } + + Ok(names.into_iter().collect()) +} + +fn last_module_name(key: &str) -> String { + let parts: Vec<&str> = key.split("node_modules/").collect(); + let tail = parts.last().copied().unwrap_or(""); + tail.to_string() +} + +#[derive(Debug)] +struct ReqResult { + start: Instant, + end: Instant, + bytes: usize, + status: u16, +} + +async fn run_once(args: &Args, names: &[String], rep: usize) -> Result<()> { + // Build a fresh client per rep — matches hyperfine's cold-start + // assumption that each iteration pays the TLS handshake cost. + let client = build_client(args)?; + let registry = Arc::new(args.registry.trim_end_matches('/').to_string()); + let accept = Arc::new( + args.accept + .clone() + .unwrap_or_else(|| "application/vnd.npm.install-v1+json".to_string()), + ); + + let single_version = args.single_version; + let concurrency = args.concurrency; + + let phase_start = Instant::now(); + let mut futs = FuturesUnordered::new(); + let mut idx = 0usize; + let mut results: Vec = Vec::with_capacity(names.len()); + + while idx < names.len() && futs.len() < concurrency { + spawn_one( + &client, + ®istry, + &names[idx], + &accept, + single_version, + &mut futs, + ); + idx += 1; + } + + while let Some(res) = futs.next().await { + results.push(res); + if idx < names.len() { + spawn_one( + &client, + ®istry, + &names[idx], + &accept, + single_version, + &mut futs, + ); + idx += 1; + } + } + let phase_wall_ms = phase_start.elapsed().as_millis(); + + report(rep, &results, phase_wall_ms); + Ok(()) +} + +type Fut = std::pin::Pin + Send>>; + +fn spawn_one( + client: &reqwest::Client, + registry: &Arc, + name: &str, + accept: &Arc, + single_version: bool, + futs: &mut FuturesUnordered, +) { + let url = if single_version { + 
format!("{registry}/{name}/latest") + } else { + format!("{registry}/{name}") + }; + let client = client.clone(); + let accept = Arc::clone(accept); + futs.push(Box::pin(async move { + let start = Instant::now(); + let req = client.get(&url).header("accept", accept.as_str()).send(); + let (bytes, status) = match req.await { + Ok(resp) => { + let status = resp.status().as_u16(); + let body = resp.bytes().await.map(|b| b.len()).unwrap_or(0); + (body, status) + } + Err(_) => (0, 0), + }; + let end = Instant::now(); + ReqResult { + start, + end, + bytes, + status, + } + })); +} + +fn build_client(args: &Args) -> Result { + // Install aws-lc-rs as the default crypto provider (idempotent — + // first call wins). Matches ruborist's `service::http` setup. + let _ = rustls::crypto::aws_lc_rs::default_provider().install_default(); + + let mut roots = rustls::RootCertStore::empty(); + let native = rustls_native_certs::load_native_certs(); + for cert in native.certs { + let _ = roots.add(cert); + } + + let tls_config = rustls::ClientConfig::builder_with_provider(std::sync::Arc::new( + rustls::crypto::aws_lc_rs::default_provider(), + )) + .with_safe_default_protocol_versions() + .map_err(|e| anyhow!("rustls protocol versions: {e}"))? 
+ .with_root_certificates(roots) + .with_no_client_auth(); + + let mut builder = reqwest::Client::builder() + .use_preconfigured_tls(tls_config) + .no_proxy() + .pool_max_idle_per_host(256); + if args.http1_only { + builder = builder.http1_only(); + } + if let Some(ua) = &args.user_agent { + builder = builder.user_agent(ua); + } + builder.build().context("build reqwest client") +} + +fn report(rep: usize, results: &[ReqResult], wall_ms: u128) { + if results.is_empty() { + eprintln!("[rep {rep}] no results"); + return; + } + + let mut spans: Vec<(Instant, Instant)> = results.iter().map(|r| (r.start, r.end)).collect(); + spans.sort_by_key(|(s, _)| *s); + + let first_start = spans.first().unwrap().0; + let last_end = spans.iter().map(|(_, e)| *e).max().unwrap(); + let win_wall = last_end.duration_since(first_start).as_millis(); + + let mut per_us: Vec = spans + .iter() + .map(|(s, e)| e.duration_since(*s).as_micros()) + .collect(); + per_us.sort_unstable(); + let n = per_us.len(); + let pct = |p: usize| per_us[(n * p).div_ceil(100).saturating_sub(1)]; + let sum: u128 = per_us.iter().sum(); + let p50 = per_us[n / 2]; + + let mut busy_us: u128 = 0; + let (mut cur_s, mut cur_e) = spans[0]; + for &(s, e) in &spans[1..] 
{ + if s <= cur_e { + if e > cur_e { + cur_e = e; + } + } else { + busy_us += cur_e.duration_since(cur_s).as_micros(); + cur_s = s; + cur_e = e; + } + } + busy_us += cur_e.duration_since(cur_s).as_micros(); + + let bytes_total: usize = results.iter().map(|r| r.bytes).sum(); + let ok = results.iter().filter(|r| r.status == 200).count(); + let err = results.iter().filter(|r| r.status == 0).count(); + let four_xx = results + .iter() + .filter(|r| (400..500).contains(&r.status)) + .count(); + let five_xx = results + .iter() + .filter(|r| (500..600).contains(&r.status)) + .count(); + + let avg_conc = if busy_us > 0 { + sum as f64 / busy_us as f64 + } else { + 0.0 + }; + + println!( + "[rep {rep}] n={} phase_wall={}ms win_wall={}ms busy={}ms ({:.0}%) sum={}ms avg_conc={:.1} p50={}ms p95={}ms p99={}ms max={}ms bytes={} 200={} 4xx={} 5xx={} err={}", + n, + wall_ms, + win_wall, + busy_us / 1000, + if win_wall > 0 { + 100.0 * (busy_us as f64 / 1000.0) / win_wall as f64 + } else { + 0.0 + }, + sum / 1000, + avg_conc, + p50 / 1000, + pct(95) / 1000, + pct(99) / 1000, + per_us.last().unwrap() / 1000, + bytes_total, + ok, + four_xx, + five_xx, + err, + ); +} diff --git a/crates/pm/src/helper/ruborist_context.rs b/crates/pm/src/helper/ruborist_context.rs index b47def019..b78643a49 100644 --- a/crates/pm/src/helper/ruborist_context.rs +++ b/crates/pm/src/helper/ruborist_context.rs @@ -63,11 +63,21 @@ impl Context { receiver, supports_semver: get_supports_semver(), catalogs, + skip_preload: false, } } /// Create BuildDepsOptions with PipelineReceiver for concurrent download/clone. /// Returns (options, channels) where channels are used to start pipeline workers. + /// + /// Routes through the **legacy preload + BFS** path (`skip_preload=false`). 
+ /// The optimal-hypothesis A/B sweep showed channel `mb_fetch_with_graph`'s + /// PackageResolved emit fires at full fetch rate, flooding the download + /// pipeline with concurrent tarball requests that fight extract workers + /// for blocking-pool slots — net p3_cold_install regression of ~0.7s vs + /// the legacy install path. Lockfile-only callers (`utoo deps`) still + /// get the channel `mb_fetch_with_graph` win because they go through + /// `Self::build_deps` which sets `skip_preload=true` independently. pub async fn pipeline_deps_options( cwd: PathBuf, ) -> ( @@ -82,8 +92,17 @@ impl Context { /// Resolve dependency tree with plain ProgressReceiver. Returns /// [`BuildDepsOutput`] (lock + project cache); the project cache is /// persisted in the background. + /// + /// Used by the lockfile-only path (`utoo deps`). With + /// `skip_preload=true`, ruborist's `service::api::build_deps` + /// internally routes through `mb_resolve::mb_fetch` — a + /// standalone manifest-bench-style preload that bypasses + /// `service::http` / `service::manifest` / `service::registry` + /// for the cold-cache lockfile-only workload. PM doesn't see + /// the dispatch. pub async fn build_deps(cwd: PathBuf) -> anyhow::Result { - let options = Self::deps_options(cwd.clone(), ProgressReceiver).await; + let mut options = Self::deps_options(cwd.clone(), ProgressReceiver).await; + options.skip_preload = true; let output = utoo_ruborist::service::build_deps(options).await?; spawn_save_project_cache(cwd, output.project_cache.clone()); Ok(output) diff --git a/crates/pm/src/main.rs b/crates/pm/src/main.rs index 3ec93fc1c..7bbe4fc67 100644 --- a/crates/pm/src/main.rs +++ b/crates/pm/src/main.rs @@ -344,9 +344,22 @@ enum Commands { fn main() { crate::util::sysconf::init(); + // Floor at 4 worker threads even on 1-2 core CI runners. 
The + // install path multiplexes 4+ concurrent task families on the + // tokio multi-thread runtime: mb_fetch_with_graph main loop + + // graph_worker (`tokio::spawn`, CPU-heavy) + pipeline download + // workers + pipeline clone workers. With `num_cpus = 2` (default + // on GHA ubuntu-latest) and 2 worker threads, graph_worker can + // monopolize a worker for tens of ms at a time, starving the + // main loop's socket polling and producing TCP zwin events that + // stretch p0/p1 tail wall by 3-5s per affected run. Floor of 4 + // gives the runtime headroom to keep the resolve hot path on + // its own worker even when the install pipeline saturates the + // others. let worker_threads = std::thread::available_parallelism() .map(|n| n.get()) - .unwrap_or(4) + .unwrap_or(4) + .max(4); let result = tokio::runtime::Builder::new_multi_thread() .enable_all() diff --git a/crates/pm/src/util/sysconf.rs b/crates/pm/src/util/sysconf.rs index af77a7745..03fd4ef5b 100644 --- a/crates/pm/src/util/sysconf.rs +++ b/crates/pm/src/util/sysconf.rs @@ -6,8 +6,18 @@ pub fn init() { reset_sigpipe(); } - // Windows default thread stack is 1MB, insufficient for libdeflater + tar - // + rayon work-stealing. + // Windows default thread stack is 1MB, insufficient for libdeflater + // + tar + rayon work-stealing. On Unix the default 8MB stack is fine. + // + // Rayon thread count: prior iteration forced `max(num_cpus, 8)` on + // the theory that resolve-path manifest parse benefits from extra + // pool slots. Bench A/B showed that on 2-core GHA runners, 8 rayon + // workers oversubscribe disk during install-path tarball extract + // (par_chunks(64) × 8 = 512 in-flight writes) — utoo p3 degrades + // sharply under CI contention while utoo-next (default num_cpus) + // stays stable. Reverted to default to keep install-path stable; + // resolve-path uses tokio's blocking pool (512 default slots), + // which doesn't share rayon's contention. 
#[cfg(target_os = "windows")] rayon::ThreadPoolBuilder::new() .stack_size(8 * 1024 * 1024) diff --git a/crates/pm/src/util/user_config.rs b/crates/pm/src/util/user_config.rs index 34ee45a34..f6924f5aa 100644 --- a/crates/pm/src/util/user_config.rs +++ b/crates/pm/src/util/user_config.rs @@ -132,9 +132,23 @@ pub fn get_install_scope() -> InstallScope { INSTALL_SCOPE.get().copied().unwrap_or_default() } -// Manifest fetch concurrency configuration +// Manifest fetch concurrency configuration. Default raised from 64 to 96. +// +// We tried 256 to match bun's observed parallel streams; on GHA the +// fetch-breakdown instrumentation showed sum_parse exploded from +// ~10ms (local Mac, network-bound) to 728s on first cold run. +// manifest-bench's HTTP-only sweep on GHA (npmjs, h1) bottoms out +// somewhere in the 96-128 band — which one wins varies with npmjs's +// per-IP latency on each run (good runs picked 128, slow-network +// runs flattened the curve and even regressed at 128 due to wider +// p99 from queued requests). 96 is the conservative pick: it's at +// or near best on every run we've measured, never the worst, and +// leaves headroom for npmjs to throttle without compounding queue +// time. Combined-parse fetch (671ac98e) made the spawn_blocking +// pool no longer a contention bottleneck, but didn't change the +// network-side variance — that's what caps the useful concurrency. 
static MANIFESTS_CONCURRENCY_LIMIT: LazyLock> = - LazyLock::new(|| ConfigValue::new("manifests-concurrency-limit", 64)); + LazyLock::new(|| ConfigValue::new("manifests-concurrency-limit", 96)); pub fn set_manifests_concurrency_limit(value: Option) { MANIFESTS_CONCURRENCY_LIMIT.set(value); diff --git a/crates/preload-bench/Cargo.toml b/crates/preload-bench/Cargo.toml new file mode 100644 index 000000000..9d37d7769 --- /dev/null +++ b/crates/preload-bench/Cargo.toml @@ -0,0 +1,38 @@ +[package] +name = "preload-bench" +version = "0.0.0" +edition = "2024" +license = "MIT" +publish = false +description = "Self-contained streaming-with-transitive-walk manifest preload bench. Reproduces manifest-bench's standalone fetch loop but discovers transitive deps from package.json instead of consuming a flat name list. No dependency on ruborist or any utoo internals." + +[[bin]] +name = "preload-bench" +path = "src/main.rs" + +# tombi: format.rules.table-keys-order.disabled = true +[dependencies] +anyhow = { workspace = true } +clap = { workspace = true } +futures = "0.3" +serde = { version = "1", features = ["derive"] } +serde_json = { workspace = true } +simd-json = "0.17" +tokio = { workspace = true, features = ["macros", "rt-multi-thread", "fs", "time"] } + +# Same TLS/DNS choices as manifest-bench so the only delta vs that bench +# is the transitive-walk loop. +reqwest = { version = "0.12", default-features = false, features = [ + "brotli", + "gzip", + "http2", + "rustls-tls-native-roots-no-provider", + "socks" +] } +rustls = { version = "0.23", default-features = false, features = [ + "aws-lc-rs", + "logging", + "std", + "tls12" +] } +rustls-native-certs = "0.8" diff --git a/crates/preload-bench/src/main.rs b/crates/preload-bench/src/main.rs new file mode 100644 index 000000000..46f917d19 --- /dev/null +++ b/crates/preload-bench/src/main.rs @@ -0,0 +1,505 @@ +//! Self-contained streaming preload bench with transitive walking. +//! +//! 
Same HTTP setup as `manifest-bench` (own `reqwest::Client` built +//! per rep with `aws-lc-rs` TLS, `pool_max_idle_per_host(256)`, no +//! proxy, default DNS, no retry). The only delta vs `manifest-bench` +//! is that this bench discovers names by walking transitive deps +//! from a `package.json` root, instead of consuming a flat name +//! list. +//! +//! Why a separate crate: ruborist's manifest-fetch path goes through +//! several service layers (custom DNS resolver, retry, cache, +//! single-flight gates, event receivers). Each layer might add +//! overhead. This bench bypasses all of them — same shape as +//! manifest-bench, just with a streaming `FuturesUnordered` that +//! refills from a pending queue extended by parsed transitive deps. +//! +//! Reports both the standalone preload wall and a per-rep eff_parallel +//! number so we can compare directly against manifest-bench's +//! `phase_wall` + `avg_conc` for the same workload. +//! +//! Output (one line per rep, matching manifest-bench shape): +//! [rep N] preload_wall=Xms n=Y bytes=Z avg_conc=N.N parse_sum=Wms 200=A 4xx=B err=C + +use std::collections::{HashMap, HashSet, VecDeque}; +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Instant; + +use anyhow::{Context, Result, anyhow}; +use clap::Parser; +use futures::stream::{FuturesUnordered, StreamExt}; +use serde::Deserialize; + +#[derive(Parser, Debug)] +#[command( + name = "preload-bench", + about = "Streaming preload bench with transitive walking (self-contained)" +)] +struct Args { + /// Registry base URL. + #[arg(long, default_value = "https://registry.npmjs.org")] + registry: String, + + /// Path to a `package.json` to walk from. Reads `dependencies` + + /// `devDependencies` + `optionalDependencies` as the initial seed. + #[arg(long)] + package_json: PathBuf, + + /// Maximum concurrent in-flight requests. + #[arg(long, default_value_t = 96)] + concurrency: usize, + + /// Number of times to repeat the whole walk (fresh client per rep). 
+ #[arg(long, default_value_t = 4)] + reps: usize, + + /// Force HTTP/1.1. + #[arg(long, default_value_t = true)] + http1_only: bool, + + /// Override `User-Agent`. + #[arg(long)] + user_agent: Option, + + /// Include `peerDependencies` when walking transitives. Off by + /// default (matches utoo's default). + #[arg(long)] + include_peer: bool, +} + +#[derive(Deserialize)] +struct PackageJson { + #[serde(default)] + dependencies: HashMap, + #[serde(default, rename = "devDependencies")] + dev_dependencies: HashMap, + #[serde(default, rename = "optionalDependencies")] + optional_dependencies: HashMap, +} + +#[tokio::main] +async fn main() -> Result<()> { + let args = Args::parse(); + + let raw = std::fs::read_to_string(&args.package_json) + .with_context(|| format!("read {:?}", args.package_json))?; + let pkg: PackageJson = serde_json::from_str(&raw).context("parse package.json")?; + let initial: Vec<(String, String)> = pkg + .dependencies + .into_iter() + .chain(pkg.dev_dependencies) + .chain(pkg.optional_dependencies) + .filter(|(_, spec)| is_registry_spec(spec)) + .collect(); + + println!( + "preload-bench: registry={} concurrency={} reps={} initial={} h1_only={} ua={} include_peer={}", + args.registry, + args.concurrency, + args.reps, + initial.len(), + args.http1_only, + args.user_agent.as_deref().unwrap_or(""), + args.include_peer, + ); + + for rep in 1..=args.reps { + run_once(&args, &initial, rep).await?; + } + + Ok(()) +} + +/// Quick registry-spec check (a `^...` / `~...` / `latest` / etc). +/// Excludes `file:`, `link:`, `workspace:`, `git+`, `https://`, and +/// `/` shorthand. Same intent as ruborist's +/// `SpecStr::is_registry_spec` but inlined to keep this crate +/// dependency-free. 
+fn is_registry_spec(spec: &str) -> bool { + if spec.is_empty() { + return true; // bare entries default to "*" + } + let lower = spec.to_ascii_lowercase(); + if lower.starts_with("file:") + || lower.starts_with("link:") + || lower.starts_with("workspace:") + || lower.starts_with("portal:") + || lower.starts_with("git+") + || lower.starts_with("git://") + || lower.starts_with("github:") + || lower.starts_with("https://") + || lower.starts_with("http://") + { + return false; + } + // `/` shorthand — exactly one '/' and no '@' prefix on + // first segment (rules out scoped names like `@scope/pkg`). + if let Some((head, tail)) = spec.split_once('/') + && !head.starts_with('@') + && !tail.is_empty() + && !tail.contains('/') + { + return false; + } + true +} + +#[derive(Debug, Default)] +struct RepStats { + n: usize, + bytes: usize, + parse_sum_us: u128, + busy_us: u128, + sum_us: u128, + ok_200: usize, + err_4xx: usize, + err_other: usize, +} + +async fn run_once(args: &Args, initial: &[(String, String)], rep: usize) -> Result<()> { + let client = build_client(args)?; + let registry = Arc::new(args.registry.trim_end_matches('/').to_string()); + let concurrency = args.concurrency; + let include_peer = args.include_peer; + + let phase_start = Instant::now(); + let mut stats = RepStats::default(); + + // (name, spec) dedup — same shape as ruborist's seen_specs but + // self-contained. We dedup the *spec* level because two specs on + // the same name might resolve to different versions. + let mut seen: HashSet<(String, String)> = HashSet::new(); + let mut pending: VecDeque<(String, String)> = VecDeque::new(); + for (name, spec) in initial { + if seen.insert((name.clone(), spec.clone())) { + pending.push_back((name.clone(), spec.clone())); + } + } + + // Sibling-fetch dedup: when two specs for the same name are both + // pending, only one fetch is issued; subsequent specs settle from + // the cached body. Keyed by name. 
Maps name → cached parsed body + // (`Arc>`) once the first fetch lands. + let body_cache: Arc>>>> = + Arc::new(std::sync::Mutex::new(HashMap::new())); + let mut in_flight_names: HashSet = HashSet::new(); + let mut deferred_by_name: HashMap> = HashMap::new(); + + let mut futs: FuturesUnordered = FuturesUnordered::new(); + + loop { + while futs.len() < concurrency { + let Some((name, spec)) = pending.pop_front() else { + break; + }; + + // If the body is already cached (sibling spec for an + // already-fetched name), spawn a settle-only future. + if let Some(raw) = body_cache.lock().unwrap().get(&name).cloned() { + let n = name.clone(); + let s = spec.clone(); + let fut: Fut = Box::pin(settle_only(n, s, raw, include_peer)); + futs.push(fut); + continue; + } + + // First time seeing this name: fetch + settle. Stash any + // sibling specs that arrive while in-flight. + if !in_flight_names.insert(name.clone()) { + deferred_by_name.entry(name).or_default().push(spec); + continue; + } + + spawn_fetch( + &client, + ®istry, + name, + spec, + Arc::clone(&body_cache), + include_peer, + &mut futs, + ); + } + + if futs.is_empty() { + break; + } + + let Some(out) = futs.next().await else { break }; + stats.n += 1; + stats.busy_us += out.busy_us; + stats.sum_us += out.sum_us; + stats.parse_sum_us += out.parse_us; + stats.bytes += out.bytes; + match out.status { + 200 => stats.ok_200 += 1, + 400..=499 => stats.err_4xx += 1, + _ => stats.err_other += 1, + } + + // Drain sibling specs for this name now that body is cached. + if out.fetched + && let Some(siblings) = deferred_by_name.remove(&out.name) + && let Some(raw) = body_cache.lock().unwrap().get(&out.name).cloned() + { + for sibling_spec in siblings { + let n = out.name.clone(); + let r = Arc::clone(&raw); + let fut: Fut = Box::pin(settle_only(n, sibling_spec, r, include_peer)); + futs.push(fut); + } + } + + // Extend pending with new transitives, dedup by (name, spec). 
+ for (name, spec) in out.transitives { + if seen.insert((name.clone(), spec.clone())) { + pending.push_back((name, spec)); + } + } + } + + let phase_wall_ms = phase_start.elapsed().as_millis(); + let parse_sum_ms = stats.parse_sum_us / 1000; + // avg_conc = sum_request_us / busy_window_us. busy_us isn't a true + // merged-interval here (we don't track per-req start/end timestamps + // for that), so use phase_wall as the denominator — slightly + // pessimistic but consistent. + let avg_conc = if phase_wall_ms > 0 { + stats.sum_us as f64 / 1000.0 / phase_wall_ms as f64 + } else { + 0.0 + }; + + println!( + "[rep {rep}] preload_wall={phase_wall_ms}ms n={} bytes={} parse_sum={parse_sum_ms}ms avg_conc={avg_conc:.1} 200={} 4xx={} err={}", + stats.n, stats.bytes, stats.ok_200, stats.err_4xx, stats.err_other, + ); + Ok(()) +} + +#[derive(Debug)] +struct FetchOutcome { + name: String, + /// `(name, spec)` transitive deps unfolded by parsing the resolved + /// version's `dependencies` / `optionalDependencies` (and + /// optionally `peerDependencies`). + transitives: Vec<(String, String)>, + /// `true` if this future fetched the body (vs settle-only on a + /// cached body); only fetchers populate `body_cache` and trigger + /// sibling drain. + fetched: bool, + /// HTTP status code (200 / 4xx / 5xx / 0 on transport error). + status: u16, + /// Body byte count (0 on error). + bytes: usize, + /// Self-reported per-future busy_us — `end - start`. Approximate. + busy_us: u128, + /// Sum of all per-future durations summed by the main loop. + sum_us: u128, + /// Parse work done inside this future (for accounting). 
+ parse_us: u128,
+}
+
+type Fut = std::pin::Pin<Box<dyn std::future::Future<Output = FetchOutcome> + Send>>;
+
+fn spawn_fetch(
+ client: &reqwest::Client,
+ registry: &Arc<String>,
+ name: String,
+ spec: String,
+ body_cache: Arc<std::sync::Mutex<HashMap<String, Arc<Vec<u8>>>>>,
+ include_peer: bool,
+ futs: &mut FuturesUnordered<Fut>,
+) {
+ let url = format!("{}/{}", registry, name);
+ let client = client.clone();
+ let fut: Fut = Box::pin(async move {
+ let start = Instant::now();
+ let req = client
+ .get(&url)
+ .header("accept", "application/vnd.npm.install-v1+json")
+ .send();
+ let (raw_bytes, status) = match req.await {
+ Ok(resp) => {
+ let status = resp.status().as_u16();
+ let body = resp.bytes().await.map(|b| b.to_vec()).unwrap_or_default();
+ (body, status)
+ }
+ Err(_) => (Vec::new(), 0),
+ };
+ let bytes = raw_bytes.len();
+
+ let (parse_us, transitives) = if status == 200 && !raw_bytes.is_empty() {
+ let raw_arc = Arc::new(raw_bytes);
+ body_cache
+ .lock()
+ .unwrap()
+ .insert(name.clone(), Arc::clone(&raw_arc));
+ // Move the Arc<Vec<u8>> into spawn_blocking; the parser
+ // mutates a clone, so the cached copy is unaffected. 
+ let spec_for_parse = spec.clone(); + let parse_start = Instant::now(); + let result = tokio::task::spawn_blocking(move || { + parse_and_extract(&raw_arc, &spec_for_parse, include_peer) + }) + .await + .ok() + .flatten() + .unwrap_or_default(); + (parse_start.elapsed().as_micros(), result) + } else { + (0, Vec::new()) + }; + + let end = Instant::now(); + let busy_us = end.duration_since(start).as_micros(); + FetchOutcome { + name, + transitives, + fetched: true, + status, + bytes, + busy_us, + sum_us: busy_us, + parse_us, + } + }); + futs.push(fut); +} + +async fn settle_only( + name: String, + spec: String, + raw: Arc>, + include_peer: bool, +) -> FetchOutcome { + let start = Instant::now(); + let parse_start = start; + let transitives = tokio::task::spawn_blocking(move || { + parse_and_extract(&raw, &spec, include_peer).unwrap_or_default() + }) + .await + .unwrap_or_default(); + let parse_us = parse_start.elapsed().as_micros(); + let end = Instant::now(); + let busy_us = end.duration_since(start).as_micros(); + FetchOutcome { + name, + transitives, + fetched: false, + status: 200, + bytes: 0, + busy_us, + sum_us: busy_us, + parse_us, + } +} + +/// Parse a manifest body, resolve `spec` against the version list, +/// extract that version's transitive deps. Single +/// `simd_json::to_borrowed_value` pass for the whole body — same as +/// ruborist's combined-parse path, but inlined here so this crate +/// has no ruborist dependency. +fn parse_and_extract( + raw: &Arc>, + spec: &str, + include_peer: bool, +) -> Option> { + use simd_json::prelude::{ValueAsObject, ValueObjectAccess}; + + let mut buf = (**raw).clone(); + let parsed = simd_json::to_borrowed_value(&mut buf).ok()?; + + let dist_tags: HashMap = parsed + .get("dist-tags") + .and_then(|v| HashMap::::deserialize(v).ok()) + .unwrap_or_default(); + let versions_obj = parsed.get("versions").and_then(ValueAsObject::as_object)?; + + // Resolve spec. 
Three cases: dist-tag match, exact-version key, or + // semver range (we approximate with "first version that satisfies" + // — preload-bench is a measurement tool, not a real resolver, so + // we tolerate slight selection differences vs ruborist for the + // purpose of timing the network path). + let resolved = if let Some(via_tag) = dist_tags.get(spec) { + via_tag.clone() + } else if versions_obj.contains_key(spec) { + spec.to_string() + } else if let Some(latest) = dist_tags.get("latest") + && spec_satisfied_by(spec, latest) + { + latest.clone() + } else { + // Last-resort: pick the lexicographically-largest version. Not + // semver-correct but bounded by the version set, and good + // enough for timing. + versions_obj.keys().max().map(|k| k.to_string())? + }; + + let version_obj = versions_obj.get(resolved.as_str())?; + let mut out: Vec<(String, String)> = Vec::new(); + + if let Some(deps) = version_obj.get("dependencies") + && let Ok(map) = HashMap::::deserialize(deps) + { + out.extend(map.into_iter().filter(|(_, s)| is_registry_spec(s))); + } + if include_peer + && let Some(deps) = version_obj.get("peerDependencies") + && let Ok(map) = HashMap::::deserialize(deps) + { + out.extend(map.into_iter().filter(|(_, s)| is_registry_spec(s))); + } + if let Some(deps) = version_obj.get("optionalDependencies") + && let Ok(map) = HashMap::::deserialize(deps) + { + out.extend(map.into_iter().filter(|(_, s)| is_registry_spec(s))); + } + Some(out) +} + +/// Crude semver-satisfies check: only handles `^X.Y.Z` and `~X.Y.Z` +/// against an exact target. Sufficient for "does latest satisfy spec" +/// in this measurement context — full semver is in the resolver, not +/// the bench. 
+fn spec_satisfied_by(spec: &str, target: &str) -> bool {
+ let s = spec.trim();
+ let body = s
+ .strip_prefix('^')
+ .or_else(|| s.strip_prefix('~'))
+ .unwrap_or(s);
+ target.starts_with(body) || target == body
+}
+
+fn build_client(args: &Args) -> Result<reqwest::Client> {
+ // Install aws-lc-rs as the default crypto provider (idempotent —
+ // first call wins). Same setup as manifest-bench.
+ let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
+
+ let mut roots = rustls::RootCertStore::empty();
+ let native = rustls_native_certs::load_native_certs();
+ for cert in native.certs {
+ let _ = roots.add(cert);
+ }
+
+ let tls_config = rustls::ClientConfig::builder_with_provider(std::sync::Arc::new(
+ rustls::crypto::aws_lc_rs::default_provider(),
+ ))
+ .with_safe_default_protocol_versions()
+ .map_err(|e| anyhow!("rustls protocol versions: {e}"))?
+ .with_root_certificates(roots)
+ .with_no_client_auth();
+
+ let mut builder = reqwest::Client::builder()
+ .use_preconfigured_tls(tls_config)
+ .no_proxy()
+ .pool_max_idle_per_host(256);
+ if args.http1_only {
+ builder = builder.http1_only();
+ }
+ if let Some(ua) = &args.user_agent {
+ builder = builder.user_agent(ua);
+ }
+ builder.build().context("build reqwest client")
+}
diff --git a/crates/ruborist/Cargo.toml b/crates/ruborist/Cargo.toml
index fdda5ea5e..57d96f187 100644
--- a/crates/ruborist/Cargo.toml
+++ b/crates/ruborist/Cargo.toml
@@ -52,9 +52,17 @@ workspace = true
 [dev-dependencies]
 tokio = { workspace = true, features = ["macros", "rt"] }
 
-# Native (non-macOS) targets: reqwest's default rustls + ring.
+# Native (non-macOS) targets: reqwest's default rustls + ring (used by
+# `service::http`'s global client). `mb_resolve` separately brings
+# `rustls` (with aws-lc-rs) and `rustls-native-certs` to build its
+# own client via `use_preconfigured_tls(aws_lc_rs)` — same TLS choice
+# as `manifest-bench` / `preload-bench`. 
The two providers coexist: +# reqwest's internal client uses ring; `mb_resolve`'s explicit client +# uses aws-lc-rs. [target.'cfg(not(any(target_arch = "wasm32", target_os = "macos")))'.dependencies] -reqwest = { version = "0.12", default-features = false, features = ["rustls-tls-native-roots"] } +reqwest = { version = "0.12", default-features = false, features = ["rustls-tls-native-roots"] } +rustls = { version = "0.23", default-features = false, features = ["aws-lc-rs", "logging", "std", "tls12"] } +rustls-native-certs = "0.8" # Native-only dependencies (not compiled for WASM) [target.'cfg(not(target_arch = "wasm32"))'.dependencies] diff --git a/crates/ruborist/src/model/manifest.rs b/crates/ruborist/src/model/manifest.rs index 37e95deb9..3509e839d 100644 --- a/crates/ruborist/src/model/manifest.rs +++ b/crates/ruborist/src/model/manifest.rs @@ -163,6 +163,14 @@ pub async fn extract_core_version_off_runtime( full: Arc, version: String, ) -> (String, Option>) { + // Round 3 attempted to switch this to `tokio::task::spawn_blocking` + // for the same reasons as `parse_json_off_runtime`, but CI showed + // it regressed p1 by 0.5s on `preload_wall`. Mechanism: this + // function is called per (name, spec), so packages with multiple + // specs (e.g. peer-dep range overlaps) call it 2-5x per fetch. + // spawn_blocking's per-dispatch overhead (channel + thread wake) + // is significant for short CPU work; with the multiplier this + // outweighed rayon queue waits at conc=64. Keep on rayon::spawn. #[cfg(not(target_arch = "wasm32"))] { let (tx, rx) = tokio::sync::oneshot::channel(); diff --git a/crates/ruborist/src/resolver/builder.rs b/crates/ruborist/src/resolver/builder.rs index b0bf2794c..97db89e79 100644 --- a/crates/ruborist/src/resolver/builder.rs +++ b/crates/ruborist/src/resolver/builder.rs @@ -18,13 +18,13 @@ //! This separation allows for maximum parallelism during network I/O //! while keeping the graph building logic simple and deterministic. 
-use petgraph::graph::NodeIndex; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::path::PathBuf; use std::sync::Arc; #[cfg(feature = "http-tarball")] use anyhow::Context as _; +use petgraph::graph::NodeIndex; use crate::model::graph::{DependencyGraph, FindResult, PackageNode}; use crate::model::manifest::NodeManifest; @@ -32,7 +32,7 @@ use crate::model::node::EdgeType; use crate::model::package_json::PackageJson; use crate::resolver::preload::{PreloadConfig, preload_manifests}; use crate::resolver::registry::{ResolveError, resolve_registry_dep}; -use crate::spec::{Catalogs, PackageSpec, Protocol}; +use crate::spec::{Catalogs, PackageSpec, Protocol, SpecStr}; use crate::traits::progress::{BuildEvent, EventReceiver, NoopReceiver}; use crate::traits::registry::{RegistryClient, ResolvedPackage}; @@ -180,10 +180,10 @@ struct NodeFlags { /// Only registry specs (e.g. `^4.17.0`) are collected. `catalog:` specs are /// resolved at edge creation time, so by the time this runs they are already /// concrete registry specs. -fn gather_preload_deps(graph: &DependencyGraph, peer_deps: PeerDeps) -> Vec<(String, String)> { - use crate::spec::SpecStr; - use std::collections::HashSet; - +pub(crate) fn gather_preload_deps( + graph: &DependencyGraph, + peer_deps: PeerDeps, +) -> Vec<(String, String)> { let mut deps = HashSet::new(); let collect = |node_index: NodeIndex, deps: &mut HashSet<(String, String)>| { @@ -651,6 +651,55 @@ pub async fn process_dependency( } } +/// Sync variant of [`process_dependency`] for callers that already +/// have a resolved registry manifest in hand (the +/// `mb_fetch_with_graph` lockfile-only path populates one +/// per fetch). Skips: +/// * spec-routing (`Git` / `Http` / `Local` / `Workspace`) — only +/// the `Registry` branch is handled. Non-registry edges are +/// left unresolved for the caller to defer. +/// * `resolve_registry_dep` (the resolved package is the +/// parameter). 
+/// * Override re-resolve (uses the original resolved package even +/// if `graph.check_override` would re-route the spec). Override +/// re-resolve requires another network round-trip; the +/// lockfile-only fast path skips it intentionally — overridden +/// specs that diverge from the original resolution will need a +/// follow-up BFS sweep. +/// +/// Returns the same [`ProcessResult`] shape as `process_dependency` +/// so the caller can register newly-created nodes' edges with +/// `edge_targets` for the streaming graph build. +pub fn process_dependency_with_resolved( + graph: &mut DependencyGraph, + parent_idx: NodeIndex, + edge_info: &DependencyEdgeInfo, + resolved: &ResolvedPackage, + config: &BuildDepsConfig, +) -> ProcessResult { + match graph.find_compatible_node(parent_idx, &edge_info.name, &edge_info.spec) { + FindResult::Reuse(existing_index) => { + graph.mark_dependency_resolved(edge_info.edge_id, existing_index); + update_node_type_from_edge(graph, parent_idx, existing_index, &edge_info.edge_type); + ProcessResult::Reused(existing_index) + } + FindResult::Conflict(conflict_parent) | FindResult::New(conflict_parent) => { + let new_node = create_package_node(&edge_info.name, resolved, conflict_parent, graph); + let new_index = graph.add_node(new_node); + graph.add_physical_edge(conflict_parent, new_index); + graph.mark_dependency_resolved(edge_info.edge_id, new_index); + update_node_type_from_edge(graph, parent_idx, new_index, &edge_info.edge_type); + add_edges_from( + graph, + new_index, + &*resolved.manifest, + &EdgeContext::new(config.peer_deps, DevDeps::Exclude), + ); + ProcessResult::Created(new_index) + } + } +} + /// Build the complete dependency tree using BFS traversal. /// /// This is the main entry point for dependency resolution. 
It starts from @@ -756,6 +805,7 @@ async fn run_preload_phase( return; } + crate::util::FETCH_TIMINGS.reset(); let start = tokio::time::Instant::now(); let initial_deps = gather_preload_deps(graph, config.peer_deps); @@ -794,10 +844,27 @@ async fn run_preload_phase( failed: stats.failed_count, }); - tracing::debug!("Preload phase: {:?}", start.elapsed()); + let preload_elapsed = start.elapsed(); + tracing::debug!("Preload phase: {:?}", preload_elapsed); + tracing::info!( + "p1-breakdown preload_wall={}ms | {}", + preload_elapsed.as_millis(), + crate::util::FETCH_TIMINGS.snapshot().summary_line(), + ); } /// Run the BFS traversal phase to build the dependency tree. +/// +/// Each level does a parallel prefetch of all unresolved registry specs +/// before the sequential `process_dependency` walk. +/// +/// When `skip_preload=true` (lockfile-only path), the caller is +/// expected to have already populated `registry.cache()` via +/// [`super::fast_preload::fast_preload`], so this BFS sees only +/// cache hits. When `skip_preload=false` (install paths), the +/// receiver-driven [`super::preload::preload_manifests`] runs ahead +/// of this phase and feeds `BuildEvent::PackageResolved` to the +/// pipeline. async fn run_bfs_phase( graph: &mut DependencyGraph, registry: &R, @@ -805,13 +872,24 @@ async fn run_bfs_phase( receiver: &E, ) -> Result<(), ResolveError> { let start = tokio::time::Instant::now(); - let mut current_level = vec![graph.root_index]; + // Per-stage instrumentation. The full BFS wall is `bfs_elapsed` + // below; these split it into work types so we can see whether + // graph traversal, edge resolution, or post-resolve event + // dispatch dominates. 
+ let mut total_collect_us: u64 = 0; + let mut total_resolve_us: u64 = 0; + let mut total_event_us: u64 = 0; + let mut total_edges: u64 = 0; + let mut total_levels: u64 = 0; + while !current_level.is_empty() { + total_levels += 1; receiver.on_event(BuildEvent::LevelStart { node_count: current_level.len(), }); + let mut next_level = Vec::new(); for node_index in current_level { @@ -828,7 +906,10 @@ async fn run_bfs_phase( } // Process unresolved dependencies + let collect_start = std::time::Instant::now(); let unresolved = collect_unresolved_edges(graph, node_index); + total_collect_us += collect_start.elapsed().as_micros() as u64; + total_edges += unresolved.len() as u64; receiver.on_event(BuildEvent::DependencyCount { count: unresolved.len(), }); @@ -837,6 +918,7 @@ async fn run_bfs_phase( receiver.on_event(BuildEvent::Resolving { name: &edge_info.name, }); + let resolve_start = std::time::Instant::now(); let result = process_dependency(graph, registry, node_index, &edge_info, config) .await .map_err(|inner| { @@ -847,7 +929,10 @@ async fn run_bfs_phase( source: Box::new(inner), } }); - match result? 
{ + total_resolve_us += resolve_start.elapsed().as_micros() as u64; + let event_start = std::time::Instant::now(); + let processed = result?; + match processed { ProcessResult::Created(idx) => { // Extract node info for events if let Some(node) = graph.get_node(idx) { @@ -887,6 +972,7 @@ async fn run_bfs_phase( }); } } + total_event_us += event_start.elapsed().as_micros() as u64; } } @@ -896,7 +982,18 @@ async fn run_bfs_phase( current_level = next_level; } - tracing::debug!("Build phase: {:?}", start.elapsed()); + let bfs_elapsed = start.elapsed(); + tracing::debug!("Build phase: {:?}", bfs_elapsed); + tracing::info!( + "p1-breakdown bfs_wall={}ms levels={} edges={} collect={}us resolve={}us event={}us | {}", + bfs_elapsed.as_millis(), + total_levels, + total_edges, + total_collect_us, + total_resolve_us, + total_event_us, + crate::util::FETCH_TIMINGS.snapshot().summary_line(), + ); Ok(()) } diff --git a/crates/ruborist/src/resolver/fast_preload.rs b/crates/ruborist/src/resolver/fast_preload.rs new file mode 100644 index 000000000..d049321d8 --- /dev/null +++ b/crates/ruborist/src/resolver/fast_preload.rs @@ -0,0 +1,362 @@ +//! Lean parallel manifest fetcher modeled on `manifest-bench`. +//! +//! Bypasses [`crate::service::registry::UnifiedRegistry`] — and therefore +//! its `OnceMap` gates, [`crate::service::store::ManifestStore`] writes, +//! and `EventReceiver` event dispatch — to drive a flat +//! `FuturesUnordered` over [`crate::service::manifest::fetch_full_manifest`] +//! plus a fused-into-fetch primary settle. The warm +//! [`crate::service::cache::MemoryCache`] it leaves behind makes the +//! subsequent BFS phase a pure cache-hit walk: no network, no rayon +//! re-parse hop on `extract_core_version`. +//! +//! Intended for the lockfile-only path (`utoo deps`) which has no +//! pipeline consumer for `BuildEvent::PackageResolved` — install paths +//! still go through [`super::preload::preload_manifests`] so the +//! pipeline keeps its early-start signal. 
+//! +//! ## Why settle is fused into the fetch task +//! +//! A "settle" turns a freshly-fetched `FullManifest` plus a spec into a +//! `CoreVersionManifest` for one version, via `simd_json::to_borrowed_value` +//! over the manifest's raw bytes. That parse is 5–10ms per spec on a +//! 100KB body. +//! +//! v1 ran settle inline on the tokio runtime worker — that starved +//! sibling fetches' I/O drive (CI showed `avg_request` +3ms, +//! `avg_parse` 5→11ms). v2 dispatched settle to rayon via a separate +//! `FuturesUnordered` future, which fixed the runtime starvation but +//! introduced a dispatch RTT: fetch lands → rayon settle queued → settle +//! pops → `pending` finally gets transitive deps. That round-trip held +//! the wave-shaped transitive walk back, capping `eff_parallel` at ~44 +//! against a 96 cap. +//! +//! v3 (this) folds the primary settle into the fetch task itself via +//! `tokio::task::spawn_blocking`. The fetch task awaits both the +//! network round-trip and the version-extract on the same blocking +//! pool slot, then returns with the resolved `CoreVersionManifest` +//! attached. The main loop pulls a single `Fetched` event and +//! immediately extends `pending` — no separate settle pop. Sibling +//! specs (rare; same package, different range) still go through a +//! `Settled` future to keep the primary path lean. 
+ +use std::collections::{HashMap, HashSet, VecDeque}; +use std::pin::Pin; +use std::sync::Arc; + +use futures::future::BoxFuture; +use futures::stream::{FuturesUnordered, StreamExt}; + +use crate::model::manifest::{CoreVersionManifest, FullManifest, extract_core_version_off_runtime}; +use crate::model::node::PeerDeps; +use crate::resolver::preload::{Dep, PreloadConfig}; +use crate::resolver::version::resolve_target_version; +use crate::service::{ + FetchManifestOptions, FetchWithSettleResult, MemoryCache, MetadataFormat, + fetch_full_manifest_with_settle, +}; +use crate::spec::SpecStr; +use crate::util::FETCH_TIMINGS; + +/// Statistics from the lean fetch loop. Mirrors `PreloadStats` shape so +/// the bench-grep regex stays the same. +#[derive(Debug, Default)] +pub struct FastPreloadStats { + pub success_count: usize, + pub failed_count: usize, + pub fetched_names: usize, + pub min_request_ms: u64, + pub max_request_ms: u64, + pub total_request_ms: u64, +} + +/// One fetch's primary settle outcome — the resolved version + parsed +/// `CoreVersionManifest` for the spec the fetch was originally issued +/// for. `None` means the spec didn't match any version (caller treats +/// as soft skip). +type PrimarySettle = Option<(String, Arc)>; + +/// Outcome of a fetch task. Owning `Arc` (rather than +/// `FetchManifestResult` by-value) means the fetch task can `Arc::clone` +/// once for the primary settle, then pass ownership along — no full +/// `FullManifest` clone (which would copy the 200-entry `time` +/// HashMap + the `versions` `Vec` per fetch). +enum FetchOutcome { + Ok(Arc), + NotModified, + Err, +} + +/// Output of one in-flight future. The main loop merges fetch and +/// sibling-settle completions through a single `FuturesUnordered`. 
+enum FastEvent { + Fetched { + name: String, + primary_spec: String, + outcome: FetchOutcome, + primary_settle: PrimarySettle, + elapsed_ms: u64, + }, + Settled { + new_deps: Vec, + }, +} + +type FastFut = Pin + Send>>; + +/// Collect dependencies from any deps map, filtering out non-registry specs. +fn collect_deps(map: Option<&HashMap>) -> Vec { + map.into_iter() + .flatten() + .filter(|(_, spec)| spec.is_registry_spec()) + .map(|(name, spec)| (name.clone(), spec.clone())) + .collect() +} + +/// Extract transitive dependencies from a resolved manifest. +/// devDependencies are omitted (only the root installs devDeps). +fn extract_transitive_deps(manifest: &CoreVersionManifest, peer_deps: PeerDeps) -> Vec { + let mut deps = Vec::new(); + deps.extend(collect_deps(manifest.dependencies.as_ref())); + if peer_deps == PeerDeps::Include { + deps.extend(collect_deps(manifest.peer_dependencies.as_ref())); + } + deps.extend(collect_deps(manifest.optional_dependencies.as_ref())); + deps +} + +/// Off-runtime settle for a `(name, spec)` whose `FullManifest` is +/// already cached. Used for sibling specs — multiple ranges on the +/// same package — that arrive after the primary fetch has landed. 
+fn settle_future( + name: String, + spec: String, + full: Arc, + cache: MemoryCache, + peer_deps: PeerDeps, +) -> BoxFuture<'static, FastEvent> { + Box::pin(async move { + let resolved_version = match resolve_target_version((&*full).into(), &spec) { + Ok(v) => v, + Err(_) => return FastEvent::Settled { new_deps: vec![] }, + }; + if let Some(cached) = cache.get_version_manifest(&name, &resolved_version) { + cache.set_version_manifest(name.clone(), spec.clone(), Arc::clone(&cached)); + return FastEvent::Settled { + new_deps: extract_transitive_deps(&cached, peer_deps), + }; + } + let (resolved_version, core) = + extract_core_version_off_runtime(Arc::clone(&full), resolved_version).await; + let new_deps = match core { + Some(core_arc) => { + cache.set_version_manifest(name.clone(), spec.clone(), Arc::clone(&core_arc)); + cache.set_version_manifest(name, resolved_version, Arc::clone(&core_arc)); + extract_transitive_deps(&core_arc, peer_deps) + } + None => Vec::new(), + }; + FastEvent::Settled { new_deps } + }) +} + +/// Manifest-bench-style flat parallel fetch of all transitively-reachable +/// registry manifests. Populates `cache` with both `full_manifests` and +/// `version_manifests` slots so the subsequent BFS does no network and no +/// re-parse. +/// +/// `initial_deps` should already be the union of root+workspace +/// registry edges, with non-registry specs filtered out. +pub async fn fast_preload( + initial_deps: Vec, + registry_url: &str, + cache: &MemoryCache, + config: &PreloadConfig, +) -> FastPreloadStats { + let mut stats = FastPreloadStats::default(); + let mut pending: VecDeque = VecDeque::from(initial_deps); + // Specs we've already enqueued. Prevents duplicate settles from + // re-walking the same transitive subtree. + let mut seen_specs: HashSet<(String, String)> = HashSet::new(); + // Names whose full manifest is in flight or already cached. 
+ let mut fetched_names: HashSet = HashSet::new(); + // Sibling specs that arrived while their package's full manifest + // was still in flight. The fetch's completion handler dispatches + // settles for them, then drains this bucket. + let mut deferred_by_name: HashMap> = HashMap::new(); + let mut futs: FuturesUnordered = FuturesUnordered::new(); + let concurrency = config.concurrency; + let peer_deps = config.peer_deps; + + loop { + while futs.len() < concurrency { + let Some((name, spec)) = pending.pop_front() else { + break; + }; + if !seen_specs.insert((name.clone(), spec.clone())) { + continue; + } + + // Hot path: a sibling spec for this name has already + // returned, so the full manifest is cached. Settle on + // rayon (off-runtime) — keeps the primary fetch path + // (next branch) clean. + if let Some(full) = cache.get_full_manifest(&name) { + futs.push(Box::pin(settle_future( + name, + spec, + full, + cache.clone(), + peer_deps, + ))); + continue; + } + + // A fetch for this name is already in flight: stash this + // sibling spec; the fetch's completion handler will + // dispatch a settle for it. + if !fetched_names.insert(name.clone()) { + deferred_by_name.entry(name).or_default().push(spec); + continue; + } + + let registry_url = registry_url.to_string(); + let primary_spec = spec.clone(); + let n = name.clone(); + futs.push(Box::pin(async move { + let start = tokio::time::Instant::now(); + // Combined fetch + envelope parse + primary settle in + // a single `to_borrowed_value` pass — replaces the old + // pattern of typed-serde envelope parse followed by a + // separate `to_borrowed_value` reparse for version + // extraction. Halves simd_json work per fetch. 
+ let result = fetch_full_manifest_with_settle( + FetchManifestOptions { + registry_url: ®istry_url, + name: &n, + format: MetadataFormat::Abbreviated, + etag: None, + }, + &primary_spec, + ) + .await; + let elapsed_ms = start.elapsed().as_millis() as u64; + let (outcome, primary_settle) = match result { + Ok(FetchWithSettleResult::Ok(payload)) => { + let full_arc = Arc::new(payload.manifest); + (FetchOutcome::Ok(full_arc), payload.primary_settle) + } + Ok(FetchWithSettleResult::NotModified) => (FetchOutcome::NotModified, None), + Err(e) => { + tracing::debug!("fast_preload failed for {}: {}", n, e); + (FetchOutcome::Err, None) + } + }; + FastEvent::Fetched { + name, + primary_spec, + outcome, + primary_settle, + elapsed_ms, + } + })); + } + + if futs.is_empty() { + break; + } + + let Some(event) = futs.next().await else { + break; + }; + + match event { + FastEvent::Fetched { + name, + primary_spec, + outcome, + primary_settle, + elapsed_ms, + } => { + if stats.success_count == 0 && stats.failed_count == 0 { + stats.min_request_ms = elapsed_ms; + stats.max_request_ms = elapsed_ms; + } else { + stats.min_request_ms = stats.min_request_ms.min(elapsed_ms); + stats.max_request_ms = stats.max_request_ms.max(elapsed_ms); + } + stats.total_request_ms += elapsed_ms; + + match outcome { + FetchOutcome::Ok(full_arc) => { + stats.success_count += 1; + stats.fetched_names += 1; + cache.set_full_manifest(name.clone(), Arc::clone(&full_arc)); + + // Apply the primary settle (already done inside + // the fetch task via spawn_blocking) — populate + // both `(name, primary_spec)` and + // `(name, resolved_version)` cache slots so BFS + // hits the early-return at registry.rs:347 on + // its first probe, then extend `pending` with + // the spec's transitive deps. 
+ if let Some((resolved_version, core_arc)) = primary_settle { + cache.set_version_manifest( + name.clone(), + primary_spec, + Arc::clone(&core_arc), + ); + cache.set_version_manifest( + name.clone(), + resolved_version, + Arc::clone(&core_arc), + ); + pending.extend(extract_transitive_deps(&core_arc, peer_deps)); + } + + // Sibling specs that were stashed while the + // fetch was in flight: dispatch each as a + // separate settle future. + if let Some(siblings) = deferred_by_name.remove(&name) { + for sibling_spec in siblings { + futs.push(Box::pin(settle_future( + name.clone(), + sibling_spec, + Arc::clone(&full_arc), + cache.clone(), + peer_deps, + ))); + } + } + } + FetchOutcome::NotModified | FetchOutcome::Err => { + // 304 is unreachable in practice (no ETag sent); + // both branches treated as soft failure. + stats.failed_count += 1; + } + } + } + FastEvent::Settled { new_deps } => { + pending.extend(new_deps); + } + } + } + + let total = stats.success_count + stats.failed_count; + let avg_ms = if total > 0 { + stats.total_request_ms / total as u64 + } else { + 0 + }; + tracing::info!( + "p1-breakdown fast_preload n={} ok={} fail={} avg_req={}ms min={}ms max={}ms | {}", + total, + stats.success_count, + stats.failed_count, + avg_ms, + stats.min_request_ms, + stats.max_request_ms, + FETCH_TIMINGS.snapshot().summary_line(), + ); + + stats +} diff --git a/crates/ruborist/src/resolver/mb_resolve.rs b/crates/ruborist/src/resolver/mb_resolve.rs new file mode 100644 index 000000000..07939c7a2 --- /dev/null +++ b/crates/ruborist/src/resolver/mb_resolve.rs @@ -0,0 +1,1091 @@ +//! Standalone manifest preload for the lockfile-only path. +//! +//! Mirrors `crates/preload-bench`'s loop shape verbatim, but lives +//! inside ruborist so it can populate `MemoryCache` for the BFS phase +//! to read. Used by `service::api::build_deps` whenever the caller +//! has `skip_preload=true` and no warm project cache — i.e. the +//! `utoo deps` (lockfile-only) path. +//! +//! 
Bypasses every other ruborist service layer: +//! * `service::http::get_client` — own `reqwest::Client` built per +//! call, no global LazyLock, no `dns_resolver(shared_resolver)`, +//! no `connect_timeout`, `pool_max_idle_per_host(256)` matching +//! `preload-bench` / `manifest-bench`. +//! * `service::manifest::fetch_full_manifest_with_settle` — own +//! `reqwest::get + body.bytes() + spawn_blocking(simd_json +//! to_borrowed_value)`, no `RetryIf`, no `FETCH_TIMINGS`. +//! * `service::registry::UnifiedRegistry` — no `OnceMap` inflight +//! gates, no `ManifestStore`, no `EventReceiver`. +//! +//! The only `service::*` touched is `MemoryCache::set_full_manifest` +//! and `MemoryCache::set_version_manifest` — thin DashMap wrappers +//! the BFS phase reads from. Without that, BFS would have nothing to +//! resolve against. +//! +//! Why a separate path: same-run CI data shows `preload-bench` +//! (self-contained, transitive walk, 4153 fetches) lands at ~2.57s +//! while ruborist's existing `fast_preload` path (combined parse via +//! service layers, 2733 fetches) lands at ~2.67s on the same network +//! — so on a per-fetch basis the service-layer path is ~50 % slower. +//! Removing the layers should close that gap. 
+ +use std::collections::{HashMap, HashSet, VecDeque}; +use std::pin::Pin; +use std::sync::Arc; +use std::time::Instant; + +use anyhow::{Context, Result, anyhow}; +use futures::stream::{FuturesUnordered, StreamExt}; +use parking_lot::Mutex; +use petgraph::graph::{EdgeIndex, NodeIndex}; +use serde::Deserialize; +use tokio::sync::mpsc; + +use crate::model::graph::DependencyGraph; +use crate::model::manifest::{CoreVersionManifest, FullManifest}; +use crate::model::node::PeerDeps; +use crate::resolver::builder::{ + BuildDepsConfig, ProcessResult, collect_unresolved_edges, process_dependency_with_resolved, +}; +use crate::resolver::preload::{Dep, PreloadConfig}; +use crate::resolver::semver::normalize_spec; +use crate::resolver::version::resolve_target_version; +use crate::service::MemoryCache; +use crate::spec::SpecStr; +use crate::traits::progress::{BuildEvent, EventReceiver}; +use crate::traits::registry::ResolvedPackage; + +#[derive(Debug, Default)] +pub struct MbFetchStats { + pub success: usize, + pub fail: usize, +} + +/// Build a fresh `reqwest::Client` matching `preload-bench` / +/// `manifest-bench` exactly: aws-lc-rs TLS provider via +/// `use_preconfigured_tls`, `pool_max_idle_per_host(256)`, no +/// proxy, `http1_only`. The reqwest crate's +/// `rustls-tls-native-roots` feature on Linux still bundles ring +/// for `service::http`'s global client, but this client overrides +/// at construction time — both providers coexist in the binary. +#[cfg(not(target_arch = "wasm32"))] +fn build_mb_client() -> Result { + // Idempotent: first install_default wins; subsequent calls are + // no-ops. Sets the process-wide default for any rustls consumer + // that builds a `ClientConfig` without explicit provider. 
+ let _ = rustls::crypto::aws_lc_rs::default_provider().install_default(); + + let mut roots = rustls::RootCertStore::empty(); + let native = rustls_native_certs::load_native_certs(); + for cert in native.certs { + // Tolerate individual bad roots — same defensive load pattern + // as `service::http::build_rustls_config`. + let _ = roots.add(cert); + } + + let tls_config = rustls::ClientConfig::builder_with_provider(std::sync::Arc::new( + rustls::crypto::aws_lc_rs::default_provider(), + )) + .with_safe_default_protocol_versions() + .map_err(|e| anyhow!("rustls protocol versions: {e}"))? + .with_root_certificates(roots) + .with_no_client_auth(); + + reqwest::Client::builder() + .use_preconfigured_tls(tls_config) + .no_proxy() + .pool_max_idle_per_host(256) + .http1_only() + .build() + .context("build reqwest client for mb_resolve") +} + +#[cfg(target_arch = "wasm32")] +fn build_mb_client() -> Result { + reqwest::Client::builder() + .no_proxy() + .build() + .context("build reqwest client for mb_resolve") +} + +/// Collect deps from a deps map, filtering non-registry specs. +fn collect_deps(map: Option<&HashMap>) -> Vec { + map.into_iter() + .flatten() + .filter(|(_, spec)| spec.is_registry_spec()) + .map(|(name, spec)| (name.clone(), spec.clone())) + .collect() +} + +fn extract_transitive(manifest: &CoreVersionManifest, peer_deps: PeerDeps) -> Vec { + let mut out = Vec::new(); + out.extend(collect_deps(manifest.dependencies.as_ref())); + if peer_deps == PeerDeps::Include { + out.extend(collect_deps(manifest.peer_dependencies.as_ref())); + } + out.extend(collect_deps(manifest.optional_dependencies.as_ref())); + out +} + +/// What a future returns when it lands. The main loop uses +/// `transitives` to extend `pending`, plus the cache writes already +/// happened inside the future. Only `fetched=true` futures populate +/// `body_cache` and trigger sibling drain. +struct FetchOutcome { + /// The dep key (alias name as it appears in the parent's deps map). 
+ /// Used by `graph_worker` to filter `edge_targets`, which is keyed + /// on the alias. + name: String, + /// The real package name after npm-alias normalization (e.g. + /// `name="ms"` + `spec="npm:raw-body@2.1.3"` → `real_name="raw-body"`). + /// Used by the main loop for `body_cache` / `deferred_by_name` / + /// `in_flight_names` keying, so two distinct aliases pointing at + /// the same package share dedup. + real_name: String, + /// The spec that triggered this fetch / settle. Used by the + /// main loop to look up the cached `CoreVersionManifest` for + /// `PackageResolved` event emission (the future already wrote + /// `(name, primary_spec)` to the cache). + primary_spec: String, + transitives: Vec, + fetched: bool, + /// Per-future wall (network + body recv + spawn_blocking parse). + /// Summed across all futures, divided by mb_fetch total wall = + /// eff_parallel — the same number `manifest-bench` reports as + /// `avg_conc`. Used to spot wave-shape underutilization. + wall_us: u64, + /// Per-future network-only wall (request.send + body.bytes). + /// `wall_us - net_us` is the spawn_blocking parse contribution. + net_us: u64, +} + +type Fut = Pin + Send>>; + +/// `(name, spec) → (FullManifest, resolved_version, version_subtree, transitive_deps)`. +type ParseResult = ( + Arc, + String, + Arc, + Vec, +); + +/// Single combined parse: one `simd_json::to_borrowed_value` over the +/// raw body extracts the envelope (name, dist-tags, versions keys) +/// AND deserializes the resolved version's `CoreVersionManifest` +/// subtree. Same shape as the parse step in `preload-bench`. 
+fn parse_combined(raw: Arc<[u8]>, spec: &str, peer_deps: PeerDeps) -> Option { + use simd_json::prelude::{ValueAsObject, ValueAsScalar, ValueObjectAccess}; + + let mut buf = (*raw).to_vec(); + let parsed = simd_json::to_borrowed_value(&mut buf).ok()?; + + let name = parsed + .get("name") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) + .unwrap_or_default(); + let dist_tags: HashMap = parsed + .get("dist-tags") + .and_then(|v| HashMap::::deserialize(v).ok()) + .unwrap_or_default(); + let versions_keys: Vec = parsed + .get("versions") + .and_then(ValueAsObject::as_object) + .map(|obj| obj.keys().map(|k| k.to_string()).collect()) + .unwrap_or_default(); + + let full = FullManifest { + name, + dist_tags, + versions: versions_keys, + raw: Arc::clone(&raw), + ..Default::default() + }; + + let resolved = resolve_target_version((&full).into(), spec).ok()?; + let core = parsed + .get("versions") + .and_then(|v| v.get(resolved.as_str())) + .and_then(|version_obj| CoreVersionManifest::deserialize(version_obj).ok())?; + let core_arc = Arc::new(core); + let transitives = extract_transitive(&core_arc, peer_deps); + + Some((Arc::new(full), resolved, core_arc, transitives)) +} + +/// Fetch + combined parse + cache write for one `(name, spec)`. +/// Future body owns all per-fetch work; main loop only extends +/// `pending` from the returned transitives and refills `futs`. +fn spawn_fetch( + client: reqwest::Client, + registry_url: Arc, + name: String, + spec: String, + cache: MemoryCache, + body_cache: Arc>>>, + peer_deps: PeerDeps, +) -> Fut { + Box::pin(async move { + let fut_start = Instant::now(); + let primary_spec = spec.clone(); + // Normalize npm-alias / workspace specs so the registry hit + // and the manifest parse run against the *real* package, not + // the alias name. Cache writes still go under the original + // (alias_name, alias_spec) key so `graph_worker` can locate + // them via `edge_targets`. 
+ let (real_name, real_spec) = normalize_spec(&name, &spec); + let url = format!("{}/{}", registry_url, real_name); + let resp = match client + .get(&url) + .header("accept", "application/vnd.npm.install-v1+json") + .send() + .await + { + Ok(r) if r.status().is_success() => r, + _ => { + let wall_us = fut_start.elapsed().as_micros() as u64; + return FetchOutcome { + name, + real_name, + primary_spec, + transitives: Vec::new(), + fetched: true, + wall_us, + net_us: wall_us, + }; + } + }; + let raw_bytes = match resp.bytes().await { + Ok(b) => b, + Err(_) => { + let wall_us = fut_start.elapsed().as_micros() as u64; + return FetchOutcome { + name, + real_name, + primary_spec, + transitives: Vec::new(), + fetched: true, + wall_us, + net_us: wall_us, + }; + } + }; + let net_us = fut_start.elapsed().as_micros() as u64; + let raw_arc: Arc<[u8]> = Arc::from(raw_bytes.as_ref()); + // Body cache is keyed by real_name so two aliases pointing at + // the same registry package share the body and only one fetch + // fires. Sibling drains know to use real_name (see + // `deferred_by_name` keying in the main loop). + body_cache + .lock() + .insert(real_name.clone(), Arc::clone(&raw_arc)); + + let real_spec_for_parse = real_spec.clone(); + let peer = peer_deps; + let parsed = tokio::task::spawn_blocking(move || { + parse_combined(raw_arc, &real_spec_for_parse, peer) + }) + .await + .ok() + .flatten(); + + let transitives = match parsed { + Some((full_arc, resolved, core_arc, transitives)) => { + cache.set_full_manifest(real_name.clone(), Arc::clone(&full_arc)); + // Under the alias key so `graph_worker` finds it. + cache.set_version_manifest(name.clone(), spec, Arc::clone(&core_arc)); + // Under the real key so subsequent direct deps on + // the same package@version dedupe correctly. 
+ cache.set_version_manifest(real_name.clone(), resolved, core_arc); + transitives + } + None => Vec::new(), + }; + + let wall_us = fut_start.elapsed().as_micros() as u64; + FetchOutcome { + name, + real_name, + primary_spec, + transitives, + fetched: true, + wall_us, + net_us, + } + }) +} + +/// Settle-only future for a sibling spec whose `(name)` body already +/// landed via a sibling fetch. Same combined parse, no network. +fn spawn_settle( + name: String, + spec: String, + raw: Arc<[u8]>, + cache: MemoryCache, + peer_deps: PeerDeps, +) -> Fut { + Box::pin(async move { + let fut_start = Instant::now(); + let primary_spec = spec.clone(); + let (real_name, real_spec) = normalize_spec(&name, &spec); + let real_spec_for_parse = real_spec.clone(); + let peer = peer_deps; + let parsed = tokio::task::spawn_blocking(move || { + parse_combined(Arc::clone(&raw), &real_spec_for_parse, peer) + }) + .await + .ok() + .flatten(); + + let transitives = match parsed { + Some((full_arc, resolved, core_arc, transitives)) => { + // Don't overwrite full_manifest — the original fetcher + // already set it under real_name. Populate version + // slots so BFS hits the (alias_name, alias_spec) + // early-return. + cache.set_full_manifest(real_name.clone(), full_arc); + cache.set_version_manifest(name.clone(), spec, Arc::clone(&core_arc)); + cache.set_version_manifest(real_name.clone(), resolved, core_arc); + transitives + } + None => Vec::new(), + }; + + let wall_us = fut_start.elapsed().as_micros() as u64; + FetchOutcome { + name, + real_name, + primary_spec, + transitives, + fetched: false, + wall_us, + // Settle-only futures have no network component. + net_us: 0, + } + }) +} + +/// Streaming preload with transitive walk. Self-contained — no +/// dependency on `service::http` / `service::manifest` / +/// `service::registry` beyond `MemoryCache` writes. 
+pub async fn mb_fetch( + initial_deps: Vec, + registry_url: &str, + cache: &MemoryCache, + config: &PreloadConfig, +) -> MbFetchStats { + let mut stats = MbFetchStats::default(); + // Per-future wall + net sums for eff_parallel computation. + // sum_wall_us / total_wall_ms / 1000 = eff_parallel for the + // whole future-body span (network + parse + cache writes). + // sum_net_us / total_wall_ms / 1000 = network-only eff_parallel, + // directly comparable to manifest-bench's avg_conc. + let mut sum_wall_us: u64 = 0; + let mut sum_net_us: u64 = 0; + let mut fetch_count: u64 = 0; + let mut settle_count: u64 = 0; + let total_start = Instant::now(); + + let client = match build_mb_client() { + Ok(c) => c, + Err(e) => { + tracing::warn!("mb_resolve client build failed: {e}"); + return stats; + } + }; + let registry = Arc::new(registry_url.trim_end_matches('/').to_string()); + let cap = config.concurrency; + let peer_deps = config.peer_deps; + + // Spec-level dedup across the entire run. + let mut seen: HashSet<(String, String)> = HashSet::new(); + let mut pending: VecDeque = VecDeque::new(); + for (name, spec) in initial_deps { + if seen.insert((name.clone(), spec.clone())) { + pending.push_back((name, spec)); + } + } + + // Sibling-fetch dedup: when two specs for the same package are + // both in flight, only the first fires a fetch; the second + // arrives at the cached body and goes through `spawn_settle`. + // Keyed by *real* package name (post npm-alias normalization) + // so two distinct aliases pointing at the same registry package + // share dedup. + let body_cache: Arc>>> = Arc::new(Mutex::new(HashMap::new())); + let mut in_flight_real_names: HashSet = HashSet::new(); + let mut deferred_by_real_name: HashMap> = HashMap::new(); + + let mut futs: FuturesUnordered = FuturesUnordered::new(); + + loop { + // Refill to cap. 
+ while futs.len() < cap { + let Some((name, spec)) = pending.pop_front() else { + break; + }; + let (real_name, _) = normalize_spec(&name, &spec); + // Sibling fast path: body already cached. + if let Some(raw) = body_cache.lock().get(&real_name).cloned() { + futs.push(spawn_settle(name, spec, raw, cache.clone(), peer_deps)); + continue; + } + // Defer if a fetch for this real package is already in flight. + if !in_flight_real_names.insert(real_name.clone()) { + deferred_by_real_name + .entry(real_name) + .or_default() + .push((name, spec)); + continue; + } + futs.push(spawn_fetch( + client.clone(), + Arc::clone(®istry), + name, + spec, + cache.clone(), + Arc::clone(&body_cache), + peer_deps, + )); + } + + if futs.is_empty() { + break; + } + + let Some(out) = futs.next().await else { break }; + + sum_wall_us += out.wall_us; + sum_net_us += out.net_us; + if out.fetched { + fetch_count += 1; + } else { + settle_count += 1; + } + + if out.transitives.is_empty() && out.fetched { + // Empty result from a fetch is ambiguous (no transitives + // OR a fetch/parse failure). Track conservatively as + // success — the FETCH_TIMINGS-equivalent counter is + // omitted in this path on purpose to keep the future + // body lean. + stats.success += 1; + } else if out.fetched { + stats.success += 1; + } + + // Drain sibling specs deferred while the fetch was in flight. + if out.fetched + && let Some(siblings) = deferred_by_real_name.remove(&out.real_name) + && let Some(raw) = body_cache.lock().get(&out.real_name).cloned() + { + for (sibling_name, sibling_spec) in siblings { + futs.push(spawn_settle( + sibling_name, + sibling_spec, + Arc::clone(&raw), + cache.clone(), + peer_deps, + )); + } + } + + // Extend pending with new transitive specs, dedup. 
+ for (name, spec) in out.transitives { + if seen.insert((name.clone(), spec.clone())) { + pending.push_back((name, spec)); + } + } + } + + let total_wall_ms = total_start.elapsed().as_millis(); + let total_wall_us = (total_wall_ms as u64).saturating_mul(1000); + let eff_par_full = if total_wall_us > 0 { + sum_wall_us as f64 / total_wall_us as f64 + } else { + 0.0 + }; + let eff_par_net = if total_wall_us > 0 { + sum_net_us as f64 / total_wall_us as f64 + } else { + 0.0 + }; + let avg_wall_us = sum_wall_us + .checked_div(fetch_count + settle_count) + .unwrap_or(0); + let avg_net_us = sum_net_us.checked_div(fetch_count).unwrap_or(0); + tracing::info!( + "p1-breakdown mb_fetch wall={}ms ok={} fail={} fetch={} settle={} sum_wall={}ms sum_net={}ms avg_wall={}us avg_net={}us eff_par_full={:.1} eff_par_net={:.1}", + total_wall_ms, + stats.success, + stats.fail, + fetch_count, + settle_count, + sum_wall_us / 1000, + sum_net_us / 1000, + avg_wall_us, + avg_net_us, + eff_par_full, + eff_par_net, + ); + + stats +} + +// ============================================================================ +// Folded streaming graph build — preload + BFS in one phase +// ============================================================================ + +/// Edges waiting on a `(name, spec)` fetch. Multiple parents can need +/// the same registry dep; we track them all and process inline as +/// soon as the manifest lands. +type EdgeTargets = HashMap<(String, String), Vec<(NodeIndex, EdgeIndex)>>; + +/// Collect the unresolved registry edges from `node_idx` into +/// pending + edge_targets, dedup by spec via `seen_specs`. +/// Non-registry edges (workspace / git / http / file) are +/// deliberately left for the follow-up BFS sweep. +/// Process this node's unresolved registry edges: +/// * If the (name, spec) is already cached (a sibling subtree +/// resolved it earlier), call `process_dependency_with_resolved` +/// inline now. 
Newly-created child nodes recurse via this same +/// function so their edges are also enqueued/processed. +/// * Otherwise, register the (parent, edge_id) under `edge_targets` +/// so the eventual fetch result drains it; push to `pending` if +/// this `(name, spec)` hasn't been seen. +/// +/// Without the inline-process path, `(name, spec)` keys added +/// AFTER their fetch already landed would never be drained — they'd +/// sit in `edge_targets` and the corresponding parent edges would +/// stay unresolved. CI run c02bb152 showed ~580 such orphans. +fn enqueue_node_edges( + graph: &mut DependencyGraph, + node_idx: NodeIndex, + pending: &mut VecDeque, + seen_specs: &mut HashSet<(String, String)>, + edge_targets: &mut EdgeTargets, + cache: &MemoryCache, + build_config: &BuildDepsConfig, +) { + let mut work_stack: Vec = vec![node_idx]; + while let Some(idx) = work_stack.pop() { + let edges = collect_unresolved_edges(graph, idx); + for edge in edges { + if !edge.spec.is_registry_spec() { + continue; + } + let key = (edge.name.clone(), edge.spec.clone()); + + // Cache-hit fast path: process immediately, no + // edge_targets stash. Reuses the same process logic the + // main loop uses on fetch result. + if let Some(core_arc) = cache.get_version_manifest(&edge.name, &edge.spec) { + let resolved = ResolvedPackage { + name: edge.name.clone(), + version: core_arc.version.clone(), + manifest: core_arc, + }; + let edge_info = crate::resolver::edges::DependencyEdgeInfo { + edge_id: edge.edge_id, + name: edge.name.clone(), + spec: edge.spec.clone(), + edge_type: edge.edge_type, + }; + if let ProcessResult::Created(new_idx) = process_dependency_with_resolved( + graph, + idx, + &edge_info, + &resolved, + build_config, + ) { + work_stack.push(new_idx); + } + // Whether Created or Reused, this edge is now + // resolved — don't queue. 
+ continue; + } + + edge_targets + .entry(key.clone()) + .or_default() + .push((idx, edge.edge_id)); + if seen_specs.insert(key.clone()) { + pending.push_back(key); + } + } + } +} + +/// Folded variant: combines `mb_fetch`'s streaming preload with the +/// graph mutations that BFS would otherwise do in a separate phase. +/// Each fetch result triggers inline `process_dependency_with_resolved` +/// for every parent edge waiting on `(name, spec)`. New nodes' edges +/// feed back into pending / edge_targets, so the walk continues +/// streaming-style without a separate level-by-level traversal. +/// +/// CPU work (graph mutations) overlaps with network IO (more fetches +/// in flight via `FuturesUnordered`), so the 305 ms BFS phase +/// observed against a fully-warm cache is collapsed into mb_fetch's +/// wall instead of running serially after it. +/// +/// Non-registry edges (workspace / git / http / file) and any edges +/// added after the streaming loop converges (override re-resolves +/// that diverge from the original spec) are left unresolved — the +/// caller must run a follow-up BFS sweep to handle them. For +/// `utoo deps` on registry-only workloads (the common case), the +/// sweep is a no-op. +/// One fetched/settled event, sent from main loop to graph worker. +/// The future already performed cache writes inline (cheap DashMap +/// inserts). Graph worker uses `cache.get_version_manifest` to +/// retrieve the manifest for `process_dependency_with_resolved`. 
+struct FetchEventMsg { + name: String, +} + +pub async fn mb_fetch_with_graph( + mut graph: DependencyGraph, + registry_url: &str, + cache: &MemoryCache, + preload_config: &PreloadConfig, + build_config: &BuildDepsConfig, + receiver: Arc, +) -> Result<(DependencyGraph, MbFetchStats)> +where + R: EventReceiver + 'static, +{ + let mut stats = MbFetchStats::default(); + let total_start = Instant::now(); + + let client = match build_mb_client() { + Ok(c) => c, + Err(e) => { + tracing::warn!("mb_resolve client build failed: {e}"); + return Ok((graph, stats)); + } + }; + let registry = Arc::new(registry_url.trim_end_matches('/').to_string()); + let cap = preload_config.concurrency; + let peer_deps = preload_config.peer_deps; + + // Initial seed: walk root + workspace nodes for unresolved + // registry edges. Done inline before spawning workers (one-time + // cost, not on the hot path). + let mut seen_specs: HashSet<(String, String)> = HashSet::new(); + let mut pending: VecDeque = VecDeque::new(); + let mut edge_targets: EdgeTargets = HashMap::new(); + + let root_index = graph.root_index; + enqueue_node_edges( + &mut graph, + root_index, + &mut pending, + &mut seen_specs, + &mut edge_targets, + cache, + build_config, + ); + let workspace_indices: Vec = graph + .graph + .node_indices() + .filter(|&i| graph.get_node(i).is_some_and(|n| n.is_workspace())) + .collect(); + for node_idx in workspace_indices { + enqueue_node_edges( + &mut graph, + node_idx, + &mut pending, + &mut seen_specs, + &mut edge_targets, + cache, + build_config, + ); + } + + // Channels: main → graph (fetched events) + graph → main (new + // pending specs). Bounded at 2 * cap so neither side stalls + // waiting for the other under bursty wave behavior. + let (fetch_tx, fetch_rx) = mpsc::channel::(cap * 2 + 16); + let (specs_tx, mut specs_rx) = mpsc::channel::>(cap * 2 + 16); + + // Spawn graph worker on the *blocking* thread pool, not the + // worker scheduler. 
graph_worker is CPU-only and can run for + // tens of ms uninterrupted; on a multi-thread runtime it would + // otherwise compete with the install pipeline's download/clone + // workers for the small set of worker threads (just 2 on GHA + // ubuntu-latest), starving the main loop's socket polling and + // producing the eff_par_full collapse (73-77 → 40) that drives + // the p0/p1 outlier tail. The blocking pool has 512 slots by + // default, so reserving one slot for graph_worker has zero + // contention effect on tokio scheduling. + let cache_clone = cache.clone(); + let build_config_owned = build_config.clone(); + let receiver_for_graph = Arc::clone(&receiver); + let graph_handle = tokio::task::spawn_blocking(move || { + graph_worker( + graph, + edge_targets, + seen_specs, + cache_clone, + build_config_owned, + fetch_rx, + specs_tx, + receiver_for_graph, + ) + }); + + // Sibling-fetch dedup stays in main loop (drives FuturesUnordered). + // Keyed by *real* package name (post npm-alias normalization) + // so two distinct aliases pointing at the same registry package + // share dedup; siblings store their alias `(name, spec)` so the + // drain knows how to spawn `spawn_settle` with the right cache key. + let body_cache: Arc>>> = Arc::new(Mutex::new(HashMap::new())); + let mut in_flight_real_names: HashSet = HashSet::new(); + let mut deferred_by_real_name: HashMap> = HashMap::new(); + let mut futs: FuturesUnordered = FuturesUnordered::new(); + + let mut sum_wall_us: u64 = 0; + let mut sum_net_us: u64 = 0; + let mut fetch_count: u64 = 0; + let mut settle_count: u64 = 0; + // Number of FetchEventMsg sent to graph worker that haven't yet + // had a corresponding Vec response. Drives termination: + // when futs empty + in_flight == 0, no more work pipelined. + let mut in_flight_graph: usize = 0; + + loop { + // Refill futs from pending up to cap. 
+        while futs.len() < cap {
+            let Some((name, spec)) = pending.pop_front() else {
+                break;
+            };
+            let (real_name, _) = normalize_spec(&name, &spec);
+            if let Some(raw) = body_cache.lock().get(&real_name).cloned() {
+                futs.push(spawn_settle(name, spec, raw, cache.clone(), peer_deps));
+                continue;
+            }
+            if !in_flight_real_names.insert(real_name.clone()) {
+                deferred_by_real_name
+                    .entry(real_name)
+                    .or_default()
+                    .push((name, spec));
+                continue;
+            }
+            futs.push(spawn_fetch(
+                client.clone(),
+                Arc::clone(&registry),
+                name,
+                spec,
+                cache.clone(),
+                Arc::clone(&body_cache),
+                peer_deps,
+            ));
+        }
+
+        // Termination: nothing in flight at fetch level AND graph
+        // worker has nothing pending.
+        if futs.is_empty() && in_flight_graph == 0 {
+            break;
+        }
+
+        // Drive both halves: prefer draining specs back from graph
+        // worker (unblocks new fetch dispatch) over starting another
+        // fetch landing.
+        tokio::select! {
+            biased;
+            maybe_specs = specs_rx.recv() => {
+                match maybe_specs {
+                    Some(specs) => {
+                        pending.extend(specs);
+                        in_flight_graph -= 1;
+                    }
+                    None => {
+                        // Graph worker exited unexpectedly. Bail.
+                        break;
+                    }
+                }
+            }
+            maybe_result = futs.next(), if !futs.is_empty() => {
+                if let Some(out) = maybe_result {
+                    sum_wall_us += out.wall_us;
+                    sum_net_us += out.net_us;
+                    if out.fetched {
+                        fetch_count += 1;
+                        stats.success += 1;
+                    } else {
+                        settle_count += 1;
+                    }
+
+                    // Pipeline early-start signal: emit
+                    // PackageResolved as soon as the manifest is in
+                    // cache. The install path's PipelineReceiver
+                    // forwards this to the download worker so
+                    // tarball download begins before BFS finishes.
+                    // For lockfile-only callers (NoopReceiver), this
+                    // is a no-op.
+                    if let Some(core_arc) =
+                        cache.get_version_manifest(&out.name, &out.primary_spec)
+                    {
+                        receiver.on_event(BuildEvent::PackageResolved(
+                            (&*core_arc).into(),
+                        ));
+                    }
+
+                    // Drain sibling specs deferred while the fetch
+                    // was in flight. Sibling settles also produce a
+                    // FetchEventMsg downstream.
+ if out.fetched + && let Some(siblings) = deferred_by_real_name.remove(&out.real_name) + && let Some(raw) = body_cache.lock().get(&out.real_name).cloned() + { + for (sibling_name, sibling_spec) in siblings { + futs.push(spawn_settle( + sibling_name, + sibling_spec, + Arc::clone(&raw), + cache.clone(), + peer_deps, + )); + } + } + + // Send to graph worker. `send().await` only + // blocks if channel is full (cap * 2 buffer); + // under steady state shouldn't happen. + if fetch_tx.send(FetchEventMsg { name: out.name }).await.is_ok() { + in_flight_graph += 1; + } + } + } + } + } + + // Signal graph worker to exit, then await its finalization to + // recover the graph + stats. + drop(fetch_tx); + let (graph, graph_stats) = graph_handle.await.context("graph worker join")??; + + let total_wall_ms = total_start.elapsed().as_millis(); + let total_wall_us = (total_wall_ms as u64).saturating_mul(1000); + let eff_par_full = if total_wall_us > 0 { + sum_wall_us as f64 / total_wall_us as f64 + } else { + 0.0 + }; + let eff_par_net = if total_wall_us > 0 { + sum_net_us as f64 / total_wall_us as f64 + } else { + 0.0 + }; + let avg_net_us = sum_net_us.checked_div(fetch_count).unwrap_or(0); + tracing::info!( + "p1-breakdown mb_fetch_with_graph wall={}ms ok={} fetch={} settle={} sum_wall={}ms sum_net={}ms sum_graph={}ms avg_net={}us eff_par_full={:.1} eff_par_net={:.1} unresolved_targets={} graph_processed={} graph_new_specs={}", + total_wall_ms, + stats.success, + fetch_count, + settle_count, + sum_wall_us / 1000, + sum_net_us / 1000, + graph_stats.sum_graph_us / 1000, + avg_net_us, + eff_par_full, + eff_par_net, + graph_stats.unresolved_remaining, + graph_stats.processed, + graph_stats.new_specs_emitted, + ); + + Ok((graph, stats)) +} + +#[derive(Debug, Default)] +struct GraphWorkerStats { + sum_graph_us: u64, + processed: usize, + new_specs_emitted: usize, + unresolved_remaining: usize, +} + +/// CPU-only worker that owns the graph + edge_targets + seen_specs. 
+/// Receives fetch events from main loop, mutates graph via
+/// `process_dependency_with_resolved`, sends new pending specs back.
+///
+/// Runs as a sync function on tokio's blocking thread pool (via
+/// `spawn_blocking` at the call site), not as a regular task on the
+/// worker scheduler. Rationale: graph mutation is pure CPU and can
+/// run for tens of ms uninterrupted; if it sat on a worker thread
+/// alongside the install pipeline (download / clone / extract
+/// workers), it would starve the main loop's socket polling worker
+/// during burst processing. The blocking pool has 512 slots by
+/// default, so reserving one for graph_worker has no contention
+/// effect on other tokio scheduling.
+#[allow(clippy::too_many_arguments)]
+fn graph_worker<R>(
+    mut graph: DependencyGraph,
+    mut edge_targets: EdgeTargets,
+    mut seen_specs: HashSet<(String, String)>,
+    cache: MemoryCache,
+    build_config: BuildDepsConfig,
+    mut fetch_rx: mpsc::Receiver<FetchEventMsg>,
+    specs_tx: mpsc::Sender<Vec<(String, String)>>,
+    receiver: Arc<R>,
+) -> Result<(DependencyGraph, GraphWorkerStats)>
+where
+    R: EventReceiver + 'static,
+{
+    use crate::model::manifest::NodeManifest;
+    let mut stats = GraphWorkerStats::default();
+
+    while let Some(msg) = fetch_rx.blocking_recv() {
+        let graph_start = Instant::now();
+        stats.processed += 1;
+
+        // Drain edge_targets for every spec keyed under this name.
+        // The fetch future already wrote both `(name, primary_spec)`
+        // and `(name, resolved_version)` cache slots, so any
+        // edge_targets entry for this name should hit cache.
+        let primary_keys: Vec<(String, String)> = edge_targets
+            .keys()
+            .filter(|(n, _)| n == &msg.name)
+            .cloned()
+            .collect();
+
+        let mut new_specs: Vec<(String, String)> = Vec::new();
+        for (k_name, k_spec) in primary_keys {
+            let Some(core_arc) = cache.get_version_manifest(&k_name, &k_spec) else {
+                continue;
+            };
+            let resolved = ResolvedPackage {
+                name: k_name.clone(),
+                version: core_arc.version.clone(),
+                manifest: core_arc,
+            };
+            let Some(targets) = edge_targets.remove(&(k_name.clone(), k_spec.clone())) else {
+                continue;
+            };
+            for (parent_idx, edge_id) in targets {
+                let edge_info = crate::resolver::edges::DependencyEdgeInfo {
+                    edge_id,
+                    name: k_name.clone(),
+                    spec: k_spec.clone(),
+                    edge_type: graph
+                        .graph
+                        .edge_weight(edge_id)
+                        .and_then(|e| match e {
+                            crate::model::graph::GraphEdge::Dependency(d) => Some(d.edge_type),
+                            _ => None,
+                        })
+                        .unwrap_or(crate::model::node::EdgeType::Prod),
+                };
+                let result = process_dependency_with_resolved(
+                    &mut graph,
+                    parent_idx,
+                    &edge_info,
+                    &resolved,
+                    &build_config,
+                );
+                if let ProcessResult::Created(new_idx) = result {
+                    // Pipeline clone signal: emit PackagePlaced so
+                    // the install path's clone worker can begin
+                    // hardlinking from cache as soon as a node is
+                    // placed in the graph. lockfile-only callers
+                    // (NoopReceiver) drop this on the floor.
+                    if let Some(node) = graph.get_node(new_idx)
+                        && let NodeManifest::Registry(ref manifest) = node.manifest
+                    {
+                        let parent_path = graph.get_node(parent_idx).map(|p| p.path.as_path());
+                        receiver.on_event(BuildEvent::PackagePlaced {
+                            package: manifest.as_ref().into(),
+                            path: &node.path,
+                            parent_path,
+                        });
+                    }
+
+                    // Walk the new node's edges. enqueue handles
+                    // recursive cache-hit drain so already-cached
+                    // specs get processed inline (still on this
+                    // worker thread — graph mutations can't run on
+                    // multiple threads with `&mut graph`).
+                    enqueue_node_edges_into(
+                        &mut graph,
+                        new_idx,
+                        &mut new_specs,
+                        &mut seen_specs,
+                        &mut edge_targets,
+                        &cache,
+                        &build_config,
+                    );
+                }
+            }
+        }
+
+        stats.sum_graph_us += graph_start.elapsed().as_micros() as u64;
+        stats.new_specs_emitted += new_specs.len();
+
+        // Always reply (even if empty) so main loop's `in_flight`
+        // counter decrements for each FetchEventMsg sent.
+        if specs_tx.blocking_send(new_specs).is_err() {
+            // Main loop dropped the receiver — bail.
+            break;
+        }
+    }
+
+    stats.unresolved_remaining = edge_targets.len();
+    Ok((graph, stats))
+}
+
+/// Same as `enqueue_node_edges` but pushes new specs into the
+/// caller-provided `out` Vec instead of a VecDeque. Used by the
+/// graph worker to batch "new specs from this fetch" before sending
+/// them back to the main loop in one channel message.
+fn enqueue_node_edges_into(
+    graph: &mut DependencyGraph,
+    node_idx: NodeIndex,
+    out: &mut Vec<(String, String)>,
+    seen_specs: &mut HashSet<(String, String)>,
+    edge_targets: &mut EdgeTargets,
+    cache: &MemoryCache,
+    build_config: &BuildDepsConfig,
+) {
+    let mut work_stack: Vec<NodeIndex> = vec![node_idx];
+    while let Some(idx) = work_stack.pop() {
+        let edges = collect_unresolved_edges(graph, idx);
+        for edge in edges {
+            if !edge.spec.is_registry_spec() {
+                continue;
+            }
+            let key = (edge.name.clone(), edge.spec.clone());
+
+            if let Some(core_arc) = cache.get_version_manifest(&edge.name, &edge.spec) {
+                let resolved = ResolvedPackage {
+                    name: edge.name.clone(),
+                    version: core_arc.version.clone(),
+                    manifest: core_arc,
+                };
+                let edge_info = crate::resolver::edges::DependencyEdgeInfo {
+                    edge_id: edge.edge_id,
+                    name: edge.name.clone(),
+                    spec: edge.spec.clone(),
+                    edge_type: edge.edge_type,
+                };
+                if let ProcessResult::Created(new_idx) = process_dependency_with_resolved(
+                    graph,
+                    idx,
+                    &edge_info,
+                    &resolved,
+                    build_config,
+                ) {
+                    work_stack.push(new_idx);
+                }
+                continue;
+            }
+
+            edge_targets
+                .entry(key.clone())
+                .or_default()
+                .push((idx, edge.edge_id));
+
if seen_specs.insert(key.clone()) {
+                out.push(key);
+            }
+        }
+    }
+}
diff --git a/crates/ruborist/src/resolver/mod.rs b/crates/ruborist/src/resolver/mod.rs
index 582e03b31..2d0a288d9 100644
--- a/crates/ruborist/src/resolver/mod.rs
+++ b/crates/ruborist/src/resolver/mod.rs
@@ -3,10 +3,12 @@
 pub mod builder;
 pub mod common;
 pub mod edges;
+pub mod fast_preload;
 #[cfg(feature = "native-git")]
 pub mod git;
 #[cfg(feature = "http-tarball")]
 pub mod http;
+pub mod mb_resolve;
 pub mod preload;
 pub mod registry;
 pub mod runtime;
diff --git a/crates/ruborist/src/resolver/preload.rs b/crates/ruborist/src/resolver/preload.rs
index 1230c5bf6..e9a777407 100644
--- a/crates/ruborist/src/resolver/preload.rs
+++ b/crates/ruborist/src/resolver/preload.rs
@@ -99,8 +99,17 @@ where
     let mut in_flight = 0usize;
     let mut started = false;
 
+    // Main-loop overhead instrumentation. Plain u64 accumulators so we
+    // can attribute the gap between manifest-bench's pure-HTTP wall
+    // and ruborist's preload wall: how much of the gap is bookkeeping
+    // (dedup hash, extract_transitive_deps, queue push, events) vs
+    // actual fetch wait?
+ let mut total_dispatch_us: u64 = 0; + let mut total_result_us: u64 = 0; + loop { // Fill up to concurrency limit + let dispatch_start = tokio::time::Instant::now(); while in_flight < concurrency { let item = loop { let Some((name, spec)) = pending.pop_front() else { @@ -134,6 +143,7 @@ where }); in_flight += 1; } + total_dispatch_us += dispatch_start.elapsed().as_micros() as u64; if in_flight == 0 { break; @@ -142,6 +152,7 @@ where let Some((name, result, elapsed_ms)) = futures.next().await else { break; }; + let result_start = tokio::time::Instant::now(); in_flight -= 1; if stats.success_count == 0 && stats.failed_count == 0 { @@ -174,8 +185,15 @@ where tracing::debug!("Failed to preload {}: {}", name, e); } } + total_result_us += result_start.elapsed().as_micros() as u64; } + tracing::info!( + "p1-breakdown preload_loop_dispatch_us={} preload_loop_result_us={}", + total_dispatch_us, + total_result_us, + ); + stats.total_processed = processed.len(); receiver.on_event(BuildEvent::PreloadComplete { diff --git a/crates/ruborist/src/service/api.rs b/crates/ruborist/src/service/api.rs index 878b357a1..2dc7d62e8 100644 --- a/crates/ruborist/src/service/api.rs +++ b/crates/ruborist/src/service/api.rs @@ -37,6 +37,8 @@ use crate::model::util::parse_package_spec; use crate::resolver::builder::{ BuildDepsConfig, DevDeps, EdgeContext, PeerDeps, add_edges_from, build_deps_with_config, }; +use crate::resolver::mb_resolve::mb_fetch_with_graph; +use crate::resolver::preload::PreloadConfig; use crate::resolver::runtime::install_runtime_from_map; use crate::resolver::workspace::WorkspaceDiscovery; use crate::spec::Catalogs; @@ -70,6 +72,16 @@ pub struct BuildDepsOptions { /// Catalog definitions for the `catalog:` dependency protocol. /// Key `""` = default catalog, other keys = named catalogs. pub catalogs: Catalogs, + /// When true, skip the up-front `run_preload_phase`. Set by callers + /// that don't consume the `BuildEvent::PackageResolved` pipeline + /// stream — e.g. 
`utoo deps` (lockfile-only). The BFS phase has its + /// own per-level prefetch that warms the manifest cache, so dropping + /// preload doesn't change correctness, only avoids the redundant + /// up-front fetch + dedicated wall. + /// Install paths (which feed `PipelineReceiver` to start tarball + /// downloads as resolves complete) leave this false so preload still + /// emits PackageResolved events to the pipeline. + pub skip_preload: bool, } impl BuildDepsOptions { @@ -91,6 +103,7 @@ impl BuildDepsOptions { receiver, supports_semver: None, catalogs: HashMap::new(), + skip_preload: false, } } } @@ -118,7 +131,7 @@ pub struct BuildDepsOutput { pub async fn build_deps(options: BuildDepsOptions) -> Result where G: Glob + Clone, - R: EventReceiver, + R: EventReceiver + 'static, { let BuildDepsOptions { cwd, @@ -132,6 +145,7 @@ where receiver, supports_semver, catalogs, + skip_preload: skip_preload_caller, } = options; // 1. Find root path (workspace root if applicable) @@ -234,7 +248,13 @@ where registry.supports_semver(), ); - let skip_preload = cache_count > 0; + // Skip preload when: + // - the caller asked us to (e.g. `utoo deps`, no pipeline consumer + // for PackageResolved events — BFS does its own per-level + // prefetch, preload is redundant), OR + // - the project's warm cache already has manifests covering most + // of the workload (existing skip-on-warm behavior). + let skip_preload = skip_preload_caller || cache_count > 0; let mut config = BuildDepsConfig::default() .with_peer_deps(peer_deps) .with_concurrency(concurrency) @@ -251,16 +271,56 @@ where ); } + // Lockfile-only callers (`utoo deps`) route through + // `mb_fetch_with_graph` — a folded streaming preload + graph + // build. The fetch loop drives manifest IO; per-result inline + // `process_dependency_with_resolved` mutates the graph. Result: + // no separate BFS phase. 
The follow-up
+    // `build_deps_with_config` call still runs to handle any
+    // non-registry edges (workspace / git / http / file) the fold
+    // path skipped, but on registry-only workloads it's near no-op.
+    // Wrap receiver in Arc so the folded mb_fetch_with_graph can
+    // share it with its spawned graph_worker task. The follow-up
+    // BFS sweep also holds an &Arc via deref.
+    let receiver = Arc::new(receiver);
+
+    let folded = skip_preload_caller && cache_count == 0;
+    if folded {
+        let preload_config = PreloadConfig {
+            peer_deps,
+            concurrency,
+        };
+        let (returned_graph, _stats) = mb_fetch_with_graph(
+            graph,
+            registry.registry_url(),
+            registry.cache(),
+            &preload_config,
+            &config,
+            Arc::clone(&receiver),
+        )
+        .await
+        .map_err(|e| e.context("mb_fetch_with_graph failed"))?;
+        graph = returned_graph;
+    }
+
     // Preserve the typed error via `Error::new` + `.context(...)` so CLI
     // renderers (e.g. pm's format_print) can downcast and pretty-print the
     // dependency chain carried by `ResolveError::WithChain`.
-    build_deps_with_config(&mut graph, &registry, config, &receiver)
+    //
+    // For the folded path this BFS sweeps remaining unresolved edges
+    // (non-registry: workspace / git / http / file). On
+    // registry-only workloads (the common case) the graph is fully
+    // built already, BFS walks nothing.
+    build_deps_with_config(&mut graph, &registry, config, &*receiver)
         .await
         .map_err(|e| anyhow::Error::new(e).context("Dependency resolution failed"))?;
 
+    let t_serialize_start = std::time::Instant::now();
     let (packages, _total) = graph.serialize_to_packages(&root_path);
+    let serialize_us = t_serialize_start.elapsed().as_micros() as u64;
 
     // Export project cache from memory cache for the host to persist.
+ let t_cache_export_start = std::time::Instant::now(); let mut project_cache = ProjectCacheData::default(); for (key, manifest) in registry.cache().export_version_manifests() { // `parse_package_spec` rather than `split_once('@')` so scoped names @@ -271,6 +331,13 @@ where pkg_cache.specs.insert(spec.to_string(), version.clone()); pkg_cache.manifests.insert(version, (*manifest).clone()); } + let cache_export_us = t_cache_export_start.elapsed().as_micros() as u64; + + tracing::info!( + "p1-breakdown serialize_us={} cache_export_us={}", + serialize_us, + cache_export_us, + ); Ok(BuildDepsOutput { lock: PackageLock::new(&pkg.name, &pkg.version, packages), @@ -324,6 +391,7 @@ mod tests { receiver: NoopReceiver, supports_semver: None, catalogs: HashMap::new(), + skip_preload: false, }; assert_eq!(options.concurrency, 20); diff --git a/crates/ruborist/src/service/manifest.rs b/crates/ruborist/src/service/manifest.rs index 74baf3b9c..1f65a12c8 100644 --- a/crates/ruborist/src/service/manifest.rs +++ b/crates/ruborist/src/service/manifest.rs @@ -4,7 +4,11 @@ //! [`crate::service::fetch`] so retry policy stays uniform across registry //! manifest fetches and non-registry resolvers (git, http tarball). +use std::collections::HashMap; +use std::sync::Arc; + use anyhow::{Result, anyhow}; +use serde::Deserialize; use tokio_retry::RetryIf; use super::fetch::{ @@ -12,17 +16,40 @@ use super::fetch::{ }; use super::http::get_client; use crate::model::manifest::{CoreVersionManifest, FullManifest}; +use crate::resolver::version::resolve_target_version; +use crate::util::FETCH_TIMINGS; -/// Parse JSON bytes on rayon's CPU thread pool (native) or inline -/// (wasm32). Keeps the tokio runtime free of `simd_json` work so other -/// in-flight manifest fetches keep driving network IO while this one -/// parses. +/// Parse JSON bytes on tokio's blocking thread pool. 
+/// +/// The history of this function captures three different attempts: +/// - rayon::spawn (original): rayon's pool is `num_cpus` (= 2 on +/// GHA), 64 concurrent parses queued behind 2 workers → avg_parse +/// 30ms wall vs ~5ms CPU. round-0 baseline. +/// - inline (round 1, reverted): no rayon hop, but the simd_json +/// call blocks the tokio runtime worker, so other in-flight +/// fetches couldn't drive their socket I/O — avg_request grew +/// 35ms → 52ms (+17ms), eff_parallel 42 → 35, net p1 wall +0.37s. +/// - spawn_blocking (current): tokio's dedicated blocking pool has +/// a much higher default cap (512), so 64 concurrent parses are +/// never queued. Unlike rayon there's no contention with the +/// install path's parallel-write rayon usage, and unlike inline +/// the tokio runtime workers stay free to drive network I/O on +/// all in-flight fetches. async fn parse_json_off_runtime(mut bytes: Vec) -> Result where T: serde::de::DeserializeOwned + Send + 'static, { #[cfg(not(target_arch = "wasm32"))] { + // A/B-reverted to rayon::spawn after PR #2923 bench showed + // spawn_blocking (intended to avoid rayon's small-pool queue + // wait) routes parse contention through tokio's blocking + // pool, which the install path's download / clone workers + // also share. With 4647 deps × parse on the legacy + // preload+BFS install path, the contention compounded enough + // to add ~2s vs utoo-next on p3_cold_install. rayon::spawn + // sends parse to its own dedicated pool — small (num_cpus) + // but isolated from install IO workers. let (tx, rx) = tokio::sync::oneshot::channel(); rayon::spawn(move || { let result = simd_json::serde::from_slice::(&mut bytes) @@ -91,7 +118,9 @@ pub async fn fetch_full_manifest(opts: FetchManifestOptions<'_>) -> Result) -> Result) -> Result, + /// `Some` when the requested spec resolves to a real version in + /// `manifest.versions`. `None` only on no-match (rare; usually a + /// spec referring to a yanked or moved version). 
+    pub primary_settle: Option<PrimarySettleResult>,
+}
+
+/// `(resolved_version, parsed_subtree)` — what
+/// [`fetch_full_manifest_with_settle`] hands back to callers that
+/// supplied a `primary_spec`.
+pub type PrimarySettleResult = (String, Arc<CoreVersionManifest>);
+
+#[allow(clippy::large_enum_variant)]
+pub enum FetchWithSettleResult {
+    Ok(FetchWithSettle),
+    NotModified,
+}
+
+/// Fetch a full manifest and resolve the primary spec from the same
+/// parse pass.
+///
+/// Where [`fetch_full_manifest`] uses `simd_json::serde::from_slice`
+/// to materialize a typed `FullManifest` (cheap envelope, deep
+/// `versions` subtrees skipped via `IgnoredAny`) and leaves version
+/// subtree extraction to a later `simd_json::to_borrowed_value`
+/// reparse, this entry point does the borrowed-value parse once and
+/// extracts:
+/// * envelope fields needed by the resolver (`name`, `dist-tags`,
+///   `versions` keys),
+/// * the resolved-version subtree as a typed
+///   [`CoreVersionManifest`].
+///
+/// Saves one full simd_json pass on the parse hot path —
+/// `fast_preload` uses ~2700 of these per `utoo deps` cold run, so
+/// halving the per-fetch parse work meaningfully reduces CPU on
+/// 2-core CI.
+pub async fn fetch_full_manifest_with_settle(
+    opts: FetchManifestOptions<'_>,
+    primary_spec: &str,
+) -> Result<FetchWithSettleResult> {
+    let url = format!("{}/{}", opts.registry_url, opts.name);
+    let etag_owned = opts.etag.map(|s| s.to_string());
+    let primary_spec_owned = primary_spec.to_string();
+    let accept = match opts.format {
+        MetadataFormat::Abbreviated => "application/vnd.npm.install-v1+json",
+        MetadataFormat::Complete => "application/json",
+    };
+
+    RetryIf::spawn(
+        retry_strategy(),
+        || {
+            let url = url.clone();
+            let etag = etag_owned.clone();
+            let primary_spec = primary_spec_owned.clone();
+            async move {
+                let mut request = get_client()
+                    .map_err(FetchError::Permanent)?
+ .get(&url) + .header("Accept", accept); + if let Some(etag_value) = &etag { + request = request.header("If-None-Match", etag_value); + } + + let t_request_start = std::time::Instant::now(); + let response = request.send().await.map_err(classify_reqwest_error)?; + let request_us = t_request_start.elapsed().as_micros() as u64; + let status = response.status(); + + if status == reqwest::StatusCode::NOT_MODIFIED { + if etag.is_some() { + return Ok(FetchWithSettleResult::NotModified); + } + return Err(classify_status(status, &url)); + } + + if status.is_success() { + let new_etag = response + .headers() + .get("etag") + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()); + + let t_body_start = std::time::Instant::now(); + let raw_bytes = response + .bytes() + .await + .map_err(|e| FetchError::Permanent(anyhow!("Response read error: {e}")))? + .to_vec(); + let body_us = t_body_start.elapsed().as_micros() as u64; + let bytes_len = raw_bytes.len() as u64; + let raw_arc: Arc<[u8]> = Arc::from(raw_bytes); + + let t_parse_start = std::time::Instant::now(); + let parse_result = + parse_envelope_and_settle(Arc::clone(&raw_arc), primary_spec) + .await + .map_err(FetchError::Permanent)?; + let parse_us = t_parse_start.elapsed().as_micros() as u64; + + FETCH_TIMINGS.record(request_us, body_us, parse_us, bytes_len); + + let (manifest, primary_settle) = parse_result; + Ok(FetchWithSettleResult::Ok(FetchWithSettle { + manifest, + etag: new_etag, + primary_settle, + })) + } else { + Err(classify_status(status, &url)) + } + } + }, + is_retryable, + ) + .await + .map_err(|e| match e { + FetchError::Retryable(e) | FetchError::Permanent(e) => { + anyhow!("Failed to fetch {}: {:#}", opts.name, e) + } + }) +} + +/// Off-runtime combined parse: `simd_json::to_borrowed_value` once, +/// extract envelope into [`FullManifest`] + resolve `primary_spec` +/// against the parsed `versions` keys + materialize the resolved +/// version's subtree into [`CoreVersionManifest`]. 
+///
+/// Constructs `FullManifest` manually rather than via typed serde so
+/// the work is exactly one parse pass. Other `FullManifest` fields
+/// (`description`, `time`, `maintainers`, etc.) stay at `Default`
+/// values — none are read on the resolver hot path.
+async fn parse_envelope_and_settle(
+    raw: Arc<[u8]>,
+    primary_spec: String,
+) -> Result<(FullManifest, Option<PrimarySettleResult>)> {
+    #[cfg(not(target_arch = "wasm32"))]
+    {
+        tokio::task::spawn_blocking(move || parse_envelope_and_settle_sync(raw, &primary_spec))
+            .await
+            .map_err(|e| anyhow!("spawn_blocking parse panicked: {e}"))?
+    }
+    #[cfg(target_arch = "wasm32")]
+    {
+        parse_envelope_and_settle_sync(raw, &primary_spec)
+    }
+}
+
+fn parse_envelope_and_settle_sync(
+    raw: Arc<[u8]>,
+    primary_spec: &str,
+) -> Result<(FullManifest, Option<PrimarySettleResult>)> {
+    use simd_json::prelude::{ValueAsScalar, ValueObjectAccess};
+
+    let mut buf = (*raw).to_vec();
+    let parsed =
+        simd_json::to_borrowed_value(&mut buf).map_err(|e| anyhow!("JSON parse error: {e}"))?;
+
+    let name = parsed
+        .get("name")
+        .and_then(|v| v.as_str())
+        .map(|s| s.to_string())
+        .unwrap_or_default();
+
+    let dist_tags: HashMap<String, String> = parsed
+        .get("dist-tags")
+        .and_then(|v| HashMap::<String, String>::deserialize(v).ok())
+        .unwrap_or_default();
+
+    let versions_keys: Vec<String> = parsed
+        .get("versions")
+        .and_then(simd_json::prelude::ValueAsObject::as_object)
+        .map(|obj| obj.keys().map(|k| k.to_string()).collect())
+        .unwrap_or_default();
+
+    let manifest = FullManifest {
+        name,
+        dist_tags: dist_tags.clone(),
+        versions: versions_keys,
+        raw,
+        ..Default::default()
+    };
+
+    // Resolve spec against the just-extracted envelope.
+ let primary_settle = match resolve_target_version((&manifest).into(), primary_spec) { + Ok(resolved) => parsed + .get("versions") + .and_then(|v| v.get(resolved.as_str())) + .and_then(|version_obj| CoreVersionManifest::deserialize(version_obj).ok()) + .map(|core| (resolved, Arc::new(core))), + Err(_) => None, + }; + + Ok((manifest, primary_settle)) +} + /// Fetch full manifest without ETag / 304 support. /// /// Convenience wrapper around [`fetch_full_manifest`] for callers that never @@ -190,6 +428,7 @@ pub async fn fetch_version_manifest( || { let url = url.clone(); async move { + let t_request_start = std::time::Instant::now(); let response = get_client() .map_err(FetchError::Permanent)? .get(&url) @@ -197,16 +436,26 @@ pub async fn fetch_version_manifest( .send() .await .map_err(classify_reqwest_error)?; + let request_us = t_request_start.elapsed().as_micros() as u64; if response.status().is_success() { + let t_body_start = std::time::Instant::now(); let bytes = response .bytes() .await .map_err(|e| FetchError::Permanent(anyhow!("Response read error: {e}")))? 
                .to_vec();
-            parse_json_off_runtime::<CoreVersionManifest>(bytes)
+            let body_us = t_body_start.elapsed().as_micros() as u64;
+            let bytes_len = bytes.len() as u64;
+            let t_parse_start = std::time::Instant::now();
+            let parsed = parse_json_off_runtime::<CoreVersionManifest>(bytes)
                 .await
-                .map_err(FetchError::Permanent)
+                .map_err(FetchError::Permanent);
+            let parse_us = t_parse_start.elapsed().as_micros() as u64;
+            if parsed.is_ok() {
+                FETCH_TIMINGS.record(request_us, body_us, parse_us, bytes_len);
+            }
+            parsed
         } else {
             Err(classify_status(response.status(), &url))
         }
diff --git a/crates/ruborist/src/service/mod.rs b/crates/ruborist/src/service/mod.rs
index 13109e994..5adb6bf0b 100644
--- a/crates/ruborist/src/service/mod.rs
+++ b/crates/ruborist/src/service/mod.rs
@@ -60,8 +60,9 @@ pub use cache::{
 pub use fs::{Glob, NoopGlob, exists, read_to_string};
 pub use http::client_builder;
 pub use manifest::{
-    FetchManifestOptions, FetchManifestResult, FetchVersionManifestOptions, MetadataFormat,
-    fetch_full_manifest, fetch_full_manifest_fresh, fetch_version_manifest,
+    FetchManifestOptions, FetchManifestResult, FetchVersionManifestOptions, FetchWithSettle,
+    FetchWithSettleResult, MetadataFormat, fetch_full_manifest, fetch_full_manifest_fresh,
+    fetch_full_manifest_with_settle, fetch_version_manifest,
 };
 pub use registry::UnifiedRegistry;
 pub use store::{ManifestStore, NoopStore};
diff --git a/crates/ruborist/src/util/mod.rs b/crates/ruborist/src/util/mod.rs
index 649e47c95..a7f0b7b7d 100644
--- a/crates/ruborist/src/util/mod.rs
+++ b/crates/ruborist/src/util/mod.rs
@@ -1,6 +1,8 @@
 //! Shared utility primitives for ruborist and downstream consumers.
pub mod oncemap; +pub mod timing; pub use crate::model::util::{PackageNameStr, parse_package_spec, read_package_json}; pub use oncemap::OnceMap; +pub use timing::{FETCH_TIMINGS, FetchTimings, FetchTimingsSnapshot}; diff --git a/crates/ruborist/src/util/timing.rs b/crates/ruborist/src/util/timing.rs new file mode 100644 index 000000000..f50e921b9 --- /dev/null +++ b/crates/ruborist/src/util/timing.rs @@ -0,0 +1,134 @@ +//! Per-phase manifest fetch timing accumulator for p1 perf investigation. +//! +//! Splits each `fetch_*_manifest` call into three observable pieces: +//! - `request_us`: from `request.send().await` to response headers +//! received. Captures TCP connect (when not pooled), TLS handshake, +//! HTTP request roundtrip, and server-side processing. +//! - `body_us`: from response headers to the entire JSON body buffered. +//! Network-bandwidth bound for large packuments. +//! - `parse_us`: from full body buffered to a typed manifest. CPU bound +//! (simd_json on a spawn_blocking thread). +//! +//! `parse_us` is wall-clock for the await on `parse_json_off_runtime` — +//! since JSON parse runs on `spawn_blocking`, this includes scheduling +//! latency rather than pure CPU time. Together with the per-fetch total +//! already tracked in `preload_manifests`, this lets us answer "where +//! did p1's wall time go?" without external profiling. +//! +//! All counters are `AtomicU64` so the recording path is lock-free. +//! Numbers are reset between resolves via [`reset()`] so successive +//! `utoo deps` invocations report independently. + +use std::sync::atomic::{AtomicU64, Ordering}; + +/// Per-process accumulator for manifest fetch timings. +#[derive(Default, Debug)] +pub struct FetchTimings { + /// Number of fetches recorded (full + version manifest). + pub count: AtomicU64, + /// Sum of microseconds spent in `request.send().await`. + pub request_us: AtomicU64, + /// Sum of microseconds spent in `response.bytes().await`. 
+ pub body_us: AtomicU64, + /// Sum of microseconds spent awaiting `parse_json_off_runtime`. + pub parse_us: AtomicU64, + /// Sum of body bytes received across all fetches. + pub bytes: AtomicU64, +} + +impl FetchTimings { + /// Record one fetch's split timings. Call once per successful fetch. + pub fn record(&self, request_us: u64, body_us: u64, parse_us: u64, bytes: u64) { + self.count.fetch_add(1, Ordering::Relaxed); + self.request_us.fetch_add(request_us, Ordering::Relaxed); + self.body_us.fetch_add(body_us, Ordering::Relaxed); + self.parse_us.fetch_add(parse_us, Ordering::Relaxed); + self.bytes.fetch_add(bytes, Ordering::Relaxed); + } + + /// Reset all counters to zero. + pub fn reset(&self) { + self.count.store(0, Ordering::Relaxed); + self.request_us.store(0, Ordering::Relaxed); + self.body_us.store(0, Ordering::Relaxed); + self.parse_us.store(0, Ordering::Relaxed); + self.bytes.store(0, Ordering::Relaxed); + } + + /// Snapshot of the current accumulator state. + pub fn snapshot(&self) -> FetchTimingsSnapshot { + FetchTimingsSnapshot { + count: self.count.load(Ordering::Relaxed), + request_us: self.request_us.load(Ordering::Relaxed), + body_us: self.body_us.load(Ordering::Relaxed), + parse_us: self.parse_us.load(Ordering::Relaxed), + bytes: self.bytes.load(Ordering::Relaxed), + } + } +} + +/// Immutable snapshot suitable for printing. +#[derive(Debug, Clone, Copy)] +pub struct FetchTimingsSnapshot { + pub count: u64, + pub request_us: u64, + pub body_us: u64, + pub parse_us: u64, + pub bytes: u64, +} + +impl FetchTimingsSnapshot { + /// One-line summary for tracing logs. 
+ pub fn summary_line(&self) -> String { + if self.count == 0 { + return "fetch-timings: no requests recorded".to_string(); + } + let count = self.count; + let avg_req = self.request_us / count; + let avg_body = self.body_us / count; + let avg_parse = self.parse_us / count; + let avg_bytes = self.bytes / count; + format!( + "fetch-timings: n={} sum_request={}ms sum_body={}ms sum_parse={}ms total_bytes={}MB | avg_request={}us avg_body={}us avg_parse={}us avg_bytes={}KB", + count, + self.request_us / 1_000, + self.body_us / 1_000, + self.parse_us / 1_000, + self.bytes / 1_000_000, + avg_req, + avg_body, + avg_parse, + avg_bytes / 1_024, + ) + } +} + +/// Process-wide manifest fetch timing accumulator. +pub static FETCH_TIMINGS: FetchTimings = FetchTimings { + count: AtomicU64::new(0), + request_us: AtomicU64::new(0), + body_us: AtomicU64::new(0), + parse_us: AtomicU64::new(0), + bytes: AtomicU64::new(0), +}; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn record_and_snapshot() { + FETCH_TIMINGS.reset(); + FETCH_TIMINGS.record(100, 200, 300, 1024); + FETCH_TIMINGS.record(150, 250, 350, 2048); + let snap = FETCH_TIMINGS.snapshot(); + assert_eq!(snap.count, 2); + assert_eq!(snap.request_us, 250); + assert_eq!(snap.body_us, 450); + assert_eq!(snap.parse_us, 650); + assert_eq!(snap.bytes, 3072); + FETCH_TIMINGS.reset(); + let snap2 = FETCH_TIMINGS.snapshot(); + assert_eq!(snap2.count, 0); + } +}