From cb7dedc8a06ffae2facd53758ed38768f3249c1c Mon Sep 17 00:00:00 2001 From: Santiago Date: Sun, 19 Apr 2026 08:30:48 -0300 Subject: [PATCH] test(cardano): switch epoch test fixtures to use OCI images --- .github/workflows/epoch-tests.yml | 102 +++++++------- Cargo.lock | 3 + crates/testing/src/harness/cardano.rs | 16 ++- tests/epoch_pots/README.md | 36 +++++ xtask/Cargo.toml | 3 + xtask/src/config.rs | 18 +++ xtask/src/fixture/mod.rs | 109 +++++++++++++++ xtask/src/fixture/oras.rs | 84 ++++++++++++ xtask/src/fixture/pack.rs | 56 ++++++++ xtask/src/fixture/pull.rs | 173 ++++++++++++++++++++++++ xtask/src/fixture/push.rs | 187 ++++++++++++++++++++++++++ xtask/src/main.rs | 6 + xtask/tests/fixture_pack.rs | 85 ++++++++++++ 13 files changed, 821 insertions(+), 57 deletions(-) create mode 100644 xtask/src/fixture/mod.rs create mode 100644 xtask/src/fixture/oras.rs create mode 100644 xtask/src/fixture/pack.rs create mode 100644 xtask/src/fixture/pull.rs create mode 100644 xtask/src/fixture/push.rs create mode 100644 xtask/tests/fixture_pack.rs diff --git a/.github/workflows/epoch-tests.yml b/.github/workflows/epoch-tests.yml index b5fd8db38..d62a106ff 100644 --- a/.github/workflows/epoch-tests.yml +++ b/.github/workflows/epoch-tests.yml @@ -6,52 +6,49 @@ on: jobs: epoch-test: runs-on: ubuntu-latest + permissions: + contents: read + packages: read strategy: fail-fast: false max-parallel: 1 matrix: include: - # mainnet: seed 200, upstream 200-400 - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 242, upstream_range: "200-400" } - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 243, upstream_range: "200-400" } - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 245, upstream_range: "200-400" } - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 250, upstream_range: "200-400" } - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 278, upstream_range: "200-400" } - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 279, upstream_range: "200-400" } - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 280, upstream_range: "200-400" } - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 285, upstream_range: "200-400" } - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 286, upstream_range: "200-400" } - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 287, upstream_range: "200-400" } - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 288, upstream_range: "200-400" } - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 289, upstream_range: "200-400" } - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 290, upstream_range: "200-400" } - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 300, upstream_range: "200-400" } - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 325, upstream_range: "200-400" } - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 326, upstream_range: "200-400" } - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 350, upstream_range: "200-400" } - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 352, upstream_range: "200-400" } - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 375, upstream_range: "200-400" } - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 400, upstream_range: "200-400" } - # mainnet: seed 200, upstream 200-500 - - { network: mainnet, seed_epoch: 200, ground_truth_epoch: 500, upstream_range: "200-500" } - # preview: seed 500, upstream 500-700 - - { network: preview, seed_epoch: 500, ground_truth_epoch: 550, upstream_range: "500-700" } - - { network: preview, seed_epoch: 500, ground_truth_epoch: 649, upstream_range: "500-700" } - - { network: preview, seed_epoch: 500, ground_truth_epoch: 700, upstream_range: "500-700" } + - { network: mainnet, epoch: 242 } + - { network: mainnet, epoch: 243 } + - { network: mainnet, epoch: 245 } + - { network: mainnet, epoch: 250 } + - { network: mainnet, epoch: 278 } + - { network: mainnet, epoch: 279 } + - { network: mainnet, epoch: 280 } + - { network: mainnet, epoch: 285 } + - { network: mainnet, epoch: 286 } + - { network: mainnet, epoch: 287 } + - { network: mainnet, epoch: 288 } + - { network: mainnet, epoch: 289 } + - { network: mainnet, epoch: 290 } + - { network: mainnet, epoch: 300 } + - { network: mainnet, epoch: 325 } + - { network: mainnet, epoch: 326 } + - { network: mainnet, epoch: 350 } + - { network: mainnet, epoch: 352 } + - { network: mainnet, epoch: 375 } + - { network: mainnet, epoch: 400 } + - { network: mainnet, epoch: 500 } + - { network: preview, epoch: 100 } + - { network: preview, epoch: 550 } + - { network: preview, epoch: 649 } + - { network: preview, epoch: 700 } - name: "${{ matrix.network }}-${{ matrix.ground_truth_epoch }}" + name: "${{ matrix.network }}-${{ matrix.epoch }}" + + env: + DOLOS_FIXTURE_DIR: ${{ github.workspace }}/fixtures steps: - name: Checkout uses: actions/checkout@v4 - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v6 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ vars.AWS_REGION }} - - name: Install Rust toolchain uses: dtolnay/rust-toolchain@stable @@ -66,32 +63,27 @@ jobs: restore-keys: | ${{ runner.os }}-cargo-epoch-tests- - - name: Download seed - run: | - mkdir -p /tmp/fixtures/seeds - aws s3 cp "s3://${{ vars.S3_BUCKET }}/seeds/${{ matrix.network }}-${{ matrix.seed_epoch }}.tar" /tmp/seed.tar - tar xf /tmp/seed.tar -C /tmp/fixtures/seeds - rm /tmp/seed.tar + - name: Install oras + uses: oras-project/setup-oras@v1 + with: + version: 1.2.0 - - name: Download ground truth + - name: Log in to GHCR + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - mkdir -p /tmp/fixtures/ground-truth - aws s3 cp "s3://${{ vars.S3_BUCKET }}/ground-truth/${{ matrix.network }}-${{ matrix.ground_truth_epoch }}.tar" /tmp/ground-truth.tar - tar xf /tmp/ground-truth.tar -C /tmp/fixtures/ground-truth - rm /tmp/ground-truth.tar + export DOCKER_CONFIG="$(mktemp -d)" + echo "DOCKER_CONFIG=$DOCKER_CONFIG" >> "$GITHUB_ENV" + echo "$GITHUB_TOKEN" | oras login ghcr.io -u "${{ github.actor }}" --password-stdin - - name: Download upstream + - name: Pull test fixture run: | - mkdir -p /tmp/fixtures/upstream - aws s3 cp "s3://${{ vars.S3_BUCKET }}/upstream/${{ matrix.network }}-${{ matrix.upstream_range }}.tar" /tmp/upstream.tar - tar xf /tmp/upstream.tar -C /tmp/fixtures/upstream - rm /tmp/upstream.tar + mkdir -p "$DOLOS_FIXTURE_DIR" + cargo xtask fixture pull --network ${{ matrix.network }} --epoch ${{ matrix.epoch }} - name: Run epoch test - env: - DOLOS_FIXTURE_DIR: /tmp/fixtures - run: cargo test --test epoch_pots -- --nocapture + run: cargo test --test epoch_pots --release -- --nocapture - name: Cleanup if: always() - run: rm -rf /tmp/fixtures + run: rm -rf "$DOLOS_FIXTURE_DIR" diff --git a/Cargo.lock b/Cargo.lock index 27606569b..c72344a36 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6587,8 +6587,11 @@ dependencies = [ "postgres-native-tls", "serde", "serde_json", + "tar", + "tempfile", "toml 0.8.20", "xshell", + "zstd", ] [[package]] diff --git a/crates/testing/src/harness/cardano.rs b/crates/testing/src/harness/cardano.rs index b5b7ceb75..e0ccdd194 100644 --- a/crates/testing/src/harness/cardano.rs +++ b/crates/testing/src/harness/cardano.rs @@ -230,8 +230,20 @@ impl LedgerHarness { genesis, }; - // 5. Bootstrap (integrity check + drain pending work) - domain.bootstrap()?; + // 5. Seed WAL from state cursor so the integrity check accepts + // seed-based runs. Mirrors CLI's seed_wal_from_state (#950). + if let Some(cursor) = domain.state.read_cursor()? { + if cursor.is_fully_defined() { + domain.wal.reset_to(&cursor)?; + } + } + + // 6. Integrity check only. We skip the rest of `bootstrap()` + // (catch_up_stores + drain_pending_work) because the harness + // uses NoOp archive/index stores, and catch_up_indexes would + // try to decode the synthetic `LogValue::origin()` block + // produced by `reset_to` and fail. + domain.check_integrity()?; Ok(Self { domain, diff --git a/tests/epoch_pots/README.md b/tests/epoch_pots/README.md index 0a9b98f2a..a65311fba 100644 --- a/tests/epoch_pots/README.md +++ b/tests/epoch_pots/README.md @@ -68,3 +68,39 @@ cargo test --test epoch_pots -- --nocapture ``` Set `EPOCH_POTS_KEEP_DIR=1` to preserve the temporary working directories for debugging. + +## Pulling Fixtures From GHCR + +Fixtures are published as OCI artifacts on the GitHub Container Registry, one +tag per test: `ghcr.io/txpipe/dolos-fixtures:{network}-{subject_epoch}`. Each +tag bundles the seed, ground-truth, and upstream-fragment layers a single test +needs. + +```bash +# one-time: install oras (see https://oras.land/docs/installation) +# one-time: log in to GHCR +echo "$GH_PAT" | oras login ghcr.io -u --password-stdin + +# pull everything preview-100 needs +cargo xtask fixture pull --network preview --epoch 100 + +# run the test +cargo test --test epoch_pots --release -- --nocapture +``` + +`cargo xtask fixture pull` extracts the three layers into +`$DOLOS_FIXTURE_DIR/{seeds,ground-truth,upstream}//` (the same layout the +test harness discovers). `DOLOS_FIXTURE_DIR` defaults to the `fixtures.local_dir` +value in `xtask.toml` but can be overridden by exporting the env var. + +To publish a new fixture bundle: arrange the three source directories under +`$DOLOS_FIXTURE_DIR` (seed under `seeds/-/`, ground-truth +under `ground-truth/-/`, upstream fragment under +`upstream/--/`), then: + +```bash +cargo xtask fixture push \ + --network --epoch \ + --seed-epoch \ + --upstream-start --upstream-end +``` diff --git a/xtask/Cargo.toml b/xtask/Cargo.toml index 3bdfdaa3c..9e1a65556 100644 --- a/xtask/Cargo.toml +++ b/xtask/Cargo.toml @@ -23,3 +23,6 @@ postgres = "0.19" postgres-native-tls = "0.5" bech32 = { workspace = true } pallas = { workspace = true } +tar = "0.4" +zstd = "0.13" +tempfile = "3.20.0" diff --git a/xtask/src/config.rs b/xtask/src/config.rs index addfdd26a..2dc8d3f68 100644 --- a/xtask/src/config.rs +++ b/xtask/src/config.rs @@ -32,6 +32,8 @@ pub struct XtaskConfig { pub dbsync: DbSyncConfig, #[serde(default)] pub seeds: SeedConfig, + #[serde(default)] + pub fixtures: FixtureConfig, } impl Default for XtaskConfig { @@ -41,6 +43,7 @@ impl Default for XtaskConfig { snapshots: SnapshotConfig::default(), dbsync: DbSyncConfig::default(), seeds: SeedConfig::default(), + fixtures: FixtureConfig::default(), } } } @@ -95,6 +98,21 @@ impl SeedConfig { } } +/// Fixture packaging / registry configuration. +/// +/// `local_dir` is the base under which fixtures live on disk, with subdirs +/// `seeds/`, `ground-truth/`, `upstream/`. This is also where fixture pulls +/// extract to and where pushes read from — i.e. the path the test harness +/// uses as `DOLOS_FIXTURE_DIR`. +/// +/// `registry` is the OCI registry + repo reference (without a tag) used for +/// pushing and pulling test-fixture artifacts. +#[derive(Debug, Deserialize, Default)] +pub struct FixtureConfig { + pub local_dir: Option, + pub registry: Option, +} + /// DBSync connection URLs per network. #[derive(Debug, Deserialize, Default)] pub struct DbSyncConfig { diff --git a/xtask/src/fixture/mod.rs b/xtask/src/fixture/mod.rs new file mode 100644 index 000000000..3e11f16b6 --- /dev/null +++ b/xtask/src/fixture/mod.rs @@ -0,0 +1,109 @@ +//! fixture subcommands. +//! +//! Push and pull test-fixture bundles to/from an OCI registry (GHCR by default). +//! Each tag `{network}-{subject_epoch}` is an OCI artifact with three layers +//! (seed, ground-truth, upstream fragment) stitched together — one pull per +//! test. + +use std::path::{Path, PathBuf}; + +use anyhow::{Context, Result}; +use clap::Subcommand; +use xshell::Shell; + +use crate::config::XtaskConfig; +use crate::util::resolve_path; + +pub mod oras; +pub mod pack; +pub mod pull; +pub mod push; + +/// Artifact type for a test-fixture manifest. +pub const ARTIFACT_TYPE: &str = "application/vnd.txpipe.dolos.test-fixture.v1"; + +/// Layer media type for seed fixtures (fjall state dir). +pub const SEED_MEDIA_TYPE: &str = "application/vnd.txpipe.dolos.fixture.seed.v1.tar+zstd"; + +/// Layer media type for ground-truth fixtures (CSV bundle). +pub const GROUND_TRUTH_MEDIA_TYPE: &str = + "application/vnd.txpipe.dolos.fixture.ground-truth.v1.tar+zstd"; + +/// Layer media type for upstream fragments (Mithril immutable chunks). +pub const UPSTREAM_MEDIA_TYPE: &str = "application/vnd.txpipe.dolos.fixture.upstream.v1.tar+zstd"; + +/// Current fixture schema version. Bump if the on-disk layout changes. +pub const SCHEMA_VERSION: &str = "1"; + +// Manifest-level annotation keys. +pub const ANN_SCHEMA_VERSION: &str = "io.txpipe.dolos.schema-version"; +pub const ANN_NETWORK: &str = "io.txpipe.dolos.network"; +pub const ANN_SUBJECT_EPOCH: &str = "io.txpipe.dolos.subject-epoch"; +pub const ANN_SEED_EPOCH: &str = "io.txpipe.dolos.seed-epoch"; +pub const ANN_UPSTREAM_START: &str = "io.txpipe.dolos.upstream-start"; +pub const ANN_UPSTREAM_END: &str = "io.txpipe.dolos.upstream-end"; + +/// Layer filenames — also used as `org.opencontainers.image.title` annotations, +/// which is how `oras pull` decides the output filename for each blob. +pub const SEED_LAYER_FILE: &str = "seed.tar.zst"; +pub const GROUND_TRUTH_LAYER_FILE: &str = "ground-truth.tar.zst"; +pub const UPSTREAM_LAYER_FILE: &str = "upstream.tar.zst"; + +#[derive(Debug, Subcommand)] +pub enum FixtureCmd { + /// Pack and push a test-fixture bundle to the configured registry. + Push(push::PushArgs), + + /// Pull a test-fixture bundle from the registry and extract it locally. + Pull(pull::PullArgs), +} + +pub fn run(sh: &Shell, cmd: FixtureCmd) -> Result<()> { + match cmd { + FixtureCmd::Push(args) => push::run(sh, &args), + FixtureCmd::Pull(args) => pull::run(sh, &args), + } +} + +/// Build the full registry reference `{registry}:{network}-{subject_epoch}`. +pub fn build_ref(registry: &str, network: &str, subject_epoch: u64) -> String { + format!("{registry}:{network}-{subject_epoch}") +} + +/// On-disk directory name for a seed fixture: `{network}-{epoch}`. +pub fn seed_key(network: &str, epoch: u64) -> String { + format!("{network}-{epoch}") +} + +/// On-disk directory name for a ground-truth fixture: `{network}-{epoch}`. +pub fn ground_truth_key(network: &str, epoch: u64) -> String { + format!("{network}-{epoch}") +} + +/// On-disk directory name for an upstream fragment: `{network}-{start}-{end}`. +pub fn upstream_key(network: &str, start: u64, end: u64) -> String { + format!("{network}-{start}-{end}") +} + +/// Resolve the base directory where fixtures are read from / extracted to. +/// +/// Precedence: `$DOLOS_FIXTURE_DIR` env var (matches what the test reads) → +/// `xtask.toml` `fixtures.local_dir` key. The env override is how CI points +/// at a runner-local path without touching the repo's `xtask.toml`. +pub fn resolve_local_dir(repo_root: &Path, config: &XtaskConfig) -> Result { + if let Ok(v) = std::env::var("DOLOS_FIXTURE_DIR") { + if !v.is_empty() { + return Ok(PathBuf::from(v)); + } + } + + config + .fixtures + .local_dir + .as_ref() + .map(|p| resolve_path(repo_root, p)) + .context( + "fixtures.local_dir not configured (set `[fixtures] local_dir` in xtask.toml \ + or export DOLOS_FIXTURE_DIR)", + ) +} diff --git a/xtask/src/fixture/oras.rs b/xtask/src/fixture/oras.rs new file mode 100644 index 000000000..2a9d3c2c7 --- /dev/null +++ b/xtask/src/fixture/oras.rs @@ -0,0 +1,84 @@ +//! Thin wrappers around the `oras` CLI. +//! +//! We shell out via xshell rather than depending on a Rust OCI library to +//! keep xtask light and match the rest of this crate's pattern (which already +//! shells out to `dolos`, `psql`, etc.). + +use std::path::Path; + +use anyhow::{Context, Result}; +use serde_json::Value; +use xshell::{cmd, Shell}; + +/// A single layer to push: local file + OCI media type. +pub struct Layer<'a> { + pub file: &'a Path, + pub media_type: &'a str, +} + +/// Push a multi-layer artifact to `reference` (e.g. `ghcr.io/foo/bar:tag`). +/// +/// `artifact_type` sets the manifest's `artifactType`; `annotations` are +/// manifest-level annotations applied via `--annotation`. +pub fn push( + sh: &Shell, + reference: &str, + artifact_type: &str, + annotations: &[(String, String)], + layers: &[Layer], +) -> Result<()> { + let mut ann_args: Vec = Vec::with_capacity(annotations.len() * 2); + for (k, v) in annotations { + ann_args.push("--annotation".to_string()); + ann_args.push(format!("{k}={v}")); + } + + let mut file_args: Vec = Vec::with_capacity(layers.len()); + for layer in layers { + file_args.push(format!("{}:{}", layer.file.display(), layer.media_type)); + } + + cmd!( + sh, + "oras push {reference} --artifact-type {artifact_type} {ann_args...} {file_args...}" + ) + .run() + .with_context(|| format!("oras push {reference}"))?; + + Ok(()) +} + +/// Pull `reference` into `output_dir`. Blob filenames come from the layer +/// `org.opencontainers.image.title` annotations, which oras auto-populates on +/// push based on the local filename of each layer file. +pub fn pull(sh: &Shell, reference: &str, output_dir: &Path) -> Result<()> { + cmd!(sh, "oras pull {reference} --output {output_dir}") + .run() + .with_context(|| format!("oras pull {reference}"))?; + Ok(()) +} + +/// Fetch the manifest JSON for `reference`. +pub fn manifest_fetch(sh: &Shell, reference: &str) -> Result { + let out = cmd!(sh, "oras manifest fetch {reference}") + .read() + .with_context(|| format!("oras manifest fetch {reference}"))?; + + let parsed: Value = serde_json::from_str(&out) + .with_context(|| format!("parsing manifest JSON for {reference}"))?; + + Ok(parsed) +} + +/// Log in to a registry. Reads the token from stdin. +#[allow(dead_code)] +pub fn login(sh: &Shell, registry: &str, username: &str, token: &str) -> Result<()> { + cmd!( + sh, + "oras login {registry} -u {username} --password-stdin" + ) + .stdin(token) + .run() + .with_context(|| format!("oras login {registry}"))?; + Ok(()) +} diff --git a/xtask/src/fixture/pack.rs b/xtask/src/fixture/pack.rs new file mode 100644 index 000000000..13b411ba1 --- /dev/null +++ b/xtask/src/fixture/pack.rs @@ -0,0 +1,56 @@ +//! tar + zstd helpers for fixture archives. + +use std::fs::File; +use std::path::Path; + +use anyhow::{Context, Result}; + +/// Pack the *contents* of `src_dir` into a tar.zst at `out_path`. +/// +/// The archive has no leading `src_dir` path component — entries are relative +/// to `src_dir`. Extraction therefore writes straight into whatever directory +/// the caller chooses, with no kind/epoch prefix embedded in the archive. +pub fn pack_tar_zst(src_dir: &Path, out_path: &Path, level: i32) -> Result<()> { + let out_file = + File::create(out_path).with_context(|| format!("creating {}", out_path.display()))?; + + let zstd_writer = zstd::Encoder::new(out_file, level) + .context("creating zstd encoder")? + .auto_finish(); + + let mut tar_builder = tar::Builder::new(zstd_writer); + // Follow symlinks so the archive contains the real content rather than a + // symlink entry pointing at a path that won't exist on the consumer side. + tar_builder.follow_symlinks(true); + + tar_builder + .append_dir_all(".", src_dir) + .with_context(|| format!("adding contents of {} to tar", src_dir.display()))?; + + tar_builder.finish().context("finishing tar")?; + + Ok(()) +} + +/// Stream-extract a tar.zst archive at `src_path` into `dst_dir`. +/// +/// `dst_dir` is created if it does not exist. Existing contents are not +/// cleared — the caller is expected to extract into a fresh temp dir and +/// rename into place to get atomicity. +pub fn extract_tar_zst(src_path: &Path, dst_dir: &Path) -> Result<()> { + std::fs::create_dir_all(dst_dir) + .with_context(|| format!("creating {}", dst_dir.display()))?; + + let src_file = + File::open(src_path).with_context(|| format!("opening {}", src_path.display()))?; + + let zstd_reader = zstd::Decoder::new(src_file).context("creating zstd decoder")?; + + let mut tar_archive = tar::Archive::new(zstd_reader); + tar_archive.set_preserve_permissions(true); + tar_archive + .unpack(dst_dir) + .with_context(|| format!("extracting into {}", dst_dir.display()))?; + + Ok(()) +} diff --git a/xtask/src/fixture/pull.rs b/xtask/src/fixture/pull.rs new file mode 100644 index 000000000..a09165a19 --- /dev/null +++ b/xtask/src/fixture/pull.rs @@ -0,0 +1,173 @@ +//! `fixture pull` — pull a test-fixture bundle and extract layers locally. + +use std::path::{Path, PathBuf}; + +use anyhow::{bail, Context, Result}; +use clap::Args; +use serde_json::Value; +use xshell::Shell; + +use crate::config::{load_xtask_config, Network}; +use crate::fixture::{ + build_ref, ground_truth_key, oras, pack, resolve_local_dir, seed_key, upstream_key, + ANN_SEED_EPOCH, ANN_UPSTREAM_END, ANN_UPSTREAM_START, GROUND_TRUTH_LAYER_FILE, + SEED_LAYER_FILE, UPSTREAM_LAYER_FILE, +}; + +#[derive(Debug, Args)] +pub struct PullArgs { + /// Target network + #[arg(long, value_enum)] + pub network: Network, + + /// Subject epoch identifying the test-fixture tag + #[arg(long)] + pub epoch: u64, + + /// Overwrite existing local fixture dirs for this test + #[arg(long, action)] + pub force: bool, + + /// Resolve the manifest but skip the pull/extract + #[arg(long, action)] + pub dry_run: bool, +} + +pub fn run(sh: &Shell, args: &PullArgs) -> Result<()> { + let repo_root = std::env::current_dir().context("detecting repo root")?; + let xtask_config = load_xtask_config(&repo_root)?; + + let local_dir = resolve_local_dir(&repo_root, &xtask_config)?; + + let registry = xtask_config + .fixtures + .registry + .as_deref() + .context("fixtures.registry not configured in xtask.toml")?; + + let network_str = args.network.as_str(); + let reference = build_ref(registry, network_str, args.epoch); + + println!("Pulling {reference}"); + + if args.dry_run { + println!("(dry-run, not calling oras manifest fetch or pull)"); + return Ok(()); + } + + let manifest = oras::manifest_fetch(sh, &reference)?; + + let seed_epoch = read_u64_annotation(&manifest, ANN_SEED_EPOCH)?; + let upstream_start = read_u64_annotation(&manifest, ANN_UPSTREAM_START)?; + let upstream_end = read_u64_annotation(&manifest, ANN_UPSTREAM_END)?; + + let seed_dest = local_dir + .join("seeds") + .join(seed_key(network_str, seed_epoch)); + let gt_dest = local_dir + .join("ground-truth") + .join(ground_truth_key(network_str, args.epoch)); + let upstream_dest = local_dir + .join("upstream") + .join(upstream_key(network_str, upstream_start, upstream_end)); + + println!(" seed ({seed_epoch}): {}", seed_dest.display()); + println!(" ground-truth ({}): {}", args.epoch, gt_dest.display()); + println!( + " upstream ({upstream_start}-{upstream_end}): {}", + upstream_dest.display() + ); + + if !args.force { + let all_exist = seed_dest.exists() && gt_dest.exists() && upstream_dest.exists(); + if all_exist { + println!("All destinations already exist; skipping (pass --force to overwrite)"); + return Ok(()); + } + } + + std::fs::create_dir_all(local_dir.join("seeds"))?; + std::fs::create_dir_all(local_dir.join("ground-truth"))?; + std::fs::create_dir_all(local_dir.join("upstream"))?; + + let download = tempfile::Builder::new() + .prefix("dolos-fixture-pull-") + .tempdir_in(&local_dir) + .context("creating pull staging dir")?; + + oras::pull(sh, &reference, download.path())?; + + let seed_tar = download.path().join(SEED_LAYER_FILE); + let gt_tar = download.path().join(GROUND_TRUTH_LAYER_FILE); + let upstream_tar = download.path().join(UPSTREAM_LAYER_FILE); + + for (label, tar, dest) in [ + ("seed", &seed_tar, &seed_dest), + ("ground-truth", >_tar, >_dest), + ("upstream", &upstream_tar, &upstream_dest), + ] { + if !tar.exists() { + bail!( + "{label} layer not found in pulled artifact (expected {})", + tar.display() + ); + } + extract_atomic(tar, dest, args.force) + .with_context(|| format!("extracting {label} to {}", dest.display()))?; + } + + println!("Pull complete"); + Ok(()) +} + +fn read_u64_annotation(manifest: &Value, key: &str) -> Result { + let value = manifest + .get("annotations") + .and_then(|a| a.get(key)) + .and_then(|v| v.as_str()) + .with_context(|| format!("manifest annotation `{key}` missing or not a string"))?; + + value + .parse::() + .with_context(|| format!("parsing annotation `{key}` (value: {value:?})")) +} + +/// Extract `tar_path` into `dest` by unpacking into a sibling temp dir and +/// renaming atomically. If `dest` exists, it is removed first (only when +/// `force` is set — caller has already vetted the overwrite policy). +fn extract_atomic(tar_path: &Path, dest: &Path, force: bool) -> Result<()> { + let parent = dest + .parent() + .context("fixture destination has no parent dir")?; + + std::fs::create_dir_all(parent) + .with_context(|| format!("creating parent {}", parent.display()))?; + + let staged = tempfile::Builder::new() + .prefix(".dolos-fixture-extract-") + .tempdir_in(parent) + .with_context(|| format!("creating staging dir in {}", parent.display()))?; + + pack::extract_tar_zst(tar_path, staged.path())?; + + if dest.exists() { + if !force { + println!( + " destination exists, leaving as-is: {}", + dest.display() + ); + return Ok(()); + } + remove_dir_all_best_effort(dest)?; + } + + let staged_path: PathBuf = staged.keep(); + std::fs::rename(&staged_path, dest) + .with_context(|| format!("rename {} -> {}", staged_path.display(), dest.display()))?; + + Ok(()) +} + +fn remove_dir_all_best_effort(path: &Path) -> Result<()> { + std::fs::remove_dir_all(path).with_context(|| format!("removing {}", path.display())) +} diff --git a/xtask/src/fixture/push.rs b/xtask/src/fixture/push.rs new file mode 100644 index 000000000..0e0228952 --- /dev/null +++ b/xtask/src/fixture/push.rs @@ -0,0 +1,187 @@ +//! `fixture push` — pack and push a test-fixture bundle. + +use anyhow::{bail, Context, Result}; +use clap::Args; +use xshell::Shell; + +use crate::config::{load_xtask_config, Network}; +use crate::fixture::{ + build_ref, ground_truth_key, oras, pack, resolve_local_dir, seed_key, upstream_key, + ANN_NETWORK, ANN_SCHEMA_VERSION, ANN_SEED_EPOCH, ANN_SUBJECT_EPOCH, ANN_UPSTREAM_END, + ANN_UPSTREAM_START, ARTIFACT_TYPE, GROUND_TRUTH_LAYER_FILE, GROUND_TRUTH_MEDIA_TYPE, + SCHEMA_VERSION, SEED_LAYER_FILE, SEED_MEDIA_TYPE, UPSTREAM_LAYER_FILE, UPSTREAM_MEDIA_TYPE, +}; + +const DEFAULT_ZSTD_LEVEL: i32 = 3; + +#[derive(Debug, Args)] +pub struct PushArgs { + /// Target network + #[arg(long, value_enum)] + pub network: Network, + + /// Subject epoch for this test (used as the tag's epoch component) + #[arg(long)] + pub epoch: u64, + + /// Epoch of the seed to bundle — resolves to `seeds/{network}-{seed_epoch}/` + #[arg(long)] + pub seed_epoch: u64, + + /// Start of the upstream range to bundle + #[arg(long)] + pub upstream_start: u64, + + /// End of the upstream range to bundle + #[arg(long)] + pub upstream_end: u64, + + /// Show what would be pushed without calling oras + #[arg(long, action)] + pub dry_run: bool, +} + +pub fn run(sh: &Shell, args: &PushArgs) -> Result<()> { + let repo_root = std::env::current_dir().context("detecting repo root")?; + let xtask_config = load_xtask_config(&repo_root)?; + + let local_dir = resolve_local_dir(&repo_root, &xtask_config)?; + + let registry = xtask_config + .fixtures + .registry + .as_deref() + .context("fixtures.registry not configured in xtask.toml")?; + + let network_str = args.network.as_str(); + + let seed_dir = local_dir + .join("seeds") + .join(seed_key(network_str, args.seed_epoch)); + let gt_dir = local_dir + .join("ground-truth") + .join(ground_truth_key(network_str, args.epoch)); + let upstream_dir = local_dir.join("upstream").join(upstream_key( + network_str, + args.upstream_start, + args.upstream_end, + )); + + for (label, dir) in [ + ("seed", &seed_dir), + ("ground-truth", >_dir), + ("upstream", &upstream_dir), + ] { + if !dir.exists() { + bail!("{label} source dir not found: {}", dir.display()); + } + } + + let reference = build_ref(registry, network_str, args.epoch); + + println!("Push target: {reference}"); + println!(" seed: {}", seed_dir.display()); + println!(" ground-truth: {}", gt_dir.display()); + println!(" upstream: {}", upstream_dir.display()); + + if args.dry_run { + println!("(dry-run, not packing or pushing)"); + return Ok(()); + } + + let staging = tempfile::Builder::new() + .prefix("dolos-fixture-push-") + .tempdir() + .context("creating push staging dir")?; + + let seed_tar = staging.path().join(SEED_LAYER_FILE); + let gt_tar = staging.path().join(GROUND_TRUTH_LAYER_FILE); + let upstream_tar = staging.path().join(UPSTREAM_LAYER_FILE); + + println!("Packing seed…"); + pack::pack_tar_zst(&seed_dir, &seed_tar, DEFAULT_ZSTD_LEVEL)?; + println!(" -> {} ({})", seed_tar.display(), human_size(&seed_tar)?); + + println!("Packing ground-truth…"); + pack::pack_tar_zst(>_dir, >_tar, DEFAULT_ZSTD_LEVEL)?; + println!(" -> {} ({})", gt_tar.display(), human_size(>_tar)?); + + println!("Packing upstream…"); + pack::pack_tar_zst(&upstream_dir, &upstream_tar, DEFAULT_ZSTD_LEVEL)?; + println!( + " -> {} ({})", + upstream_tar.display(), + human_size(&upstream_tar)? + ); + + let annotations = vec![ + (ANN_SCHEMA_VERSION.to_string(), SCHEMA_VERSION.to_string()), + (ANN_NETWORK.to_string(), network_str.to_string()), + (ANN_SUBJECT_EPOCH.to_string(), args.epoch.to_string()), + (ANN_SEED_EPOCH.to_string(), args.seed_epoch.to_string()), + ( + ANN_UPSTREAM_START.to_string(), + args.upstream_start.to_string(), + ), + ( + ANN_UPSTREAM_END.to_string(), + args.upstream_end.to_string(), + ), + ]; + + let layers = [ + oras::Layer { + file: &seed_tar, + media_type: SEED_MEDIA_TYPE, + }, + oras::Layer { + file: >_tar, + media_type: GROUND_TRUTH_MEDIA_TYPE, + }, + oras::Layer { + file: &upstream_tar, + media_type: UPSTREAM_MEDIA_TYPE, + }, + ]; + + // `oras push` needs to see layer files by just their filename so that the + // auto-populated `org.opencontainers.image.title` annotation matches what + // pull expects. Run with cwd set to staging so relative paths match. + let _dir_guard = sh.push_dir(staging.path()); + + let layers_rel: Vec = layers + .iter() + .map(|l| oras::Layer { + file: std::path::Path::new(l.file.file_name().unwrap()), + media_type: l.media_type, + }) + .collect(); + + println!("Pushing to {reference}…"); + oras::push(sh, &reference, ARTIFACT_TYPE, &annotations, &layers_rel)?; + + println!("Push complete: {reference}"); + Ok(()) +} + +fn human_size(path: &std::path::Path) -> Result { + let bytes = std::fs::metadata(path) + .with_context(|| format!("stat {}", path.display()))? + .len(); + Ok(format_bytes(bytes)) +} + +fn format_bytes(n: u64) -> String { + const UNITS: &[&str] = &["B", "KiB", "MiB", "GiB", "TiB"]; + let mut size = n as f64; + let mut unit = 0; + while size >= 1024.0 && unit < UNITS.len() - 1 { + size /= 1024.0; + unit += 1; + } + if unit == 0 { + format!("{n} {}", UNITS[unit]) + } else { + format!("{size:.2} {}", UNITS[unit]) + } +} diff --git a/xtask/src/main.rs b/xtask/src/main.rs index 8f4ba127f..6897423fb 100644 --- a/xtask/src/main.rs +++ b/xtask/src/main.rs @@ -4,6 +4,7 @@ use xshell::{cmd, Shell}; mod bootstrap; mod config; +mod fixture; mod ground_truth; mod test_instance; mod util; @@ -31,6 +32,10 @@ enum Commands { /// Test instance management commands (create, delete) #[command(subcommand)] TestInstance(test_instance::TestInstanceCmd), + + /// Fixture artifact commands (push, pull via OCI/GHCR) + #[command(subcommand)] + Fixture(fixture::FixtureCmd), } fn main() -> Result<()> { @@ -48,6 +53,7 @@ fn main() -> Result<()> { Commands::BootstrapMithrilLocal(args) => bootstrap::run(&sh, &args)?, Commands::GroundTruth(cmd) => ground_truth::run(cmd)?, Commands::TestInstance(cmd) => test_instance::run(&sh, cmd)?, + Commands::Fixture(cmd) => fixture::run(&sh, cmd)?, } Ok(()) diff --git a/xtask/tests/fixture_pack.rs b/xtask/tests/fixture_pack.rs new file mode 100644 index 000000000..618f9a027 --- /dev/null +++ b/xtask/tests/fixture_pack.rs @@ -0,0 +1,85 @@ +//! Round-trip tests for fixture tar.zst pack/extract. +//! +//! Exercises the pack + extract path with no OCI interaction. + +use std::fs; +use std::path::{Path, PathBuf}; + +use tempfile::tempdir; + +// The pack module lives inside the xtask binary crate. Include it directly +// via `#[path]` so tests can drive the functions without exposing a lib. +#[path = "../src/fixture/pack.rs"] +mod pack; + +fn write_file(p: &Path, content: &[u8]) { + if let Some(parent) = p.parent() { + fs::create_dir_all(parent).unwrap(); + } + fs::write(p, content).unwrap(); +} + +fn collect_files(root: &Path) -> Vec<(PathBuf, Vec)> { + let mut out = Vec::new(); + walk(root, root, &mut out); + out.sort_by(|a, b| a.0.cmp(&b.0)); + out +} + +fn walk(root: &Path, dir: &Path, out: &mut Vec<(PathBuf, Vec)>) { + for entry in fs::read_dir(dir).unwrap() { + let entry = entry.unwrap(); + let path = entry.path(); + let md = fs::metadata(&path).unwrap(); + if md.is_dir() { + walk(root, &path, out); + } else if md.is_file() { + let rel = path.strip_prefix(root).unwrap().to_path_buf(); + out.push((rel, fs::read(&path).unwrap())); + } + } +} + +#[test] +fn pack_then_extract_round_trip() { + let src = tempdir().unwrap(); + write_file(&src.path().join("epochs.csv"), b"epoch_no\n100\n"); + write_file( + &src.path().join("nested").join("stake-98.csv"), + b"stake,pool,lovelace\n", + ); + + let archive = tempdir().unwrap(); + let archive_path = archive.path().join("payload.tar.zst"); + pack::pack_tar_zst(src.path(), &archive_path, 3).unwrap(); + + assert!( + archive_path.exists() && archive_path.metadata().unwrap().len() > 0, + "archive was not written" + ); + + let dst = tempdir().unwrap(); + pack::extract_tar_zst(&archive_path, dst.path()).unwrap(); + + assert_eq!(collect_files(src.path()), collect_files(dst.path())); +} + +#[test] +fn pack_follows_symlinks() { + let target = tempdir().unwrap(); + write_file(&target.path().join("hello.txt"), b"hello world"); + + let src = tempdir().unwrap(); + std::os::unix::fs::symlink(target.path(), src.path().join("link")).unwrap(); + + let archive = tempdir().unwrap(); + let archive_path = archive.path().join("payload.tar.zst"); + pack::pack_tar_zst(src.path(), &archive_path, 3).unwrap(); + + let dst = tempdir().unwrap(); + pack::extract_tar_zst(&archive_path, dst.path()).unwrap(); + + let extracted = dst.path().join("link").join("hello.txt"); + assert!(extracted.exists(), "symlinked file not extracted"); + assert_eq!(fs::read(&extracted).unwrap(), b"hello world"); +}