diff --git a/.env.example b/.env.example new file mode 100644 index 0000000000..c757ab59c3 --- /dev/null +++ b/.env.example @@ -0,0 +1,23 @@ +# Copy this file to .env and customize values for your local environment. +# The .env file is ignored by git. + +# Required workspace paths +PUDL_INPUT=/absolute/path/to/pudl-input +PUDL_OUTPUT=/absolute/path/to/pudl-output +DAGSTER_HOME=/absolute/path/to/dagster-home + +# Logging controls (read by pudl.logging_helpers.configure_root_logger) +PUDL_LOGLEVEL=INFO +PUDL_COLOR_LOGS=true + +# Optional: write logs to a file in addition to console output. +# Leave unset for console-only logging. +# PUDL_LOGFILE=/absolute/path/to/pudl/logs/pudl.log + +# Optional: don't try to use intersphinx to link to external documentation +# during the docs build -- it can be flaky and isn't required for most docs edits. + +# PUDL_DOCS_DISABLE_INTERSPHINX=1 + +# Optional: don't remove generated rst files after the docs build. Can be helpful +# when debugging formatting errors. 
+# PUDL_DOCS_KEEP_GENERATED_FILES=1 diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000000..85d77ac56f --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +skills/** diff --git a/.github/skills b/.github/skills~Updated upstream similarity index 100% rename from .github/skills rename to .github/skills~Updated upstream diff --git a/.github/workflows/build-deploy-ferceqr.yml b/.github/workflows/build-deploy-ferceqr.yml index 6891d241d7..85da91e945 100644 --- a/.github/workflows/build-deploy-ferceqr.yml +++ b/.github/workflows/build-deploy-ferceqr.yml @@ -11,6 +11,8 @@ env: GCS_LOGS_BUCKET: gs://builds.catalyst.coop/ferceqr_logs S3_OUTPUT_BUCKET: s3://pudl.catalyst.coop/ferceqr BATCH_JOB_JSON: batch_job.json + BUILD_ID: "" + BATCH_JOB_ID: "" jobs: build_and_deploy_eqr: diff --git a/.github/workflows/build-deploy-pudl.yml b/.github/workflows/build-deploy-pudl.yml index 912d739357..5c107508a6 100644 --- a/.github/workflows/build-deploy-pudl.yml +++ b/.github/workflows/build-deploy-pudl.yml @@ -139,7 +139,7 @@ jobs: --container-env OMP_NUM_THREADS=4 \ --container-env PUDL_BOT_PAT=${{ secrets.PUDL_BOT_PAT }} \ --container-env PUDL_GCS_OUTPUT=${{ env.PUDL_GCS_OUTPUT }} \ - --container-env PUDL_SETTINGS_YML="/home/ubuntu/pudl/src/pudl/package_data/settings/etl_full.yml" \ + --container-env DG_NIGHTLY_CONFIG="src/pudl/package_data/settings/dg_nightly.yml" \ --container-env SLACK_TOKEN=${{ secrets.PUDL_DEPLOY_SLACK_TOKEN }} \ --container-env ZENODO_SANDBOX_TOKEN_PUBLISH=${{ secrets.ZENODO_SANDBOX_TOKEN_PUBLISH }} \ --container-env ZENODO_TARGET_ENV=${{ (startsWith(github.ref_name, 'v20') && 'production') || 'sandbox' }} \ diff --git a/.github/workflows/com-dev-notify.yml b/.github/workflows/com-dev-notify.yml index d2676efc65..d95a50ffbe 100644 --- a/.github/workflows/com-dev-notify.yml +++ b/.github/workflows/com-dev-notify.yml @@ -7,8 +7,8 @@ on: types: [created] env: - username: ${{ github.event.issue.user.login }} - url: ${{ 
github.event.issue.html_url }} + username: "" + url: "" org: catalyst-cooperative jobs: @@ -16,8 +16,14 @@ jobs: name: Notify Catalyst of community activity runs-on: ubuntu-latest steps: + - name: Get username if an issue was opened + if: ${{ github.event_name == 'issues' }} + run: | + echo "username=${{ github.event.issue.user.login }}" >> "${GITHUB_ENV}" + echo "url=${{ github.event.issue.html_url }}" >> "${GITHUB_ENV}" + - name: Get username if a discussion was created - if: ${{ (github.event_name == 'discussion') }} + if: ${{ github.event_name == 'discussion' }} run: | echo "username=${{ github.event.discussion.user.login }}" >> "${GITHUB_ENV}" echo "url=${{ github.event.discussion.html_url }}" >> "${GITHUB_ENV}" @@ -36,13 +42,11 @@ jobs: uses: slackapi/slack-github-action@v3 with: - # Slack channel id, channel name, or user id to post message. - # See also: https://api.slack.com/methods/chat.postMessage#channels - # You can pass in multiple channels to post to by providing a comma-delimited list of channel IDs. - channel-id: "community-dev" - # For posting a markdown message + method: chat.postMessage + token: ${{ secrets.COMMUNITY_DEV_SLACK_BOT_TOKEN }} payload: | { + "channel": "community-dev", "blocks": [ { "type": "section", @@ -53,5 +57,3 @@ jobs: } ] } - env: - SLACK_BOT_TOKEN: ${{ secrets.COMMUNITY_DEV_SLACK_BOT_TOKEN }} diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index b3c6619cd8..612d0261ba 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -20,8 +20,8 @@ jobs: permissions: pull-requests: read outputs: - # 2025-07-17: because merge_group is an Object and run_code_checks is not a conditional, need to explicitly check for null-ness instead of relying on truthiness. - run_code_checks: ${{ github.event_name =='workflow_dispatch' || (steps.filter.outputs.code == 'true' && (github.event.merge_group != null)) }} + # Run code checks for manual dispatches and merge queue runs with code changes. 
+ run_code_checks: ${{ github.event_name == 'workflow_dispatch' || (github.event_name == 'merge_group' && steps.filter.outputs.code == 'true') }} steps: - uses: actions/checkout@v6 with: @@ -47,6 +47,7 @@ jobs: - '!.github/workflows/bot-auto-merge.yml' - '!.github/workflows/build-deploy-docs.yml' - '!.github/workflows/build-deploy-pudl.yml' + - '!.github/workflows/deploy-pudl.yml' - '!.github/workflows/com-dev-notify.yml' - '!.github/workflows/docker-build-test.yml' - '!.github/workflows/q-update-issue-scheduler.yml' @@ -77,7 +78,7 @@ jobs: run: | echo "event name (${{ github.event_name }}) is workflow dispatch: ${{ github.event_name == 'workflow_dispatch' }}" echo "found code changes: ${{ steps.filter.outputs.code }}" - echo "merge_group ${{ github.event.merge_group }} is not null: ${{ github.event.merge_group != null }}" + echo "event name (${{ github.event_name }}) is merge_group: ${{ github.event_name == 'merge_group' }}" ci-docs: permissions: diff --git a/.github/workflows/update-lockfiles.yml b/.github/workflows/update-lockfiles.yml index 3f88586aa5..11c9225cb0 100644 --- a/.github/workflows/update-lockfiles.yml +++ b/.github/workflows/update-lockfiles.yml @@ -10,6 +10,9 @@ on: # - workflow_dispatch: Whatever branch it was run against. 
# - schedule: Always runs on main +env: + TODAY: "" + jobs: update-lockfiles: runs-on: ubuntu-latest diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a67b3c3346..e0ed85a1f7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -72,6 +72,7 @@ repos: |.*\.bib |.*\.csv |.*\.html + |.*\.json |src/pudl/package_data/ferc1/.*_categories\.yaml )$ | migrations/ | devtools/ | test/ | notebooks/ | src/pudl/metadata/codes.py | src/pudl/transform/params/ferc1.py args: [] # Make this read, not write @@ -153,6 +154,15 @@ repos: always_run: false entry: pixi run jupyter nbconvert --clear-output + - id: pixi-lock-update + name: pixi-lock-update + stages: [pre-commit] + language: system + verbose: false + pass_filenames: false + always_run: true + entry: bash -c 'pixi install --quiet && git add pixi.lock' + - id: unit-tests name: unit-tests stages: [pre-commit] @@ -172,5 +182,13 @@ ci: autoupdate_branch: main autoupdate_commit_msg: "[pre-commit.ci] pre-commit autoupdate" autoupdate_schedule: weekly - skip: [unit-tests, nb-output-clear, shellcheck, trufflehog, detect-secrets] + skip: + [ + detect-secrets, + nb-output-clear, + pixi-lock-update, + shellcheck, + trufflehog, + unit-tests, + ] submodules: false diff --git a/AGENTS.md b/AGENTS.md index a844668bfa..b0773c94dd 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,112 +1,486 @@ -# LLM coding agent instructions for the Public Utility Data Liberation (PUDL) Project - -## PUDL Project Overview - -- The PUDL Project implements a data processing pipeline that ingests raw energy system - data from public agencies like the US Energy Information Administration (EIA) and the - Federal Energy Regulatory Commission (FERC) and transforms it into clean, well - organized tables for use in analysis and modeling. -- PUDL uses the Dagster data orchestration framework to manage dependencies between - different assets, and to enable parallel execution of different portions of the data - processing pipeline. 
-- The raw input data for the PUDL data processing pipeline can be found in the directory - indicated by the `$PUDL_INPUT` environment variable. The raw inputs are downloaded as - needed by the data pipeline, but can be pre-downloaded in bulk using the - `pudl_datastore` command line interface. -- The PUDL data processing pipeline primarily generates Apache Parquet files as its - outputs. These outputs can be found in `$PUDL_OUTPUT/parquet/` where `$PUDL_OUTPUT` is - an environment variable which should be set by the user. - -## Development environment tips - -- PUDL uses pixi to manage its Python environment and dependencies. All dependencies and - configuration are defined in `pyproject.toml`. -- The default pixi environment includes all development tools. -- To run commands in the pixi environment, prefix them with `pixi run` (e.g., - `pixi run pytest`) -- Pixi environments and tasks are defined in `pyproject.toml` under `[tool.pixi]` - sections. -- PUDL uses ruff to lint and automatically format python code. Before staging files for - a commit, always run `pixi run prek run ruff-check --all-files` and - `pixi run prek run ruff-format --all-files` -- A number of pre-commit hooks are defined in .pre-commit-config.yaml. -- We try to use appropriate type annotations in function, class, and method definitions, - but they are not yet checked or enforced. They are primarily to improve readability - for humans, LLMs, and IDEs. +# AGENTS.md + +## Project overview + +PUDL ingests raw public energy data (EIA, FERC, EPA, and others) and transforms it into +clean, analysis-ready tables. The pipeline is orchestrated using Dagster assets and jobs. + +## About this file + +`AGENTS.md` is the canonical instruction file for this repository. `CLAUDE.md` in +the same directory is a symlink that points to `AGENTS.md` — they are always +identical, not independent files to be kept in sync. 
+ +### Working across multiple worktrees + +When working in multiple git worktrees simultaneously, any `AGENTS.md` injected +into your context at session start reflects the **primary working directory** only. +If you encounter an `AGENTS.md` at a different path during the same session, do not +assume it is the same file — it may be a different version of this document, or +belong to an entirely different repository. + +To avoid this confusion: + +- Always use full absolute paths when referencing or comparing `AGENTS.md` files + across worktrees — this makes the distinction visible immediately. +- When you need to know which `AGENTS.md` governs a particular worktree, read it + directly from that worktree's directory rather than relying on the + context-loaded version. + +## Repository structure + +Key directories under `src/pudl/`: + +- `extract/` — one module per data source; reads raw inputs via the datastore and + produces lightly-typed DataFrames +- `transform/` — one module per data source; cleans, normalizes, and validates data +- `etl/` — Dagster asset and job definitions, organized by data source; top-level + `defs` object and all jobs live in `pudl.etl` +- `metadata/` — table and column metadata (`classes.py`, `fields.py`, `resources.py`); + "Resources" are tables, "Fields" are columns +- `glue/` — entity resolution tables that link IDs across data sources +- `analysis/` — higher-level analytical assets built on top of the core ETL outputs +- `helpers.py` — shared utility functions; check here before writing new helpers +- `io_managers.py` — Dagster IO managers for SQLite, Parquet, and FERC SQLite reads +- `settings.py` — Pydantic settings models for all datasets and ETL configuration +- `resources.py` — Dagster resources (`etl_settings`, `datastore`, `zenodo_dois`, etc.) 
+`ferc_sqlite_provenance.py` — fingerprinting and compatibility checks for FERC SQLite + databases across separate job runs + +Other important directories: + +- `dbt/` — dbt models used for data validation only (not transformation) +- `test/unit/` — fast unit tests; run these during development +- `test/integration/` — slow integration tests; do not run interactively +- `docs/` — Sphinx documentation source (reStructuredText) +- `src/pudl/package_data/settings/` — packaged Dagster run config YAML files + (`dg_fast.yml`, `dg_full.yml`, `dg_pytest.yml`, `dg_nightly.yml`) + +## Development environment + +### Inputs and outputs + +Raw inputs and pipeline outputs live **outside the repository**, in directories with +sufficient disk space. Their locations are set by two environment variables: + +- `$PUDL_INPUT` — root of the raw input datastore. Raw data files are downloaded here + by the `pudl_datastore` CLI and read by the pipeline at runtime. Do not write to + this directory manually. +- `$PUDL_OUTPUT` — root of all pipeline outputs. Contents include: + - Apache Parquet files (`$PUDL_OUTPUT/parquet/`) — the primary analytical outputs + - SQLite databases (`$PUDL_OUTPUT/*.sqlite`) — used for FERC raw data and some outputs + - DuckDB databases (`$PUDL_OUTPUT/*.duckdb`) — currently only for FERC XBRL data + - JSON datapackage descriptors (`$PUDL_OUTPUT/*_datapackage.json`) — frictionless + datapackage metadata describing the schema and structure of the tabular outputs + +Never assume these directories are inside the repository. Never hardcode paths to them. + +### Python environment + +PUDL uses `pixi` for dependency and task management. Always use `pixi run <command>` to +ensure commands run in the correct environment. + +**Never** use `uv`, `pip`, `conda`, `venv`, or any other tool to install packages or +create Python environments. Do not run `uv run`, `uv pip install`, `python -m venv`, or +anything similar. Pixi is the only permitted environment manager.
+ +Project tasks and environments are defined in `pyproject.toml` under `[tool.pixi]`. +Git pre-commit hooks are defined in `.pre-commit-config.yaml`. We use `prek` to run +pre-commit hooks both interactively and as git hooks. + +### Adding dependencies + +Adding a **runtime dependency** requires updating `pyproject.toml` in two places: + +1. `[tool.pixi.dependencies]` — the full conda-forge listing (dev + test + runtime). +2. `[tool.pixi.package.run-dependencies]` — the minimal runtime set used when PUDL is + installed as a conda package (e.g. by `pudl-archiver`). Keep this in sync with the + expansive list above. + +Dev-only or test-only dependencies belong only in `[tool.pixi.dependencies]` (or +`[tool.pixi.feature.dev.dependencies]`). After editing, run `pixi install` to update +the lockfile. + +If a new pre-commit hook is needed, add it to `.pre-commit-config.yaml` and run +`pixi run prek install` to update the git hooks. + +## Common commands + +```bash +# New worktree initialization +pixi install +pixi run prek install + +# Linting and formatting +pixi run prek run --all-files # run all hooks on all files +pixi run prek run ruff-check --all-files # lint without fixing +pixi run prek run ruff-format --all-files # fix formatting + +# Type checking (faster than mypy) +pixi run ty check src/pudl/path/to/file.py + +# Unit tests (fast; run before every commit) +pixi run pytest-unit + +# dbt data validation tests +pixi run dbt_helper validate # all dbt tests +pixi run dbt_helper validate --asset-select "key:out_eia__yearly_generators" # one asset + +# Running the ETL +pixi run ferc-to-sqlite # FERC SQLite only +pixi run pudl-with-ferc-to-sqlite # full ETL (local dev) +pixi run dg launch --job pudl_with_ferc_to_sqlite --config src/pudl/package_data/settings/dg_fast.yml + +# Dagster UI and CLI +pixi run dg dev # start webserver and daemons +pixi run dg check defs --verbose # sanity-check that all defs load + +# Documentation build +pixi run docs-check # faster, 
disables intersphinx, no rendered output +pixi run docs-build # slower, requires network, produces HTML in docs/_build/html +``` + +## Preferred CLI tools + +### rg (ripgrep) + +Use instead of `grep` for all codebase searches. Faster, respects `.gitignore`, and +produces cleaner output. + +```bash +rg "class FercDbfExtractor" src/ # basic search +rg -t py "FercSQLiteProvenance" src/ # restrict to Python files +rg -C 3 "assert_ferc_sqlite_compatible" src/ # show 3 lines of context +rg '"plant_id_eia"' src/pudl/metadata/ # find a field/column definition in metadata +``` + +### ty + +Use to find Python type errors before committing. `ty` uses its own error codes +(e.g. `missing-argument`, `unresolved-attribute`) which differ from mypy's +(e.g. `call-arg`, `attr-defined`). + +For suppression syntax, see **Type error suppression** in the Code Style section. + +### jq + +Parses JSON files. Takes the filename as a positional argument. + +```bash +jq '.nodes | keys' dbt/target/manifest.json # all dbt node names +jq '[.nodes[] | select(.resource_type == "model") | .name]' dbt/target/manifest.json # model names only +jq '.nodes["model.pudl.fuel_ferc1"]' dbt/target/manifest.json # inspect a specific dbt node +jq '[.resources[].name]' "$PUDL_OUTPUT/ferc1_xbrl_datapackage.json" # table names in a datapackage +jq '.resources[] | select(.name == "identification_001_duration") | .schema.fields[].name' "$PUDL_OUTPUT/ferc1_xbrl_datapackage.json" # column names for a table +``` + +### dbt_helper + +PUDL's wrapper around `dbt build` that annotates test failures with the actual query +results, making failures much easier to diagnose. Use instead of raw `dbt build`. +Accepts Dagster asset selection syntax via `--asset-select`. 
+ +```bash +pixi run dbt_helper validate # run all dbt tests +pixi run dbt_helper validate --asset-select "key:out_eia__yearly_generators" # one asset +pixi run dbt_helper validate --asset-select "+key:out_eia__yearly_generators" # asset + upstream +pixi run dbt_helper validate --asset-select "key:out_eia__yearly_generators" --exclude "*check_row_counts*" +``` + +**`dbt_helper validate` runs against the Parquet files in `$PUDL_OUTPUT`.** If those +files were not produced by materializing the corresponding asset using the full ETL +settings on the current branch, the validation results will not be reliable. Only +validate tables you are actively working on, and always materialize them first on the +current branch: + +```bash +pixi run dg launch --assets "out_eia__yearly_generators" --config src/pudl/package_data/settings/dg_full.yml +pixi run dbt_helper validate --asset-select "key:out_eia__yearly_generators" +``` + +### dg + +Use `pixi run dg` to ensure `dg` is always run in the correct environment. + +### ruff + +Can be run directly on specific files or via `prek` on all files. + +```bash +pixi run ruff check src/pudl/path/to/file.py # check a specific file +pixi run ruff format src/pudl/path/to/file.py # format a specific file +pixi run prek run ruff-check --all-files # check everything before committing +pixi run prek run ruff-format --all-files # format everything before committing +``` ## Available skills -There are a number of skills defined in skills-lock.json that should be available to you. -If they're not available, use `pixi run install-skills` to install them. - -## Testing instructions - -- PUDL uses pytest to manage its unit and integration tests. -- Tests should avoid using unittest and monkeypatch, and use pytest-mock. 
-- Rather than enumerating various test cases within a single test function, the - tests should use the pytest.parametrize decorator to enumerate tests cases, specifying - the appropriate success or failure or exception to be raised for each test as - appropriate. -- Tests must be run inside the pixi environment. -- When individual tests are run, we should turn off coverage collection, since otherwise - they will fail since they only cover a small portion of the codebase. -- Test coverage collection should be disabled using `--no-cov` when running individual - tests to avoid getting spurious warnings. -- For example, the unit tests can be run with `pixi run pytest --no-cov test/unit`. -- We use dbt only for data validation, and NOT for data transformations. The PUDL data - tests are under the `dbt/` directory. -- dbt commands must be typically run from within the dbt directory, e.g.: - `cd dbt && pixi run dbt build` -- The PUDL integration tests process a substantial amount of data and take up to an hour - to run, and so should not generally be run during development interactively. - -## Code Style Guidelines - -- Follow pandas naming conventions: use `df` for DataFrames, descriptive column names -- Prefer longer, readable, descriptive variable names over short, cryptic ones. -- Use explicit type hints for function parameters and returns where helpful. -- Prefer method chaining for pandas operations when it improves readability. -- Use `pathlib.Path` for file system operations instead of string concatenation. -- Follow snake_case for functions/variables, PascalCase for classes. -- Use f-strings for string formatting, including in logging statements. -- Write docstrings for all public functions/classes using Google style python - docstrings. -- Limit lines to 88 characters for better readability. Do not artificially restrict line - length to 80 characters. -- Do not use `print()` statements; use python's logging system instead. 
- -## PUDL-Specific Patterns - -- Asset dependencies in Dagster should be explicit and well-documented -- In general, data validation should happen in dbt, not in Dagster asset checks. -- Sanity checks that validate assumptions about the data should be done as it is being - transformed, with assertions failing loudly if expectations are not met. -- Use PUDL's existing utility functions in `pudl.helpers` when available. -- Raw data access should use the datastore pattern, not direct file I/O. -- Use nullable pandas dtypes (e.g. `pd.Int64Dtype()` or `pd.StringDtype()`) when - possible, to avoid generic `object` dtypes and mixed NULL values. -- Parquet outputs should use snappy compression and pyarrow dtypes. -- Metadata describing the tables, columns, and data sources can be found in the - `pudl.metadata` subpackage. "Resources" are tables and "Fields" are columns. -- Metadata classes defined in the `pudl.metadata.classes` module using Pydantic - generally mirror the frictionless datapackage standard. -- Our documentation is built using Sphinx. The source files are in the `docs/` - directory. The source files are in reStructuredText format. -- Whenever we make significant changes to the codebase, they should be noted in the PUDL - release notes found at `docs/release_notes.rst`. - -## Performance Considerations - -- Use vectorized pandas operations instead of row-wise `apply` or loops. -- Consider using just-in-time compilation with numba for performance-critical code. -- Do not use inplace operations on pandas DataFrames. -- Avoid chained indexing in pandas to prevent SettingWithCopyWarning. -- Use efficient pandas merging and joining techniques, ensuring indexes are set - appropriately. -- Avoid creating unnecessary intermediate DataFrames. -- Use categorical dtypes for columns with a limited set of values to save memory. -- Profile and optimize any code that processes large datasets. 
-- PUDL relies primarily on pandas for data processing, but in cases where performance or - memory limitations are important, we may also use DuckDB or polars dataframes. -- For large datasets (>1GB), consider polars for aggregations before pandas. -- Use polars for memory-intensive operations or when pandas performance is limiting. +Skills are defined in `skills-lock.json`. If not already installed, run +`pixi run install-skills`. + +- **`dagster-expert`** — Dagster and `dg` CLI reference. Use when adding or modifying + assets, resources, IO managers, jobs, sensors, or any other Dagster construct. +- **`dignified-python`** — production Python coding standards (3.10-3.13). Use when + writing, reviewing, or refactoring Python code. + +## Dagster architecture + +**Settings flow**: Always pass config via `dg launch --config dg_xxx.yml`. Never +hand-assemble `run_config` dicts. The YAML path is read by `PudlEtlSettingsResource`, +which loads `EtlSettings` and injects it into all assets and IO managers. + +**FERC SQLite provenance**: each FERC SQLite materialization records a fingerprint +(Zenodo DOI, years, ETL settings hash) in Dagster asset metadata. Downstream IO managers +call `assert_ferc_sqlite_compatible()` before reading and raise a descriptive +`RuntimeError` if the stored fingerprint does not match the current run. + +## Testing + +Always include `--no-cov` when running pytest directly (not via a pixi task) to skip +coverage collection and avoid spurious failures. + +### Unit tests + +Unit tests live under `test/unit/`, take up to 2 minutes, and run automatically via the +pre-commit hook on every commit. + +```bash +pixi run pytest-unit # all unit tests +pixi run pytest --no-cov test/unit/path/to/test_file.py # single file +pixi run pytest --no-cov test/unit/extract/excel_test.py::TestGenericExtractor # single class +``` + +### Integration tests + +Integration tests live under `test/integration/` and take up to 60 minutes. 
They use a +`prebuilt_outputs` fixture that runs the full ETL via `dg launch` as a subprocess with +`dg_pytest.yml` as the default config. Do not run them interactively during development. + +```bash +pixi run pytest-integration # full integration suite +pixi run pytest-ci # docs + unit + integration + dbt + coverage +pixi run pytest --no-cov --live-pudl-output test/integration # using existing local outputs +``` + +### Custom pytest flags + +- `--live-pudl-output` — skip the prebuild and use existing local PUDL outputs. Useful + when your change doesn't affect the ETL itself (e.g. analysis code or data validation). + **Cannot be combined with unit tests** in the same session; the two suites require + incompatible `$PUDL_OUTPUT` environment variable handling. +- `--temp-pudl-input` — download a fresh copy of raw inputs for this run only, instead + of reusing the local datastore cache. Use when testing datastore functionality. +- `--dg-config PATH` — override the default `dg_pytest.yml` with a custom Dagster config. + +### Fixture constraints + +Do not run tests that depend on `prebuilt_outputs`, `pudl_engine`, `ferc1_xbrl_engine`, +or `ferc1_dbf_engine` fixtures during development — these require a full integration ETL +build. Exception: with `--live-pudl-output`, these fixtures use existing outputs and run +quickly. + +### Test style + +Use pytest-mock (`mocker`). Avoid `unittest` and `monkeypatch`. + +## Code style + +**Acronyms in compound class names**: In compound class names that contain multiple +acronyms, capitalize acronyms as words: e.g. `FercDbf`, `FercXbrl`, `SQLite` (SQLite +is a special case — all SQL letters are capitalized because the L participates in both +the acronym and the word "Lite"). + +**Line length**: limit lines to 88 characters. Do not artificially restrict to 80. + +**Type annotations**: add annotations wherever they aid readability or IDE +inference — function signatures are the highest-value target. 
Annotations on +internal variables are optional; add them only when the type is non-obvious. +Enforcement is currently light and experimental, so err on the side of annotating +rather than skipping. + +**Type error suppression**: prefer `# type: ignore[specific-code]` when a single +code suppresses the warning across all type checkers being used. When `ty` and mypy +use different codes for the same error and there is no shared code, fall back to a +bare `# type: ignore` — but you must also add `# noqa: PGH003` to silence ruff's +rule that forbids bare ignores: + +```python +result = some_dynamic_call() # type: ignore # noqa: PGH003 +``` + +The `# noqa: PGH003` is not optional: without it, ruff will reject the bare ignore. +Reserve this pattern for cases where no single error code works across checkers. +If the suppression is non-obvious, add a prose comment explaining why: + +```python +# EIA raw data uses mixed string/int years; we coerce downstream. +year = row["year"] # type: ignore # noqa: PGH003 +``` + +**Logging**: never use `print()` outside of CLI interfaces. Use Python's `logging` +module via the `pudl.logging_helpers` module. Obtain a logger at module level with +`logger = pudl.logging_helpers.get_logger(__name__)`. Never use old-style `%` +formatting or `str.format()` for log messages that include variable values. Instead +always use f-strings for log messages that need to include variable values: + +```python +from pudl import logging_helpers +logger = logging_helpers.get_logger(__name__) + +logger.info(f"Processing {len(df)} rows for year {year}.") +logger.warning(f"Skipping {plant_id=} — missing required field.") +``` + +## Architecture and key patterns + +- Data validation belongs in dbt, not in Dagster asset checks. +- Sanity checks on data assumptions embedded within the `pudl.transform` modules + should raise `AssertionError` loudly when assumptions are violated, rather than + silently passing and causing downstream issues. 
+- Use existing utility functions in `pudl.helpers` before writing new ones. +- Raw data access must use the datastore pattern, not direct file I/O. +- Use nullable pandas dtypes (`pd.Int64Dtype()`, `pd.StringDtype()`) to avoid + generic `object` dtypes and mixed NULL values. +- Parquet outputs use snappy compression and pyarrow dtypes. +- For large datasets (>1GB), use polars or DuckDB to read data instead of pandas. + +### API compatibility and refactoring scope + +PUDL is an **application**, not a library. No external consumers depend on its internal +APIs. You do not need to maintain backwards-compatibility shims, deprecation warnings, or +gradual cut-overs when refactoring. A change is complete when every call-site and +reference inside the repository has been updated and the tests pass. + +**Excluded from scope**: the `devtools/` and `notebooks/` directories are informal, +untested, and already broken. Do not spend time updating them, and do not let broken +references there block a refactor. + +## Metadata system + +Metadata describing tables, columns, and data sources lives in `pudl.metadata`. +"Resources" are tables; "Fields" are columns. Metadata classes in +`pudl.metadata.classes` use Pydantic and mirror the frictionless datapackage standard. + +## PUDL developer reference docs + +The following files under `docs/dev/` and `docs/methodology/` cover PUDL-specific +concepts that are not in the dagster-expert or dignified-python skills. **Read the +relevant file before working in that area** rather than guessing at conventions. 
+ +| File | When to read it | +| ---- | --------------- | +| `docs/dev/naming_conventions.rst` | Before naming any asset, table, column, variable, or code identifier — covers layer prefixes (`raw_`/`core_`/`out_`), table-type suffixes (`_assn`, `_ent`, `_scd`, etc.), and column-name patterns for IDs, codes, units, and flags | +| `docs/dev/metadata.rst` | Before adding or modifying a table, column, or data source — explains how Resources (tables) and Fields (columns) are defined, validated with Pandera, and wired into the frictionless datapackage | +| `docs/dev/data_guidelines.rst` | Before designing a new transformation — establishes what changes to raw data are acceptable, tidy-data requirements, unit conventions, and time-series completeness expectations | +| `docs/dev/existing_data_updates.rst` | When integrating a new year or version of an existing data source — step-by-step workflow covering file maps, extraction, transformation, schema updates, ID mapping, and validation | +| `docs/dev/datastore.rst` | When working with raw input data, the `pudl_datastore` CLI, or Zenodo DOI references in `zenodo_dois.yml` | +| `docs/dev/clone_ferc1.rst` | Before touching FERC extraction or the `ferc_to_sqlite` job — explains the DBF→SQLite and XBRL→SQLite conversion pipeline and the raw FERC asset group | +| `docs/dev/pudl_id_mapping.rst` | When working with cross-dataset entity resolution (`plant_id_pudl`, `utility_id_pudl`) or the manual ID mapping spreadsheet | +| `docs/methodology/entity_resolution.rst` | When working with `pudl.glue` or entity/SCD tables — explains how PUDL reconciles inconsistent plant and utility identities across EIA and FERC reporting | + +## Documentation and release notes + +Docs are built with Sphinx from reStructuredText source in `docs/`. Significant +user-visible or developer-visible changes must be summarized in `docs/release_notes.rst` +with references to the PR and any related issue numbers. 
Add release notes after +the feature on a branch is complete and prior to marking a PR as ready for review. + +**When writing release notes, commit messages, or any other summary of changes, always +compare the current branch against the branch it will merge into** (typically `main`, +but check the PR base branch) using `git diff <base>...HEAD` or +`git log <base>..HEAD`. Do not rely on memory or recent file reads alone — the diff +is the authoritative record of what actually changed. + +### Generated documentation files + +Several pages are assembled at build time from Jinja templates rather than edited +directly: + +- Data source pages are generated from metadata in `src/pudl/metadata/`. +- The PUDL data dictionary is generated from the full `PUDL_PACKAGE` metadata object. +- The Jinja templates for both live under `docs/templates/`. + +These generated files appear under `docs/` during the build and are cleaned up +automatically when it completes. Do not edit them — edit the templates or the metadata +instead. When searching for documentation source files, ignore `docs/_build/` entirely. + +If `pixi run docs-build` or `pixi run docs-check` fails with an error in a generated +file (e.g. an RST formatting error with a line number), re-run with +`PUDL_DOCS_KEEP_GENERATED_FILES=1` to prevent automatic cleanup and inspect the file: + +```bash +PUDL_DOCS_KEEP_GENERATED_FILES=1 pixi run docs-check +``` + +After debugging, remove the generated files with `pixi run docs-clean`. + +## Contribution workflow + +Create a new branch and a matching worktree with the same name for each feature that +will become a PR. Initialize the new worktree with: + +```bash +pixi install && pixi run prek install +``` + +After editing files run `pixi run ruff check` and `pixi run ruff format` on the changed +files. If issues still remain, fix them yourself until no issues remain. + +Newly added functionality should be accompanied by unit tests and release notes. 
Include +both the issue number and PR number in release notes entries. + +Make focused commits composed of related changes. **When relocating code, +documentation, or any other text between modules, move it verbatim — never rewrite or +reinterpret it in the same step.** If the moved content also needs edits, make those as +a separate step with explicit user approval. When moving entire files, always use `git +mv` to preserve history. When moving blocks of code between files or within the same +file, first commit the verbatim move without changes, then make any necessary edits in a +subsequent commit. + +Write informative commit messages that summarize the changes and their motivation. Use +plaintext no more than 80 characters wide, with a short summary line (max 50 chars). For +significant changes, follow the first summary with a blank line and a more detailed +description. Do not add spurious or generated files to source control. + +When you are ready to commit, run `pixi run prek-run` and fix all issues before +attempting to commit. NEVER skip pre-commit hooks by using `--no-verify`. Fix the +underlying issue or ask for help — do not bypass the checks. 
+ +### PR checklist + +Before marking a PR as ready for review: + +- [ ] Pre-commit hooks pass on all files: `pixi run prek-run` +- [ ] All unit tests pass: `pixi run pytest-unit` +- [ ] Docs check passes with no errors or warnings: `pixi run docs-check` +- [ ] If public or contributor-facing behavior changed: release notes entry has been + added to `docs/release_notes.rst` with the issue number and PR number +- [ ] The PR description includes a summary of the change, the motivation, and any + relevant context to direct reviewer attention +- [ ] PR is marked as closing the appropriate issue on GitHub, if applicable +- [ ] No generated files, credentials, or unrelated changes included in the diff + +## Sandbox safe execution + +**zsh autocorrect**: the `.pixi/` directory in the project root causes zsh to suggest +correcting `pixi` → `.pixi`. This is always wrong. Disable autocorrect before running +any pixi commands: + +```bash +unsetopt CORRECT CORRECT_ALL +pixi run ... +``` + +**Frozen execution**: use `pixi run --frozen` to prevent pixi from updating the +environment when running commands, avoiding unexpected dependency resolution: + +```bash +pixi run --frozen pytest --no-cov test/unit +pixi run --frozen dg check defs --verbose +``` + +Prefer already-installed binaries (`dg`, `rg`, `ruff`, `ty`, `jq`) before invoking +commands that may trigger package resolution or updates. + +When running `pixi run docs-build` in a sandbox environment, set +`PUDL_DOCS_DISABLE_INTERSPHINX=1` to avoid the need for external network connectivity. 
diff --git a/docker/gcp_ferceqr_etl.sh b/docker/gcp_ferceqr_etl.sh index 3ea1a2b472..37ef5f7627 100644 --- a/docker/gcp_ferceqr_etl.sh +++ b/docker/gcp_ferceqr_etl.sh @@ -31,8 +31,8 @@ function run_ferceqr_etl() { authenticate_gcp && dagster dev & - # Kick off the ferceqr_etl job asynchronously - dagster job backfill --noprompt -j ferceqr_etl --location pudl.etl + # Kick off the ferceqr job asynchronously + dagster job backfill --noprompt -j ferceqr --location pudl.etl # Wait for a file called 'SUCCESS' or 'FAILURE' to be created in PUDL_OUTPUT indicating completion # Timeout after 6 hours if file still doesn't exist inotifywait -e create -t 21600 --include 'SUCCESS|FAILURE' "$PUDL_OUTPUT" diff --git a/docker/gcp_pudl_etl.sh b/docker/gcp_pudl_etl.sh index f287f0f512..6bcee9858b 100644 --- a/docker/gcp_pudl_etl.sh +++ b/docker/gcp_pudl_etl.sh @@ -36,41 +36,12 @@ function initialize_postgres() { psql -c "CREATE DATABASE dagster OWNER dagster" -h127.0.0.1 -p5433 } -function run_ferc_to_sqlite() { - echo "Running FERC to SQLite conversion" - send_slack_msg ":play: Deployment started for $BUILD_ID :floppy_disk:" +function run_dagster() { + echo "Launching Dagster and running the PUDL job" + send_slack_msg ":play: Launching Dagster and running the PUDL job for $BUILD_ID :floppy_disk:" initialize_postgres && authenticate_gcp && - alembic upgrade head && - ferc_to_sqlite \ - --loglevel DEBUG \ - --workers 8 \ - "$PUDL_SETTINGS_YML" -} - -function run_pudl_etl() { - echo "Running PUDL ETL" - pudl_etl \ - --loglevel DEBUG \ - "$PUDL_SETTINGS_YML" -} - -function run_unit_tests() { - echo "Running unit tests" - pytest \ - -n auto \ - --etl-settings "$PUDL_SETTINGS_YML" \ - --live-dbs test/unit \ - --no-cov -} - -function run_integration_tests() { - echo "Running integration tests" - pytest \ - -n auto \ - --etl-settings "$PUDL_SETTINGS_YML" \ - --live-dbs test/integration \ - --no-cov + pixi run pudl-with-ferc-to-sqlite-nightly } function write_pudl_datapackage() { @@ -278,18 
+249,18 @@ function notify_slack() { fi message+=":time: \`[${total_build_duration}]\` Total Build Duration\n\n" - message+="$(slack_stage_status "Run FERC to SQLite" "$FERC_TO_SQLITE_STATUS" "$FERC_TO_SQLITE_DURATION")\n" - message+="$(slack_stage_status "Run PUDL ETL" "$PUDL_ETL_STATUS" "$PUDL_ETL_DURATION")\n" + message+="$(slack_stage_status "Run PUDL Dagster Job" "$DAGSTER_STATUS" "$DAGSTER_DURATION")\n" message+="$(slack_stage_status "Unit Tests" "$UNIT_TEST_STATUS" "$UNIT_TEST_DURATION")\n" message+="$(slack_stage_status "Integration Tests" "$INTEGRATION_TEST_STATUS" "$INTEGRATION_TEST_DURATION")\n" + message+="$(slack_stage_status "Data Validations (FKs/dbt)" "$DATA_VALIDATION_STATUS" "$DATA_VALIDATION_DURATION")\n" message+="$(slack_stage_status "Write PUDL Datapackage" "$WRITE_DATAPACKAGE_STATUS" "$WRITE_DATAPACKAGE_DURATION")\n" message+="$(slack_stage_status "Save Build Outputs" "$SAVE_OUTPUTS_STATUS" "$SAVE_OUTPUTS_DURATION")\n" - message+="$(slack_stage_status "Prep Outputs for Distribution" "$CLEAN_UP_OUTPUTS_STATUS" "$CLEAN_UP_OUTPUTS_DURATION")\n" + message+="$(slack_stage_status "Prep Outputs for Distribution" "$PREP_OUTPUTS_STATUS" "$PREP_OUTPUTS_DURATION")\n" message+="$(slack_stage_status "Update \`nightly\` Branch" "$UPDATE_NIGHTLY_STATUS" "$UPDATE_NIGHTLY_DURATION")\n" message+="$(slack_stage_status "Update \`stable\` Branch" "$UPDATE_STABLE_STATUS" "$UPDATE_STABLE_DURATION")\n" - message+="$(slack_stage_status "Distribute Outputs to S3/GCS" "$DISTRIBUTION_BUCKET_STATUS" "$DISTRIBUTION_BUCKET_DURATION")\n" - message+="$(slack_stage_status "Redeploy PUDL Data Viewer :eel: :hole:" "$TRIGGER_DATA_VIEWER_DEPLOY_STATUS" "$TRIGGER_DATA_VIEWER_DEPLOY_DURATION")\n" - message+="$(slack_stage_status "Write-protect \`$BUILD_REF\` Outputs on GCS" "$GCS_TEMPORARY_HOLD_STATUS" "$GCS_TEMPORARY_HOLD_DURATION")\n\n" + message+="$(slack_stage_status "Distribute \`$BUILD_REF\` to S3/GCS" "$DISTRIBUTION_BUCKET_STATUS" "$DISTRIBUTION_BUCKET_DURATION")\n" + 
message+="$(slack_stage_status "Redeploy Eel Hole :eel: :hole:" "$TRIGGER_DATA_VIEWER_DEPLOY_STATUS" "$TRIGGER_DATA_VIEWER_DEPLOY_DURATION")\n" + message+="$(slack_stage_status "Protect \`$BUILD_REF\` GCS Outputs" "$GCS_TEMPORARY_HOLD_STATUS" "$GCS_TEMPORARY_HOLD_DURATION")\n\n" # we need to trim off the last dash-delimited section off the build ID to get a valid log link message+="\n\n" message+="\n\n" @@ -308,7 +279,7 @@ function merge_tag_into_branch() { git remote set-url origin "https://pudlbot:$PUDL_BOT_PAT@github.com/catalyst-cooperative/pudl.git" && set -x && echo "Updating $BRANCH branch to point at $TAG." && - # Check out the original row counts so the working tree is clean. + # Check out the original row counts so the working tree is clean. # This is a temporary hack around the unstable row-counts in some tables. # TODO: fix this for real in issue #4364 / PR #4367 git checkout -- dbt/seeds/ && @@ -332,7 +303,7 @@ function upload_stable_distribution() { upload_to_dist_path "stable" } -function clean_up_outputs_for_distribution() { +function prep_outputs_for_distribution() { # Compress the SQLite DBs for easier distribution pushd "$PUDL_OUTPUT" && find ./ -maxdepth 1 -type f -name '*.sqlite' -print | parallel --will-cite 'zip -9 "{1}.zip" "{1}"' && @@ -347,7 +318,7 @@ function clean_up_outputs_for_distribution() { # Move the parquet datapackage to the output directory also! 
mv ./pudl_parquet_datapackage.json "$PUDL_OUTPUT" && popd && - # Remove any remaiining files and directories we don't want to distribute + # Remove any remaining files and directories we don't want to distribute rm -rf "$PUDL_OUTPUT/parquet" && rm -f "$PUDL_OUTPUT/pudl_dbt_tests.duckdb" } @@ -361,28 +332,28 @@ STAGE_SKIPPED="skipped" BUILD_START_EPOCH_SECONDS=$(date +%s) # Initialize our stage-status variables so they all definitely have a value to check -FERC_TO_SQLITE_STATUS="$STAGE_SKIPPED" -PUDL_ETL_STATUS="$STAGE_SKIPPED" +DAGSTER_STATUS="$STAGE_SKIPPED" UNIT_TEST_STATUS="$STAGE_SKIPPED" INTEGRATION_TEST_STATUS="$STAGE_SKIPPED" -WRITE_DATAPACKAGE_STATUS="$STAGE_SKIPPED" +DATA_VALIDATION_STATUS="$STAGE_SKIPPED" SAVE_OUTPUTS_STATUS="$STAGE_SKIPPED" UPDATE_NIGHTLY_STATUS="$STAGE_SKIPPED" UPDATE_STABLE_STATUS="$STAGE_SKIPPED" -CLEAN_UP_OUTPUTS_STATUS="$STAGE_SKIPPED" +WRITE_DATAPACKAGE_STATUS="$STAGE_SKIPPED" +PREP_OUTPUTS_STATUS="$STAGE_SKIPPED" DISTRIBUTION_BUCKET_STATUS="$STAGE_SKIPPED" TRIGGER_DATA_VIEWER_DEPLOY_STATUS="$STAGE_SKIPPED" GCS_TEMPORARY_HOLD_STATUS="$STAGE_SKIPPED" -FERC_TO_SQLITE_DURATION="" -PUDL_ETL_DURATION="" +DAGSTER_DURATION="" UNIT_TEST_DURATION="" INTEGRATION_TEST_DURATION="" -WRITE_DATAPACKAGE_DURATION="" +DATA_VALIDATION_DURATION="" SAVE_OUTPUTS_DURATION="" UPDATE_NIGHTLY_DURATION="" UPDATE_STABLE_DURATION="" -CLEAN_UP_OUTPUTS_DURATION="" +WRITE_DATAPACKAGE_DURATION="" +PREP_OUTPUTS_DURATION="" DISTRIBUTION_BUCKET_DURATION="" TRIGGER_DATA_VIEWER_DEPLOY_DURATION="" GCS_TEMPORARY_HOLD_DURATION="" @@ -403,7 +374,10 @@ fi # Set these variables *only* if they are not already set by the container or workflow: : "${PUDL_GCS_OUTPUT:=gs://builds.catalyst.coop/$BUILD_ID}" -: "${PUDL_SETTINGS_YML:=/home/ubuntu/pudl/src/pudl/package_data/settings/etl_full.yml}" +# Keep the nightly Dagster config path repo-relative so the same pixi task commands +# work both locally and inside the nightly build container. 
+: "${DG_NIGHTLY_CONFIG:=src/pudl/package_data/settings/dg_nightly.yml}" +export DG_NIGHTLY_CONFIG # Save credentials for working with AWS S3 # set +x / set -x is used to avoid printing the AWS credentials in the logs @@ -415,16 +389,16 @@ echo "aws_access_key_id = ${AWS_ACCESS_KEY_ID}" >>~/.aws/credentials echo "aws_secret_access_key = ${AWS_SECRET_ACCESS_KEY}" >>~/.aws/credentials set -x -run_stage FERC_TO_SQLITE_STATUS FERC_TO_SQLITE_DURATION overwrite run_ferc_to_sqlite -run_stage PUDL_ETL_STATUS PUDL_ETL_DURATION append run_pudl_etl -run_stage UNIT_TEST_STATUS UNIT_TEST_DURATION append run_unit_tests -run_stage INTEGRATION_TEST_STATUS INTEGRATION_TEST_DURATION append run_integration_tests +run_stage DAGSTER_STATUS DAGSTER_DURATION overwrite run_dagster +run_stage UNIT_TEST_STATUS UNIT_TEST_DURATION append pixi run pytest-unit-nightly +run_stage INTEGRATION_TEST_STATUS INTEGRATION_TEST_DURATION append pixi run pytest-integration-nightly +run_stage DATA_VALIDATION_STATUS DATA_VALIDATION_DURATION append pixi run pytest-data-validation-nightly if ! 
any_stage_failed \ - "$FERC_TO_SQLITE_STATUS" \ - "$PUDL_ETL_STATUS" \ + "$DAGSTER_STATUS" \ "$UNIT_TEST_STATUS" \ - "$INTEGRATION_TEST_STATUS"; then + "$INTEGRATION_TEST_STATUS" \ + "$DATA_VALIDATION_STATUS"; then touch "$PUDL_OUTPUT/success" fi @@ -439,17 +413,17 @@ pg_ctlcluster "$PG_VERSION" dagster stop 2>&1 | tee -a "$LOGFILE" run_stage SAVE_OUTPUTS_STATUS SAVE_OUTPUTS_DURATION append save_outputs_to_gcs -exit_on_stage_failure "$FERC_TO_SQLITE_STATUS" -exit_on_stage_failure "$PUDL_ETL_STATUS" +exit_on_stage_failure "$DAGSTER_STATUS" exit_on_stage_failure "$UNIT_TEST_STATUS" exit_on_stage_failure "$INTEGRATION_TEST_STATUS" +exit_on_stage_failure "$DATA_VALIDATION_STATUS" if [[ "$BUILD_TYPE" == "nightly" ]]; then run_stage UPDATE_NIGHTLY_STATUS UPDATE_NIGHTLY_DURATION append merge_tag_into_branch "$NIGHTLY_TAG" nightly # Remove files we don't want to distribute and zip SQLite and Parquet outputs - run_stage CLEAN_UP_OUTPUTS_STATUS CLEAN_UP_OUTPUTS_DURATION append clean_up_outputs_for_distribution - exit_on_stage_failure "$CLEAN_UP_OUTPUTS_STATUS" - # Copy cleaned up outputs to the S3 and GCS distribution buckets + run_stage PREP_OUTPUTS_STATUS PREP_OUTPUTS_DURATION append prep_outputs_for_distribution + exit_on_stage_failure "$PREP_OUTPUTS_STATUS" + # Copy prepped outputs to the S3 and GCS distribution buckets run_stage DISTRIBUTION_BUCKET_STATUS DISTRIBUTION_BUCKET_DURATION append upload_nightly_distribution run_stage TRIGGER_DATA_VIEWER_DEPLOY_STATUS TRIGGER_DATA_VIEWER_DEPLOY_DURATION append deploy_data_viewer if ! 
stage_failed "$DISTRIBUTION_BUCKET_STATUS"; then @@ -463,9 +437,9 @@ if [[ "$BUILD_TYPE" == "nightly" ]]; then elif [[ "$BUILD_TYPE" == "stable" ]]; then run_stage UPDATE_STABLE_STATUS UPDATE_STABLE_DURATION append merge_tag_into_branch "$BUILD_REF" stable # Remove files we don't want to distribute and zip SQLite and Parquet outputs - run_stage CLEAN_UP_OUTPUTS_STATUS CLEAN_UP_OUTPUTS_DURATION append clean_up_outputs_for_distribution - exit_on_stage_failure "$CLEAN_UP_OUTPUTS_STATUS" - # Copy cleaned up outputs to the S3 and GCS distribution buckets + run_stage PREP_OUTPUTS_STATUS PREP_OUTPUTS_DURATION append prep_outputs_for_distribution + exit_on_stage_failure "$PREP_OUTPUTS_STATUS" + # Copy prepped outputs to the S3 and GCS distribution buckets run_stage DISTRIBUTION_BUCKET_STATUS DISTRIBUTION_BUCKET_DURATION append upload_stable_distribution # This is a versioned release. Ensure that outputs can't be accidentally deleted. # We can only do this on the GCS bucket, not S3 @@ -481,8 +455,8 @@ elif [[ "$BUILD_TYPE" == "stable" ]]; then elif [[ "$BUILD_TYPE" == "workflow_dispatch" ]]; then # Remove files we don't want to distribute and zip SQLite and Parquet outputs - run_stage CLEAN_UP_OUTPUTS_STATUS CLEAN_UP_OUTPUTS_DURATION append clean_up_outputs_for_distribution - exit_on_stage_failure "$CLEAN_UP_OUTPUTS_STATUS" + run_stage PREP_OUTPUTS_STATUS PREP_OUTPUTS_DURATION append prep_outputs_for_distribution + exit_on_stage_failure "$PREP_OUTPUTS_STATUS" # Disable the test upload to the distribution bucket for now to avoid egress fees # and speed up the build. Uncomment if you need to test the distribution upload. @@ -521,18 +495,18 @@ rm -f ~/.aws/credentials # Notify slack about entire pipeline's success or failure; if ! 
any_stage_failed \ - "$FERC_TO_SQLITE_STATUS" \ - "$PUDL_ETL_STATUS" \ + "$DAGSTER_STATUS" \ "$UNIT_TEST_STATUS" \ "$INTEGRATION_TEST_STATUS" \ + "$DATA_VALIDATION_STATUS" \ "$WRITE_DATAPACKAGE_STATUS" \ "$SAVE_OUTPUTS_STATUS" \ "$UPDATE_NIGHTLY_STATUS" \ "$UPDATE_STABLE_STATUS" \ - "$CLEAN_UP_OUTPUTS_STATUS" \ + "$PREP_OUTPUTS_STATUS" \ "$DISTRIBUTION_BUCKET_STATUS" \ - "$TRIGGER_DATA_VIEWER_DEPLOY_STATUS" \ - "$GCS_TEMPORARY_HOLD_STATUS"; then + "$GCS_TEMPORARY_HOLD_STATUS" \ + "$TRIGGER_DATA_VIEWER_DEPLOY_STATUS"; then notify_slack "success" else notify_slack "failure" diff --git a/docs/dev/clone_ferc1.rst b/docs/dev/clone_ferc1.rst index 83d25b9629..361b09e622 100644 --- a/docs/dev/clone_ferc1.rst +++ b/docs/dev/clone_ferc1.rst @@ -32,17 +32,56 @@ If you need to work with this relatively unprocessed data, we highly recommend downloading it from one of our stable data releases or nightly build outputs, which can be found in the PUDL :ref:`access-zenodo` or :ref:`access-cloud`. -Cloning the original FERC database is the first step in the PUDL ETL process. This can -be done using the dagster UI (see :ref:`run-dagster-ui`) or with the ``ferc_to_sqlite`` -script (see :ref:`run-cli`). +The conversion of the raw FERC data is represented in the PUDL Dagster project by the +``raw_ferc_to_sqlite`` asset group which is defined in :data:`pudl.etl.defs`. If you +only need the raw FERC SQLite (or experimental DuckDB) outputs, use the dedicated +``ferc_to_sqlite`` job. If you are running the full ETL from scratch, use the +``pudl_with_ferc_to_sqlite`` job, which also includes FERC SQLite assets. + +The raw FERC conversion flow within ``ferc_to_sqlite`` looks like this: + +.. mermaid:: + + flowchart TD + A[FERC archives in Zenodo datastore] --> B[raw_ferc_to_sqlite assets] + + B --> C[DBF extraction
1994-2020] + B --> D[XBRL extraction
2021-present] + + C --> E[ferc1_dbf.sqlite] + + D --> F[ferc1_xbrl.sqlite] + D --> G[ferc1_xbrl.duckdb
(experimental)] + D --> H[ferc1_xbrl_datapackage.json] + D --> I[ferc1_xbrl_taxonomy_metadata.json] + + E --> J[Clean FERC tables in PUDL
1994-present] + F --> J + H --> J + I --> J + +The separation between FERC and PUDL is intentional: the raw FERC conversion step +produces large source databases with hundreds of tables and thousands of columns that +can be reused across multiple downstream PUDL runs (and external applications), while +the ``pudl`` job standardizes and cleans a smaller subset of those tables in the main +PUDL data products. + +The conversion can be done using the Dagster UI (see :ref:`run-dagster-ui`) or the +Dagster CLI (see :ref:`run-cli`). For local command-line usage, options include: + +.. code-block:: console + + $ pixi run ferc-to-sqlite + $ pixi run dg launch --job ferc_to_sqlite --config src/pudl/package_data/settings/dg_full.yml .. note:: We recommend using the Dagster UI to execute the ETL as it provides additional functionality for re-execution and viewing dependences. -Executing a ``ferc_to_sqlite`` job will create several outputs in your ``$PUDL_OUTPUT`` -directory. For example the FERC Form 1 outputs will include: +Executing either the ``ferc_to_sqlite`` job or the full ``pudl_with_ferc_to_sqlite`` job +will create several outputs in your ``$PUDL_OUTPUT`` directory. For example the FERC +Form 1 outputs will include: * ``$PUDL_OUTPUT/ferc1_dbf.sqlite``: Data from 1994-2020 (FoxPro/DBF) * ``$PUDL_OUTPUT/ferc1_xbrl.sqlite``: Data from 2021 onward (XBRL) @@ -53,5 +92,8 @@ directory. For example the FERC Form 1 outputs will include: * ``pudl_output/ferc1_xbrl_taxonomy_metadata.json``: A JSON version of the XBRL Taxonomy, containing additional metadata. -By default, the script pulls in all available years and tables of data. The -``ferc_to_sqlite`` jobs also extract the XBRL data for FERC Form 1, 2, 6, 60 and 714. +By default, the ``ferc_to_sqlite`` job converts all available years and tables of data +for all available FERC Data, which includes Forms 1, 2, 6, 60 and 714. You can also +choose to materialize any combination of form and format (DBF or XBRL). 
Note that the +earlier FERC 714 data (2006-2020) is distributed as CSVs, and PUDL does not yet load +all available tables. diff --git a/docs/dev/dev_setup.rst b/docs/dev/dev_setup.rst index 2031ce45b5..3b61b62b97 100644 --- a/docs/dev/dev_setup.rst +++ b/docs/dev/dev_setup.rst @@ -109,6 +109,13 @@ command in the pixi environment. There's additional information about running tests in the :doc:`testing` documentation. +Once your environment is installed, a good quick sanity check is to confirm that the +Dagster project loads successfully: + +.. code-block:: console + + $ pixi run dg check defs --verbose + ------------------------------------------------------------------------------- Updating the PUDL Development Environment ------------------------------------------------------------------------------- @@ -191,7 +198,7 @@ project codebase. This is known as `code linting makes the code easier to read, and helps catch some types of bugs before they are committed. These tools are part of the PUDL pixi environment and their configuration files are checked into the GitHub repository. If you've cloned the pudl repo and are -working inside the pudl conda environment, they should be installed and ready to go. +working inside the PUDL pixi environment, they should be installed and ready to go. Git Pre-commit Hooks ^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/dev/existing_data_updates.rst b/docs/dev/existing_data_updates.rst index 3a7539b171..237c72feff 100644 --- a/docs/dev/existing_data_updates.rst +++ b/docs/dev/existing_data_updates.rst @@ -462,11 +462,12 @@ run all the integration tests against your live PUDL DB with: .. code-block:: console - $ pixi run pytest-integration-full + $ pixi run pytest-integration-nightly + $ pixi run pytest-data-validation-nightly -We expect ``test/integration/dbt_test.py::test_dbt`` to fail at this point, but -everything else should pass. Fix any remaining failures and we'll fix dbt in the next -step. 
+We expect ``test/integration/data_validation_test.py::test_dbt`` to fail at this point, +but everything else should pass. Fix any remaining failures and we'll fix dbt in the +next step. **9.2)** When the non-dbt integration tests are passing against all years of data, sanity check the data in the database and the derived outputs by running diff --git a/docs/dev/index.rst b/docs/dev/index.rst index 9a53dd432d..42e24a54e1 100644 --- a/docs/dev/index.rst +++ b/docs/dev/index.rst @@ -7,7 +7,7 @@ Developer Guide dev_setup run_the_etl - dev_dagster + troubleshooting_dagster project_management testing data_validation_quickstart diff --git a/docs/dev/nightly_data_builds.rst b/docs/dev/nightly_data_builds.rst index 384556b324..7fc304ebe5 100644 --- a/docs/dev/nightly_data_builds.rst +++ b/docs/dev/nightly_data_builds.rst @@ -4,15 +4,17 @@ Nightly Data Builds =============================================================================== -The complete ETL and tests are run each night on a Google Compute Engine (GCE) instance +The complete ETL and tests are run each night on a Google Batch-managed VM to ensure that any new changes merged into ``main`` are fully tested. These complete builds also enable continuous deployment of PUDL's data outputs. If no changes have been merged into ``main`` since the last time the builds ran, the builds are skipped. The builds are kicked off by the ``build-deploy-pudl`` GitHub Action, which builds and pushes a Docker image with PUDL installed to `Docker Hub `__ -and deploys the image as a container to a GCE instance. The container runs the ETL and -tests, then copies the outputs to a public AWS S3 bucket for distribution. +and then launches a Google Batch job using that image. Inside the container, +``docker/gcp_pudl_etl.sh`` runs the ETL and tests, saves the raw build outputs to +``gs://builds.catalyst.coop``, and if successful publishes the distributable outputs to +our public cloud buckets. 
Breaking the Builds ------------------- @@ -21,12 +23,20 @@ tests. When they pass, we consider the results fit for public consumption. The are expected to pass. If they don't then someone needs to take responsibility for getting them working again with some urgency. -Because of how long the full build & tests take, we don't typically run them -individually before merging every PR into ``main``. However, running ``make nuke`` -(roughly equivalent to the full builds) is recommended when you've added a new year of -data or made other changes that would be expected to break the data validations, so that -the appropriate changes can be made prior to those changes hitting ``main`` and the -nightly builds. +Because of how long the full build and tests take, we don't typically run them +individually before merging every PR into ``main``. However, if you've added a new year +of data or made changes that are likely to affect the full ETL or data validations, it +is often worth running a full local build that resembles the nightly builds: + +.. code-block:: console + + $ pixi run pudl-with-ferc-to-sqlite + $ pixi run pytest-nightly + +For local development, we recommend ``pixi run pudl-with-ferc-to-sqlite`` rather than +``pixi run pudl-with-ferc-to-sqlite-nightly``. The nightly ETL task uses the same asset +graph and datasets, but it is tuned for the higher-resource nightly build environment +and enables more verbose logging. If your PR causes the build to fail, you are probably the best person to fix the problem, since you already have context on all of the changes that went into it. @@ -59,55 +69,49 @@ occurred: The GitHub Action ----------------- The ``build-deploy-pudl`` GitHub action contains the main coordination logic for -the Nightly Data Builds. The action is triggered every night and when new tags are -pushed to the PUDL repository. This way, new data outputs are automatically updated -on code releases, and PUDL's code and data are tested every night. 
The action is -modeled after an `example from the setup-gcloud GitHub action repository `__. +the Nightly Data Builds. The action is triggered every night and when new versioned +release tags are pushed to the PUDL repository. This way, new data outputs are +automatically updated for releases, and PUDL's code and data are tested every night. The ``gcloud`` command in ``build-deploy-pudl`` requires certain Google Cloud -Platform (GCP) permissions to start and update the GCE instance. We use Workflow +Platform (GCP) permissions to start and update the Google Batch VM. We use Workflow Identity Federation to authenticate the GitHub Action with GCP in the GitHub Action workflow. Deployment Action ----------------- -The ``deploy-pudl`` action separates deployment from the build process. This action -takes a git tag as an input and will find build outputs, and determine the deployment -type (stable or nightly) from the tag. It will then upload outputs from the build to -GCS and S3, update the git branch associated with the deployment type, and trigger -a zenodo release. This action can also take an optional ``staging`` flag will upload -outputs to a dedicated staging area, and will not update the git branch or trigger a -zenodo release. +The experimental ``deploy-pudl`` action separates deployment from the build process. +This action takes a git tag that has already been built as an input and will find the +corresponding build outputs and determine the deployment type (``stable`` or +``nightly``) from the tag. It will then upload outputs from the build to GCS and S3, +update the git branch associated with the deployment type, and trigger a Zenodo release. +This action can also take an optional ``staging`` flag that will upload outputs to a +dedicated staging area, and will not update the git branch or trigger a Zenodo release. 
Eventually, the deployment functionality will be removed from the ``build-deploy-pudl`` action and it will instead trigger this action at the end of a successful build. Google Compute Engine --------------------- -The PUDL image is deployed on a `Container Optimized GCE -`__ -instance, a type of virtual machine (VM) built to run containers. - We use ephemeral VMs created with `Google Batch `__ to run the nightly builds. Once the build has finished -- successfully or not -- the VM -is shut down. The build VMs use the ``e2-highmem-8`` machine type (8 CPUs and 64GB of -RAM) to accommodate the PUDL ETL's memory-intensive steps. Currently, these VMs do not -have swap space enabled, so if they run out of memory, the build will immediately +shuts itself down. The build VMs use the ``e2-highmem-8`` machine type (8 CPUs and 64GB +of RAM) to accommodate the PUDL ETL's memory-intensive steps. Currently, these VMs do +not have swap space enabled, so if they run out of memory, the build will immediately terminate. The ``deploy-pudl-vm-service-account`` service account has permissions to: 1. Write logs to Cloud Logging. -2. Start and stop the VM so the container can shut the instance off when the ETL - is complete, so Catalyst does not incur unnecessary charges. -3. Bill the ``catalyst-cooperative-pudl`` project for egress fees from accessing - the ``zenodo-cache.catalyst.coop`` bucket. Note: The ``catalyst-cooperative-pudl`` - won't be charged anything because the data stays within Google's network. +2. Start and stop the VM so the container can shut the instance off when the ETL is + complete, so Catalyst does not incur unnecessary charges. +3. Bill the ``catalyst-cooperative-pudl`` project for egress fees from accessing the + ``zenodo-cache.catalyst.coop`` bucket. Note: The ``catalyst-cooperative-pudl`` won't + be charged anything because the data stays within Google's network. 4. 
Write logs and build outputs to the ``gs://builds.catalyst.coop``, - ``gs://pudl.catalyst.coop`` and ``s3://pudl.catalyst.coop`` buckets. - Egress and storage costs for the S3 bucket are covered by - `Amazon Web Services's Open Data Sponsorship Program - `__. + ``gs://pudl.catalyst.coop`` and ``s3://pudl.catalyst.coop`` buckets. Egress and + storage costs for the S3 bucket are covered by `Amazon Web Services's Open Data + Sponsorship Program `__. Build outputs and logs are saved to the ``gs://builds.catalyst.coop`` bucket so you can access them later. Build logs and outputs are retained for 30 days and then deleted @@ -120,16 +124,26 @@ are configured to run the ``docker/gcp_pudl_etl.sh`` script. This script: 1. Notifies the ``pudl-deployments`` Slack channel that a deployment has started. Note: if the container is manually stopped, slack will not be notified. -2. Runs the ETL and full test suite. -3. Copies the outputs and logs to a directory in the ``gs://builds.catalyst.coop`` +2. Runs ``pixi run pudl-with-ferc-to-sqlite-nightly``. +3. Runs ``pixi run pytest-unit-nightly``, ``pixi run pytest-integration-nightly``, + and ``pixi run pytest-data-validation-nightly`` as separate stages. +4. Copies the outputs and logs to a directory in the ``gs://builds.catalyst.coop`` bucket. The directory is named using the git SHA of the commit that launched the build. -4. Copies the outputs to the ``gs://pudl.catalyst.coop`` and ``s3://pudl.catalyst.coop`` +5. Copies the outputs to the ``gs://pudl.catalyst.coop`` and ``s3://pudl.catalyst.coop`` buckets if the ETL and test suite run successfully. -5. Notifies the ``pudl-deployments`` Slack channel with the final build status. +6. Notifies the ``pudl-deployments`` Slack channel with the final build status, + including per-stage status and durations. + +The ``gcp_pudl_etl.sh`` script is only intended to run on a Google Batch VM with +adequate permissions. 
-The ``gcp_pudl_etl.sh script`` is only intended to run on a GCE VM with adequate -permissions. +The nightly ETL task, nightly pytest tasks, and the nightly build script all share the +same ``DG_NIGHTLY_CONFIG`` environment variable, which points at +``src/pudl/package_data/settings/dg_nightly.yml`` relative to the repository root. +Using a repo-relative path avoids a second hard-coded config path for the container and +keeps the nightly build behavior centralized in pixi while still allowing local runs to +reuse the nightly pytest commands. How to access the nightly build outputs from AWS ------------------------------------------------ @@ -169,14 +183,14 @@ command, which behaves very much like the Unix ``cp`` command: .. code:: - gcloud cp s3://pudl.catalyst.coop/nightly/pudl.sqlite.zip ./ + gcloud storage cp s3://pudl.catalyst.coop/nightly/pudl.sqlite.zip ./ If you wanted to download all of the build outputs (more than 25GB!) you can use a recursive copy: .. code:: - gcloud cp --recursive s3://pudl.catalyst.coop/nightly/ ./ + gcloud storage cp --recursive s3://pudl.catalyst.coop/nightly/ ./ How to access the nightly build outputs and logs (for the Catalyst team only) ----------------------------------------------------------------------------- @@ -237,7 +251,7 @@ like this: gcloud storage ls --long --readable-sizes gs://builds.catalyst.coop/2024-11-15-0603-60f488239-main - 6.60MiB 2024-11-15T13:28:20Z gs://builds.catalyst.coop/2024-11-15-0603-60f488239-main/2024-11-15-0603-60f488239-main-pudl-etl.log + 6.60MiB 2024-11-15T13:28:20Z gs://builds.catalyst.coop/2024-11-15-0603-60f488239-main/2024-11-15-0603-60f488239-main.log 804.57MiB 2024-11-15T12:40:35Z gs://builds.catalyst.coop/2024-11-15-0603-60f488239-main/censusdp1tract.sqlite 759.32MiB 2024-11-15T12:41:01Z gs://builds.catalyst.coop/2024-11-15-0603-60f488239-main/ferc1_dbf.sqlite 1.19GiB 2024-11-15T12:41:12Z gs://builds.catalyst.coop/2024-11-15-0603-60f488239-main/ferc1_xbrl.sqlite diff --git 
a/docs/dev/pudl_id_mapping.rst b/docs/dev/pudl_id_mapping.rst index 999faf0554..8d8c940e92 100644 --- a/docs/dev/pudl_id_mapping.rst +++ b/docs/dev/pudl_id_mapping.rst @@ -104,7 +104,7 @@ just the script that extracts the unmapped IDs with: .. code-block:: console - $ pixi run pytest test/integration/glue_test.py --live-dbs --save-unmapped-ids + $ pixi run pytest test/integration/glue_test.py --live-pudl-output --save-unmapped-ids The ``--save-unmapped-ids`` flag saves unmapped plants and utilities in the ``devtools/ferc1-eia-glue`` folder by default. @@ -192,9 +192,9 @@ notebook to speed up this process. capacity first to see if the facilities are the same. If that’s indeterminate, you can Google the plant to see if it has the same location or if there is ownership or construction history that helps determine if the facilities are the same or - co-located. Presuming you've run the ETL with the ``--ignore-foreign-key-constraints`` - flag, you can also look at the PUDL ``plants_eia860`` and ``plants_all_ferc1`` tables - to compare the records' location information. + co-located. Once you've run the ETL, you can also look at the PUDL + ``plants_eia860`` and ``plants_all_ferc1`` tables to compare the records' + location information. Mapping Utilities ^^^^^^^^^^^^^^^^^ @@ -248,7 +248,7 @@ live PUDL DB, and all of the new FERC 1 data loaded into your cloned FERC 1 DB: .. code-block:: console - $ pytest --live-dbs test/integration/glue_test.py + $ pytest --live-pudl-output test/integration/glue_test.py Integrating Newly Mapped Records into PUDL ------------------------------------------ diff --git a/docs/dev/run_the_etl.rst b/docs/dev/run_the_etl.rst index df6d8a750d..d3d7433414 100644 --- a/docs/dev/run_the_etl.rst +++ b/docs/dev/run_the_etl.rst @@ -6,31 +6,40 @@ Running the ETL Pipeline So you want to run the PUDL data processing pipeline? This is the most involved way to get access to PUDL data. 
It's only recommended if you want to edit the ETL process -or contribute to the code base. Check out the :doc:`/data_access` documentation if you -just want to use already processed data. +or contribute to the codebase. Check out the :doc:`/data_access` documentation if you +just want to use the data we process and distribute. These instructions assume you have already gone through the :ref:`dev_setup`. +Alembic +------- + +PUDL uses `Alembic `__ to manage the creation of our +database and migrations of the schema as it changes over time. However, we only use +file-based databases (SQLite, DuckDB) and these migrations are mostly a way to allow +us to change the schema without needing to repopulate the entire database from scratch. +They are not used in production. + Database initialization ------------------------ +^^^^^^^^^^^^^^^^^^^^^^^ Before we run anything, we'll need to make sure that the schema in the database -actually matches the schema in the code - run ``alembic upgrade head`` to create -the database with the right schema. If you already have a ``pudl.sqlite`` you'll -need to delete it first. +actually matches the schema defined by the code. Run ``pixi run alembic upgrade head`` +to create the database with the right schema. If you already have a ``pudl.sqlite`` +you'll probably need to delete it first. Database schema migration -------------------------- - -If you've changed the database schema, you'll need to make a migration for that -change and apply that migration to the database to keep the database schema up- -to-date: +^^^^^^^^^^^^^^^^^^^^^^^^^ +If you've changed the database schema locally (by renaming a column, adding a table, +defining a new primary key, changing a datatype, etc.), you'll need to make a migration +reflecting that change and apply the migration to the database to keep the database +schema synchronized with the code: .. 
code-block:: bash - $ alembic revision --autogenerate -m "Add my cool table" - $ alembic upgrade head + $ pixi run alembic revision --autogenerate -m "Add my cool table" + $ pixi run alembic upgrade head $ git add migrations $ git commit -m "Migration: added my cool table" @@ -56,25 +65,97 @@ More information can be found in the `Alembic docs Dagster ------- -PUDL uses `Dagster `__ to orchestrate its data pipelines. Dagster -makes it easy to manage data dependences, parallelize processes, cache results -and handle IO. If you are planning on contributing to PUDL, it is recommended you -read through the `Dagster Docs `__ to -familiarize yourself with the tool's main concepts. -^^^^^^^^^^^^^^^^^ -``dg`` quickstart -^^^^^^^^^^^^^^^^^ +Catalyst uses `Dagster `__ to manage our data pipelines. +Dagster is an open source data orchestration framework written in Python. It makes it +easy to manage data dependencies, parallelize processes, cache results and handle IO. + +If you are interested in contributing to PUDL, you may want to familiarize yourself with +Dagster's excellent documentation: + +* `Getting Started (open source) `__ +* `Dagster Core Concepts `__ +* `Dagster Basics Tutorial `__ +* `Dagster Essentials `__ (Dagster Course) + +If you use coding agents, you may also want to check out `the Dagster agent skills +`__: + +* `dagster-expert `__ +* `dignified-python `__ +* `AI Driven Data Engineering `__ (Dagster Course) -PUDL is configured as a ``dg`` project. ``dg`` is Dagster's official CLI. It can run -most if not all of the tasks managed through the UI. +These skills are also configured in the PUDL repo and can be installed with this pixi +task (which uses `npx skill `__). + +.. code-block:: console + + $ pixi run install-skills + +Because Dagster's documentation is extensive and constantly being updated, the rest of +this section will focus only on the specifics of the PUDL project, with links to the +Dagster docs for more info. 
+ +Core Dagster concepts used in PUDL +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* **`The Dagster UI `__** is used for + monitoring and executing data processing runs interactively through a web app. +* **`Assets `__** are the + primary building blocks in Dagster. They represent the underlying entities in our + pipelines, such as database tables or machine learning models. In PUDL, most assets + represent a :py:class:`pandas.DataFrame` that is written to Parquet and SQLite files + on disk. Depending on which part of the PUDL DAG you are looking at, assets might + represent messy raw dataframes extracted from spreadsheets, partially cleaned + intermediary dataframes, or fully normalized tables ready for distribution. +* **`IO Managers `__** in Dagster let + us keep the code for data processing separate from the code for reading and writing + data. PUDL defines I/O Managers for reading data out of the FERC SQLite databases we + curate, for reading and writing Parquet files, and for writing out to SQLite. For + example :class:`pudl.io_managers.PudlMixedFormatIOManager` allows assets to read and + write dataframes to SQLite and Parquet-backed outputs using a single logical + interface. +* **`Resources `__** are + objects used by Dagster assets to provide access to external systems, databases, or + services. In PUDL, we've defined a :py:class:`pudl.workspace.datastore.Datastore` + Resource that pulls our raw input data from `archives on Zenodo + `__ identified by DOI. The + :py:class:`pudl.workspace.datastore.ZenodoDoiSettings` Resource defines the current + Zenodo DOI for each dataset. We also store our dataset-specific ETL settings (like + what years of EIA-861 data to process) in a Resource + :py:class:`pudl.resources.PudlEtlSettingsResource`. +* **`Jobs `__** are preconfigured collections + of assets, resources and IO Managers. Jobs are the main unit of execution in Dagster. 
+ The main jobs defined in :mod:`pudl.etl` are: + + - ``ferc_to_sqlite`` to rebuild the raw FERC prerequisite databases only. + - ``pudl`` to run the main PUDL ETL assuming those raw FERC databases already exist. + - ``pudl_with_ferc_to_sqlite`` to run the full end-to-end build in one Dagster job. + - ``ferceqr``, a DuckDB-based pipeline to process the very large FERC EQR dataset. + +* **`Definitions `__** + are the top-level collection of Dagster objects that get loaded into a code location. + They bundle together the assets, asset checks, resources, jobs, schedules, and + sensors that Dagster can see and execute. In PUDL, the main Dagster definitions live + in :mod:`pudl.etl` and are exposed via :data:`pudl.etl.defs`. +* **`Configs `__** + are the runtime settings passed to Dagster jobs, assets, and resources to control + what gets executed and how. In PUDL, we usually store these settings in YAML files + like ``dg_fast.yml``, ``dg_full.yml``, ``dg_pytest.yml``, and ``dg_nightly.yml``, + which configure execution options and shared resources like ``etl_settings``. + +The Dagster CLI: ``dg`` +^^^^^^^^^^^^^^^^^^^^^^^ + +PUDL is configured as a ``dg`` project. ``dg`` is Dagster's official CLI. It can +perform many of the same actions managed through the Dagster UI. Some examples: .. code-block:: console # Start up the Dagster UI webserver and daemons $ pixi run dg dev # Launch a full job with its default config - $ pixi run dg launch --job etl_fast + $ pixi run dg launch --job pudl # Select a subset of assets to materialize $ pixi run dg launch --assets "group:raw_eia861" # List all of the Dagster definitions @@ -83,118 +164,15 @@ most if not all of the tasks managed through the UI. For full ``dg`` CLI documentation and options, see the Dagster docs: `dg CLI reference `__. 
-There are a handful of Dagster concepts worth understanding prior -to interacting with the PUDL data processing pipeline: - -Dagster UI -^^^^^^^^^^ - -`The Dagster UI `__ -is used for monitoring and executing ETL runs. - -Software Defined Assets (SDAs) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - *An asset is an object in persistent storage, such as a table, file, or - persisted machine learning model. A software-defined asset is a Dagster object that - couples an asset to the function and upstream assets that are used to produce - its contents.* - -`SDAs `__ -or "assets", are the computation building blocks in a Dagster project. -Assets are linked together to form a direct acyclic graph (DAG) which can -be executed to persist the data created by the assets. In PUDL, each asset -is a dataframe written to SQLite or parquet files. Assets in PUDL can be -raw extracted dataframes, partially cleaned tables or fully normalized -tables. - -SDAs are created by applying the ``@asset`` decorator to a function. - -The main PUDL ETL is composed of assets. Assets can be "materialized", which -means running the associated functions and writing the output to disk -somewhere. When you are running the main PUDL ETL, you are **materializing -assets**. - -Operations (Ops): -^^^^^^^^^^^^^^^^^ - -`Ops `__ are functions -that are run in a graph. They are not linked to assets, and are a lower-level -building block for orchestrating data processing pipelines. - -Due to some limitations of the asset model, we need to use bare ops for the -FERC-to-SQLite workflow. When you are running that phase, you are **launching a -job run**. - -IO Managers: -^^^^^^^^^^^^ - - *IO Managers are user-provided objects that store asset outputs - and load them as inputs to downstream assets.* - -Each asset has an `IO Manager -`__ that tells -Dagster how to handle the objects returned by the software defined asset's -underlying function. 
The IO Managers in PUDL read and write dataframes to and -from sqlite, pickle and parquet files. For example, the -:func:`pudl.io_managers.pudl_sqlite_io_manager` allows assets to read and write -dataframes and execute SQL statements. - -Resources: -^^^^^^^^^^ - -`Resources `__ are objects -that can be shared across multiple software-defined assets. -For example, multiple PUDL assets use the :func:`pudl.resources.datastore` -resource to pull data from PUDL's raw data archives on Zenodo. - -Generally, inputs to assets should either be other assets or -python objects in Resources. - -Jobs -^^^^ -`Jobs `__ -are preconfigured collections of assets, resources and IO Managers. -Jobs are the main unit of execution in Dagster. For example, -the ``etl_fast`` job defined in :mod:`pudl.etl` executes the -FERC, EIA and EPA CEMS pipelines for the most recent year. - -Definitions -^^^^^^^^^^^ -`Definitions `__ -are collections of assets, resources, IO managers and jobs that can -be loaded into the dagster UI and executed. Definitions can have multiple -preconfigured jobs. For example, the ``pudl.ferc_to_sqlite`` definition -contains ``etl_fast`` and ``etl_full`` jobs. - -There are two main Definitions in the PUDL processing pipeline: - -1. :func:`pudl.ferc_to_sqlite.defs` :doc:`converts the FERC Form 1, 2, 6, 60 and - 714 DBF/XBRL files ` into `SQLite `__ - databases so that the data are easier to extract, and so all of the raw FERC - data is available in a modern format. You must run a job in this definition - before you can execute a job in :func:`pudl.etl.defs`. -2. 
:func:`pudl.etl.defs` coordinates the "Extract, Transform, Load" process that - processes 20+ years worth of data from the FERC Form 1 database, dozens of EIA - spreadsheets, and the thousands of CSV files that make up the EPA CEMS hourly - emissions data into a clean, well normalized SQLite database (for the FERC and - EIA data), and an `Apache Parquet `__ dataset that - is partitioned by state and year (for the EPA CEMS). - -Both definitions have two preconfigured jobs: - - ``etl_fast`` processes one year of data - - ``etl_full`` processes all years of data - .. _run-dagster-ui: Running the ETL via the Dagster UI ---------------------------------- -Dagster needs a directory to store run logs and some interim assets. We don't -distribute these outputs, so we want to store them separately from -``PUDL_OUTPUT``. Create a new directory outside of the pudl repository -directory called ``dagster_home/``. Then set the ``DAGSTER_HOME`` environment -variable to the path of the new directory: +Dagster needs a directory to store run logs, system state, and interim assets that are +not written to Parquet or SQLite for distribution. Create a new directory +**outside of your cloned PUDL repository** and then set an environment variable +named ``DAGSTER_HOME`` to the path of the new directory. E.g. .. code-block:: console @@ -221,117 +199,49 @@ Once ``DAGSTER_HOME`` is set, launch the dagster UI by running: `__ will be saved to a temporary directory that is deleted when the ``dagster`` process exits. -This will launch the dagster UI at http://localhost:3000/. You should see -a window that looks like this: - -.. image:: ../images/dagster_ui_home.png - :width: 800 - :alt: Dagster UI home +This will launch the Dagster UI on `localhost:3000 `__. See the +`Dagster UI docs `__ for all the +details on how to use the UI. -Click the hamburger button in the upper left to view the definitions, -assets and jobs. 
- -^^^^^^^^^^^^^^^^^^^^^^^^^^ Cloning the FERC databases ^^^^^^^^^^^^^^^^^^^^^^^^^^ -To run the data pipelines, you'll first need to create the raw FERC databases by -clicking on one of the ``pudl.ferc_to_sqlite`` jobs. Then select "Launchpad" -where you can adjust the years to extract for each dataset. Then click -"Launch Run" in the lower right hand corner of the window. The UI will -take you to a new window that provides information about the status of -the job. The bottom part of the window contains dagster logs. You can -view logs from the ``pudl`` package in the CLI window where ``dg dev`` -is running. - -You can adjust the years to process for each dataset using the Launchpad tab:: - - resources: - ferc_to_sqlite_settings: - config: - ferc1_dbf_to_sqlite_settings: - years: - - 2020 - - 2019 - - 2018 - ferc1_xbrl_to_sqlite_settings: - years: - - 2021 - ferc2_xbrl_to_sqlite_settings: - years: - - 2021 - ferc60_xbrl_to_sqlite_settings: - years: - - 2021 - ferc6_xbrl_to_sqlite_settings: - years: - - 2021 - ferc714_xbrl_to_sqlite_settings: - years: - - 2021 - -.. note:: +The raw FERC SQLite databases are part of the ``raw_ferc_to_sqlite`` asset group. If +you only need those outputs, select the ``ferc_to_sqlite`` job and hit ``Materialize +All``, or you can select the specific FERC Form you actually need. If you want to run +the whole ETL from scratch, use the ``pudl_with_ferc_to_sqlite`` job. The ``pudl`` job +is intended for day-to-day development once compatible raw FERC outputs have been +materialized locally. See :doc:`/dev/clone_ferc1` for more background on this process. - We are experimenting with producing DuckDB outputs from the XBRL (and possibly DBF) - data that FERC publishes. For the time being, ``ferc_to_sqlite`` will produce both SQLite - and DuckDB outputs by default. - -^^^^^^^^^^^^^^^^^^^^ Running the PUDL ETL ^^^^^^^^^^^^^^^^^^^^ -Once the raw FERC databases are created by a ``pudl.ferc_to_sqlite`` job, -you can execute the main PUDL ETL. 
- -.. note:: - - Make sure you've extracted the raw FERC years you are planning to process - with the main PUDL ETL. Jobs in the ``pudl.etl`` definition will fail if - the raw FERC databases are missing requested years. For example, if you want - to process all years available in the ``pudl.etl`` definition make sure - you've extracted all years of the raw FERC data. - -Select one of the ``pudl.etl`` jobs. -This will bring you to a window that displays all of the asset dependencies -in the ``pudl.etl`` definition. Subsets of the ``pudl.etl`` asset graph -are organized by asset groups. These groups are helfpul for visualizing and +For most day-to-day development, you will want to select the ``pudl`` job. This will +bring you to a window that displays all of the assets and their dependencies. Subsets +of the asset graph are organized by asset groups, which are helpful for visualizing and executing subsets of the asset graph. -To execute the job, select ``etl_fast`` or ``etl_full`` and click "Materialize all". -You can configure which years to process by shift+clicking "Materialize all". -Read the :ref:`resource_config` section to learn more. -To view the status of the run, click the date next to "Latest run:". +To execute the whole ``pudl`` job end-to-end click "Materialize all". Depending on how +many CPUs and how much memory your computer has, this may take hours. On an M1 Macbook +Pro with 32GB of RAM and 10 CPUs it takes about 90 minutes. To run the full ETL you'll +need at least 16GB of RAM. -.. image:: ../images/dagster_ui_pudl_etl.png - :width: 800 - :alt: Dagster UI pudl_etl +Read the +:ref:`resource_config` section to learn more. To view the status of the run, click the +date next to "Latest run:". -You can also re-execute specific assets by selecting one or -multiple assets in the "Overview" tab and clicking "Materialize selected". -This is helpful if you are updating the logic of a specific asset and don't -want to rerun the entire ETL. 
+You can also re-execute specific assets by selecting one or multiple assets in the +"Overview" tab and clicking "Materialize selected". This is helpful if you are updating +the logic of a specific asset and don't want to rerun the entire ETL. .. note:: - Dagster does not allow you to select asset groups for a specific job. For example, if - you click on the ``raw_eia860`` asset group in the Dagster UI click "Materialize All", - the default configuration values will be used so all available years of the data will - be extracted. - To process a subset of years for a specific asset group, select the asset group, - shift+click "Materialize all" and configure the ``dataset_settings`` resource with the + shift+click "Materialize all" and configure the ``etl_settings`` resource with the desired years. -.. note:: - - Dagster will throw an ``DagsterInvalidSubsetError`` if you try to - re-execute a subset of assets produced by a single function. This can - be resolved by re-materializing the asset group of the desired asset. +See :ref:`troubleshooting_dagster` for tips on how to fix common issues we run into. -Read the :ref:`dev_dagster` documentation page to learn more about working -with dagster. - -^^^^^^^^^^^^^^^^^^^^^^^^^^ Running the FERC EQR ETL ^^^^^^^^^^^^^^^^^^^^^^^^^^ All processing for FERC EQR data is contained in a separate ETL from the @@ -341,7 +251,7 @@ to access private cloud storage containing the raw data. Any external contributors interested in working on this ETL should contact the Catalyst team to set up access to the raw data. -The FERC EQR ETL is contained in a Dagster job called ``ferceqr_etl``. +The FERC EQR ETL is contained in a Dagster job called ``ferceqr``. Executing this job from the Dagster UI is slightly different from the main PUDL ETL jobs because the EQR job uses Dagster partitions. 
After selecting "Materialize All" (or "Materialize selected" for a selection of assets), @@ -368,8 +278,8 @@ out-of-memory issues while running many quarters in one ``backfill``. .. _run-cli: -Running the ETL with CLI Commands ---------------------------------- +Running the ETL via CLI +----------------------- The ``dg`` command line interface is Dagster's official tool and has a ton of built-in functionality. For full documentation see the Dagster docs: @@ -384,15 +294,14 @@ launching runs: $ pixi run dg check defs --verbose $ pixi run dg list defs -You can also kick off full jobs with their default configuration using ``dg launch``. -The Dagster UI does not need to be running for this to work, but if it is running, -you'll see the run appear in it. +You can also kick off jobs directly with ``dg launch``. The Dagster UI does not need to +be running for this to work, but if it is running, you'll see the run appear in it. .. code-block:: console $ pixi run dg launch --job ferc_to_sqlite - $ pixi run dg launch --job etl_fast - $ pixi run dg launch --job etl_full + $ pixi run dg launch --job pudl + $ pixi run dg launch --job pudl_with_ferc_to_sqlite --config src/pudl/package_data/settings/dg_full.yml You can also target specific assets rather than an entire job, and use Dagster's rich `asset selection syntax `__ @@ -405,135 +314,81 @@ to pick and choose: # Materialize all assets upstream and downstream of a table $ pixi run dg launch --assets "+key:core_eia923__fuel_receipts_costs+" - -.. note:: - - We recommend using the Dagster UI to execute the ETL as it provides additional - functionality for re-execution and viewing asset dependences. - -PUDL also has a couple of custom job launching scripts, which automatically use one of -our preset YAML files to configure the execution graph, including data sources, years, -etc. - -1. ``ferc_to_sqlite`` executes the ``pudl.ferc_to_sqlite`` dagster graph. - You must run this script before you can run ``pudl_etl``. -2. 
``pudl_etl`` executes the ``pudl.etl`` asset graph. - -.. note:: - - We plan to deprecate these custom scripts in 2026Q2, and move to using Dagster's - built-in file-based configuration system. - -We also have ``pixi`` tasks defined in ``pyproject.toml`` that correspond to running the -above scripts with default configurations, to process all data (these can take hours): +We also have a ``pixi`` task defined in ``pyproject.toml`` to process all data with +the full default configuration (this can take hours): .. code-block:: console - $ pixi run ferc $ pixi run pudl -Settings Files --------------- -These CLI commands use YAML settings files in place of command line arguments. This -avoids undue complexity and preserves a record of how the script was run. The YAML file -dictates which years or states get run through the the processing pipeline. There are -two standard settings files that we use to run the integration tests and the nightly -builds included in the repository: +Dagster Config and PUDL ETL Settings Files +------------------------------------------ -- ``src/pudl/package_data/settings/etl_fast.yml`` processes 1-2 years of data. -- ``src/pudl/package_data/settings/etl_full.yml`` processes all available data. +The ``dg launch`` command can read run configuration from YAML files. This avoids +undue command line complexity and preserves a record of how the pipeline was run. +The standard Dagster config files we use are: + +- ``src/pudl/package_data/settings/dg_fast.yml`` for smaller, faster local runs. +- ``src/pudl/package_data/settings/dg_full.yml`` for full local builds. +- ``src/pudl/package_data/settings/dg_pytest.yml`` for integration-test prebuilds. +- ``src/pudl/package_data/settings/dg_nightly.yml`` for the nightly cloud build. .. warning:: - In previous versions of PUDL, you could specify which datasources to process - using the settings file. 
With the migration to dagster, all datasources are - processed no matter what datasources are included in the settings file. - If you want to process a single datasource, materialize the appropriate assets - in the dagster UI. (see :ref:`run-dagster-ui`). + The Dagster config file selects resources and execution settings. The referenced + ETL settings YAML still determines partitions, years, and other dataset-specific + parameters, but job and asset selection determine which parts of the graph run. -Each file contains instructions for how to process the data under "full" or "fast" -conditions respectively. You can copy, rename, and modify these files to suit your -needs. The layout of these files is depicted below: +Each Dagster config file includes execution options and resource configuration, +including the ``etl_settings_path`` used by the shared ``etl_settings`` resource. +The referenced ETL settings YAML files specify which partitions of each dataset should +be processed, and are generally structured like this: .. code-block:: - # FERC1 to SQLite settings - ferc_to_sqlite_settings: - ├── ferc1_dbf_to_sqlite_settings - | └── years - ├── ferc1_xbrl_to_sqlite_settings - | └── years - └── ferc2_xbrl_to_sqlite_settings - └── years - - # PUDL ETL settings - name : unique name identifying the etl outputs - title : short human readable title for the etl outputs - description : a longer description of the etl outputs - datasets: - ├── dataset name - │ └── dataset etl parameter (e.g. years) : editable list of years - └── dataset name - └── dataset etl parameter (e.g. years) : editable list of years - -Both scripts enable you to choose which **years** you want to include: - -.. list-table:: - :header-rows: 1 - :widths: auto - - * - Parameter - - Description - * - ``years`` - - A list of years to be included in the FERC Form 1 Raw DB or the PUDL DB. You - should only use a continuous range of years. 
Check the :doc:`/data_sources/index` - pages for the earliest available years. - -The ``pudl_etl`` script CEMS data allows you to select **years** and **states**. - -.. list-table:: - :header-rows: 1 - :widths: auto - - * - Parameter - - Description - * - ``years`` - - A list of the years you'd like to process CEMS data for. You should - only use a continuous range of years. Check the :doc:`/data_sources/epacems` page - for the earliest available years. - * - ``states`` - - A list of the state codes you'd like to process CEMS data for. You can specify - ``all`` if you want to process data for all states. This may take a while! + # FERC-to-SQLite settings + ferc_to_sqlite_settings: + ├── ferc1_dbf_to_sqlite_settings + | └── years + ├── ferc1_xbrl_to_sqlite_settings + | └── years + └── ferc2_xbrl_to_sqlite_settings + └── years + + # PUDL ETL settings + name : unique name identifying the etl outputs + title : short human readable title for the etl outputs + description : a longer description of the etl outputs + datasets: + ├── dataset name + │ └── dataset etl parameter (e.g. years) : editable list of years + └── dataset name + └── dataset etl parameter (e.g. years) : editable list of years .. seealso:: - For an exhaustive listing of the available parameters, see the ``etl_full.yml`` - file. - -There are a few notable dependencies to be wary of when fiddling with these -settings: + For an exhaustive listing of the available parameters, see the ETL settings models in + :mod:`pudl.settings` and the packaged settings files under + ``src/pudl/package_data/settings/``. -- The ``ferc_to_sqlite`` job must be executed prior to running ``pudl_etl`` - job. - -- EPA CEMS cannot be loaded without EIA data unless you have existing PUDL database. - -Now that your settings are configured, you're ready to run the scripts. +In general, you should not fiddle with these settings unless you are actually adding a +new year of data. 
We only test the combinations of inputs found in the full and fast +ETL settings that are checked into the PUDL repo. Many other combinations are obviously +possible, but most of them probably don't work! The Fast ETL ------------- -Running the Fast ETL processes one year of data for each dataset. This is what -we do in our :doc:`software integration tests `. Depending on your computer, -it should take around 15 minutes total. +^^^^^^^^^^^^ +Running the Fast ETL processes a limited subset of data for each dataset. This is +similar to what we do in our :doc:`software integration tests `. Depending on +your computer, it may take up to an hour to run. .. code-block:: console - $ ferc_to_sqlite settings/etl_fast.yml - $ pudl_etl settings/etl_fast.yml + $ pixi run dg launch --job pudl --config src/pudl/package_data/settings/dg_fast.yml The Full ETL ------------- -The Full ETL settings includes all all available data that PUDL can process. All +^^^^^^^^^^^^ +The Full ETL settings includes all available data that PUDL can process. All the years, all the states, and all the tables, including the ~1 billion record EPA CEMS dataset. Assuming you already have the data downloaded, on a computer with at least 16 GB of RAM, and a solid-state disk, the Full ETL including EPA @@ -541,37 +396,45 @@ CEMS should take around 2 hours. .. code-block:: console - $ ferc_to_sqlite src/pudl/package_data/settings/etl_full.yml - $ pudl_etl src/pudl/package_data/settings/etl_full.yml + $ pixi run dg launch --job pudl --config src/pudl/package_data/settings/dg_full.yml Custom ETL ----------- -You've changed the settings and renamed the file to CUSTOM_ETL.yml +^^^^^^^^^^ +If you need a custom run profile, copy one of the existing Dagster config files, +change its ``etl_settings_path`` or other resource settings, and point ``dg launch`` at +the new file. .. 
code-block:: console - $ ferc_to_sqlite the/path/to/your/custom_etl.yml - $ pudl_etl the/path/to/your/custom_etl.yml - + $ pixi run dg launch --job pudl --config the/path/to/your/custom_dg_config.yml Additional Notes ---------------- + +Logging +^^^^^^^ + The commands above should result in a bunch of Python :mod:`logging` output describing -what the script is doing, and file outputs the directory you specified via the +what Dagster is doing, and file outputs in the directory you specified via the ``$PUDL_OUTPUT`` environment variable. When the ETL is complete, you should see new files at e.g. ``$PUDL_OUTPUT/ferc1_dbf.sqlite``, ``$PUDL_OUTPUT/pudl.sqlite`` and ``$PUDL_OUTPUT/core_epacems__hourly_emissions.parquet``. -All of the PUDL scripts also have help messages if you want additional information (run -``script_name --help``). +The Dagster CLI also has built-in help if you want additional information: -Foreign Keys ------------- -The order assets are loaded into ``pudl.sqlite`` is non deterministic because the -assets are executed in parallel so foreign key constraints can not be evaluated in -real time. However, foreign key constraints can be evaluated after all of the data -has been loaded into the database. To check the constraints, run: +.. code-block:: console + + $ pixi run dg launch --help + +Foreign Key Constraints +^^^^^^^^^^^^^^^^^^^^^^^ +The order assets are loaded into ``pudl.sqlite`` is non-deterministic because the +assets are executed in parallel so foreign key constraint violations can't be identified +in real time. However, foreign key constraints can be checked after all of the data +has been loaded into the database successfully. To check the constraints, run: .. code-block:: console - $ pudl_check_fks + $ pixi run pudl_check_fks + +The foreign key check is also run as part of the PUDL integration tests. 
diff --git a/docs/dev/testing.rst b/docs/dev/testing.rst index f9c0c53acf..7d98d47ec3 100644 --- a/docs/dev/testing.rst +++ b/docs/dev/testing.rst @@ -8,6 +8,13 @@ We use `pytest `__ to specify software unit & integration te including calling ``dbt build`` to run our :doc:`data_validation_quickstart` tests. Several common test commands are available as pixi tasks for convenience. +For day-to-day work, the most commonly used pixi testing tasks are: + +.. code-block:: console + + $ pixi run pytest-unit + $ pixi run pytest-integration + To run the tests that will be run on a PR by our continuous integration (CI) on GitHub before it's merged into the ``main`` branch you can use the following command: @@ -15,8 +22,9 @@ before it's merged into the ``main`` branch you can use the following command: $ pixi run pytest-ci -This includes building the documentation, running unit & integration tests, and checking -to make sure we've got sufficient test coverage. +This includes building the documentation, running unit & integration tests, dbt data +validations other than the row counts, and checking to make sure we've got sufficient +test coverage. .. note:: @@ -36,11 +44,12 @@ each with its own subdirectory: functions and classes, often using minimal inline data structures that are specified in the test modules themselves. * **Software Integration Tests** (``test/integration/``) test larger - collections of functionality including the interactions between different - parts of the overall software system and in some cases interactions with - external systems requiring network connectivity. The main thing our - integration tests do is run the full PUDL data processing pipeline for the - most recent year of data. These tests take around 45 minutes to run. + collections of functionality including the interactions between different + parts of the overall software system and in some cases interactions with + external systems requiring network connectivity. 
They run a Dagster-managed + prebuild of the ETL using ``dg_pytest.yml`` and then exercise code against + those outputs. These tests take around 45 minutes to run. + ------------------------------------------------------------------------------- Running the tests and other tasks with pixi @@ -63,11 +72,11 @@ in the local datastore. However, if you're editing code that affects how the datastore works, you probably don't want to risk contaminating your working datastore. You can use a disposable temporary -datastore instead by using our custom ``--tmp-data`` with ``pytest``: +datastore instead by using our custom ``--temp-pudl-input`` with ``pytest``: .. code-block:: console - $ pytest --tmp-data test/integration + $ pixi run pytest --temp-pudl-input test/integration .. seealso:: @@ -86,10 +95,10 @@ within the activated pixi environment, or use ``pixi run pytest`` to run it explicitly. If you are working on integration tests, note that most of them require processed PUDL -outputs. If you try to run a single integration test directly with pytest it will -likely end up running the fast ETL which will take 45 minutes. If you have processed -PUDL outputs locally already, you can use ``--live-dbs`` instead. This is only helpful -if the thing you're testing isn't part of the ETL itself. +outputs. If you try to run a single integration test directly with pytest it will likely +end up running the fast ETL which will take 45 minutes. If you have processed PUDL +outputs locally already, you can use ``--live-pudl-output`` instead. This is only +helpful if the thing you're testing isn't part of the ETL itself. Running specific tests ^^^^^^^^^^^^^^^^^^^^^^ @@ -97,19 +106,19 @@ To run the software unit tests with ``pytest`` directly: .. code-block:: console - $ pytest test/unit + $ pixi run pytest test/unit To run only the unit tests for the Excel spreadsheet extraction module: .. 
code-block:: console - $ pytest test/unit/extract/excel_test.py + $ pixi run pytest test/unit/extract/excel_test.py To run only the unit tests defined by a single test class within that module: .. code-block:: console - $ pytest test/unit/extract/excel_test.py::TestGenericExtractor + $ pixi run pytest test/unit/extract/excel_test.py::TestGenericExtractor Custom PUDL pytest flags ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -122,38 +131,53 @@ looking at the ``custom options`` section: .. code-block:: console Custom options: - --live-dbs Use existing PUDL/FERC1 DBs instead of creating temporary ones. - --tmp-data Download fresh input data for use with this test run only. - --etl-settings=ETL_SETTINGS - Path to a non-standard ETL settings file to use. + --live-pudl-output Use existing PUDL/FERC1 DBs instead of creating temporary ones. + --temp-pudl-input Download fresh input data for use with this test run only. + --dg-config=PATH Path to a non-standard Dagster config file to use. --bypass-local-cache If enabled, the local file cache for datastore will not be used. --save-unmapped-ids Write the unmapped IDs to disk. - --ignore-foreign-key-constraints - If enabled, do not check the foreign keys. The main flexibility that these custom options provide is in selecting where the raw input data comes from and what data the tests should be run against. Being able to specify the tests to run and the data to run them against independently simplifies the test suite and keeps the data and tests very clearly separated. -The ``--live-dbs`` option lets you use your existing FERC 1 and PUDL databases instead -of building a new database at all. This can be useful if you want to test code that only -operates on an existing database, and has nothing to do with the construction of that -database. For example, the EPA CEMS specific tests: +The ``--live-pudl-output`` option lets you use your existing FERC 1 and PUDL databases +instead of building a new database at all. 
This can be useful if you want to test code +that only operates on an existing database, and has nothing to do with the construction +of that database. For example, the EPA CEMS specific tests: .. code-block:: console - $ pytest --live-dbs test/integration/epacems_test.py + $ pixi run pytest --live-pudl-output test/integration/epacems_test.py + +Foreign key checks and dbt validations can be selected separately from the rest of the +integration suite by running the dedicated validation module directly. For example: + +.. code-block:: console + + $ pixi run pytest --live-pudl-output test/integration/data_validation_test.py Assuming you do want to run the ETL and build new databases as part of the test you're -running, the contents of that database are determined by an ETL settings file. By -default, the settings file that's used is -``src/pudl/package_data/settings/etl_fast.yml`` But it's also possible to use a -different input file, generating a different database, and then run some tests against -that database. +running, the contents of that database are determined by the Dagster config file passed +via ``--dg-config``. By default, pytest uses +``src/pudl/package_data/settings/dg_pytest.yml``. That Dagster config file points at an +ETL settings YAML file and any runtime settings needed for the prebuild. + +If you want to run tests against an existing local full build instead, use the pixi +tasks we've defined for the nightly builds, which use +``--live-pudl-output`` and ``--dg-config src/pudl/package_data/settings/dg_full.yml``: + +.. code-block:: console + + $ pixi run pytest-integration-nightly + $ pixi run pytest-data-validation-nightly + +.. note:: -We use the ``src/pudl/package_data/etl_full.yml`` settings file to specify an exhaustive -collection of input data. 
+ ``--live-pudl-output`` is intentionally guarded against running unit and integration + tests in the same pytest session, since the two suites need incompatible + ``PUDL_OUTPUT`` environment variable handling. The raw input data that all the tests use is ultimately coming from our `archives on Zenodo `__. A copy of that data @@ -168,4 +192,4 @@ datastore functionality specifically. .. code-block:: console - $ pytest --tmp-data test/integration/etl_test.py + $ pixi run pytest --temp-pudl-input test/integration/etl_test.py diff --git a/docs/dev/dev_dagster.rst b/docs/dev/troubleshooting_dagster.rst similarity index 63% rename from docs/dev/dev_dagster.rst rename to docs/dev/troubleshooting_dagster.rst index 36e0c43a3b..87a388e7d0 100644 --- a/docs/dev/dev_dagster.rst +++ b/docs/dev/troubleshooting_dagster.rst @@ -1,7 +1,7 @@ -.. _dev_dagster: +.. _troubleshooting_dagster: =============================================================================== -Developing with Dagster +Troubleshooting Dagster =============================================================================== Reloading code locations @@ -78,50 +78,36 @@ when you switch branches.** Configuring resources --------------------- Dagster resources are python objects that any assets can access. -Resources can be configured using the dagster UI to change the behavior -of a given resource. PUDL currently has three resources: - -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -:func:`pudl.resources.dataset_settings` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The ``dataset_settings`` resource tells the PUDL ETL which years -of data to process. You can configure the dataset settings -by holding shift while clicking the "Materialize All" button in the upper -right hand corner of the Dagster UI. This will bring up a window -where you change how the resource is configured: - -.. image:: ../images/dataset_settings_config.png - :width: 800 - :alt: Dagster UI home - -.. 
note:: - - If a dataset is not specified in the config, the dataset will be - processed using the default configuration values. - -The panel on the right hand side of the window displays the available -config options and the expected types of inputs. You can also hover -over the config options to view the default values. Once you've configured -the resource you can select "Materialize All" to execute the selected -assets. +Resources can be configured using the Dagster UI or via a YAML config +file to change the behavior of a given resource. PUDL currently has +these key resources: + +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:class:`pudl.resources.PudlEtlSettingsResource` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The ``etl_settings`` resource loads a validated +:class:`pudl.settings.EtlSettings` object from an ETL settings YAML +file. It controls which datasets and years are processed by both +the ``ferc_to_sqlite`` and ``pudl`` jobs. The path to the settings +file is configured via the ``etl_settings_path`` field, and the +standard packaged settings files are under +``src/pudl/package_data/settings/``. + +To override the settings for a single run from the Dagster UI, hold +shift while clicking "Materialize All" to open the run configuration +panel and set ``etl_settings.config.etl_settings_path`` to point at +a custom settings YAML file. .. note:: - The configuration edits you make in the dagster UI are only used - for a single run. If want to save a resource configuration, - change the default value of the resource or create a new job - in :mod:`pudl.etl` or :mod:`pudl.ferc_to_sqlite` with the - preconfigured resource. + The configuration edits you make in the Dagster UI are only used + for a single run. To save a resource configuration permanently, + update the Dagster config YAML (e.g. ``dg_fast.yml``) or pass a + ``--config`` flag to ``dg launch``. 
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -:func:`pudl.resources.datastore` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:class:`pudl.resources.DatastoreResource` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The datastore resource allows assets to to pull data from +The datastore resource allows assets to pull data from PUDL's raw data archives on Zenodo. - -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -:func:`pudl.resources.ferc_to_sqlite_settings` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The ``ferc_to_sqlite_settings`` resource tells the ``ferc_to_sqlite`` -job which years of FERC data to extract. diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 3f38b5464d..3e2c6e5585 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -115,6 +115,78 @@ Quality of Life Improvements Actions workflow for experimenting with automated documentation link checking. See PR :pr:`5128`. +Major Dagster Project Refactor +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We did a major overhaul of our Dagster configuration to bring it closer to the +framework's current best-practice recommendations, and also to experiment with the +new ``dg`` CLI and `Dagster agent skills `__. + +See issue :issue:`5066` for an overview of the issues involved, including issue +:issue:`5120` and PR :pr:`5071`. This refactor includes the following changes: + +* **Replaced the custom ``pudl_etl`` and ``ferc_to_sqlite`` CLI entry points** with + Dagster's official ``dg launch`` tool. The old entry points assembled hand-crafted + Dagster ``run_config`` dicts at runtime; ``dg launch`` reads YAML config files that + are version-controlled alongside the code. Four packaged config files are provided: + ``dg_fast.yml``, ``dg_full.yml``, ``dg_pytest.yml``, and ``dg_nightly.yml``. + Pixi convenience tasks (``pudl-with-ferc-to-sqlite``, + ``pudl-with-ferc-to-sqlite-nightly``, ``ferc-to-sqlite``) wrap the most common + invocations. 
The integration test suite now runs the ETL via ``dg launch`` as a + subprocess, so tests exercise exactly the same code path as production. +* **Consolidated the PUDL job graph.** The previous ``etl_fast`` and ``etl_full`` + jobs were thin wrappers assembled at import time. These are replaced by three + top-level jobs defined directly in :mod:`pudl.etl`: ``ferc_to_sqlite`` (raw FERC + prerequisite databases only), ``pudl`` (the main PUDL ETL assuming those raw FERC + databases already exist), and ``pudl_with_ferc_to_sqlite`` (end-to-end build in a + single job). The FERC EQR pipeline is now the ``ferceqr`` job. Job selection and + asset scoping is handled by ``dg launch`` config files rather than by code. +* **Switched to Dagster config YAML files** for all run configuration (what years to + process, which datasets to include, resource settings). The settings flow is now: + ``dg launch --config some_dg.yml`` → :class:`pudl.resources.PudlEtlSettingsResource` + loads a :class:`pudl.settings.EtlSettings` object from a path declared in that YAML + → individual assets and IO managers read from the injected + :class:`~pudl.settings.EtlSettings`. This replaces the old pattern of serializing + Pydantic models to raw ``run_config`` dicts, which required keeping Dagster config + schemas manually in sync with the Pydantic models. +* **Updated Dagster resources and IO managers to use Pydantic-native** + :class:`dagster.ConfigurableResource` **and** :class:`dagster.ConfigurableIOManager` + **base classes.** + :class:`pudl.workspace.datastore.DatastoreResource` and + :class:`pudl.workspace.datastore.ZenodoDoiSettingsResource` replace the legacy + ``@resource``-decorated functions; + :class:`pudl.io_managers.PudlMixedFormatIOManager`, + :class:`pudl.io_managers.FercDbfSQLiteConfigurableIOManager`, and + :class:`pudl.io_managers.FercXbrlSQLiteConfigurableIOManager` replace the legacy + ``@io_manager`` wrappers. 
Resources now receive settings via Pydantic field + injection rather than via :func:`dagster.build_init_resource_context` config dicts. +* **Added FERC SQLite provenance tracking** via the new + :mod:`pudl.ferc_sqlite_provenance` module. Each time a FERC SQLite asset + materializes, it records a fingerprint as :class:`dagster.MaterializeResult` + metadata: the Zenodo DOI of the source archive, the years included, and a hash of + the ETL settings. When a downstream PUDL asset subsequently loads from that SQLite + file, the IO manager checks the stored fingerprint against the current run's + settings and raises a descriptive error if the DOIs, years, or settings are + incompatible. This eliminates a class of silent correctness failures that occurred + when stale FERC SQLite databases from a previous run were silently reused. +* **Replaced the ``disabled: true`` flag** in FERC-to-SQLite settings with + ``years: []`` (empty list). An empty ``years`` list is unambiguous — "process zero + years" — and eliminates the need for a separate boolean field that had to be + checked in addition to the years list. The ``disabled`` flag has been removed from + all settings classes and YAML config files; FERC 2, 6, and 60 DBF/XBRL configs + that previously used ``disabled: true`` now use ``years: []``. +* **Reorganized the integration test infrastructure** in ``test/conftest.py``. The + old approach ran the PUDL ETL in-process using ``execute_in_process``, which + bypassed the standard ``dg launch`` entry point and required each test fixture to + hand-assemble Dagster ``run_config`` dicts. All three FERC extraction fixtures and + the ``pudl_io_manager`` fixture are replaced by a single ``prebuilt_outputs`` + fixture that runs the full ``pudl_with_ferc_to_sqlite`` job via ``dg launch`` as a + subprocess, with coverage collection appended to the existing test coverage report. 
+ A persistent :class:`dagster.DagsterInstance` fixture allows test code to read + asset materialisation metadata written by that subprocess. Pytest CLI flags are + renamed for clarity: ``--live-dbs`` → ``--live-pudl-output``, ``--tmp-data`` → + ``--temp-pudl-input``, ``--etl-settings`` → ``--dg-config``. + .. _release-v2026.3.0: --------------------------------------------------------------------------------------- diff --git a/pixi.lock b/pixi.lock index 656445d615..b9b5b9a6a3 100644 --- a/pixi.lock +++ b/pixi.lock @@ -371,7 +371,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/libxslt-1.1.43-h711ed8c_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.2-h25fd6f3_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/linkify-it-py-2.1.0-pyhcf101f3_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/llvmlite-0.46.0-py313hdd307be_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/llvmlite-0.47.0-py313hdd307be_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/lsprotocol-2025.0.0-pyhe01879c_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/lxml-6.0.2-py313h4a16004_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.10.0-h5888daf_1.conda @@ -414,7 +414,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/nomkl-1.0-h5ca1d4c_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/notebook-7.5.5-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/notebook-shim-0.2.4-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/numba-0.64.0-py313h5dce7c4_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/numba-0.65.0-py313h5dce7c4_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/numexpr-2.14.1-py313h24ae7f9_101.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/numpy-2.4.3-py313hf6604e3_0.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/oauth2client-4.1.3-pyhd8ed1ab_1.conda @@ -1015,7 +1015,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-64/libzlib-1.3.2-hbb4bfdb_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/linkify-it-py-2.1.0-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/llvm-openmp-22.1.2-h0d3cbff_0.conda - - conda: https://conda.anaconda.org/conda-forge/osx-64/llvmlite-0.46.0-py313h590e1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/llvmlite-0.47.0-py313he3abfad_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/lsprotocol-2025.0.0-pyhe01879c_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/lxml-6.0.2-py313h00bd3da_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/lz4-c-1.10.0-h240833e_1.conda @@ -1056,7 +1056,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-64/nodejs-24.14.1-hc6dc384_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/notebook-7.5.5-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/notebook-shim-0.2.4-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/osx-64/numba-0.64.0-py313h4fc6aae_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/numba-0.65.0-py313h4fc6aae_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/numexpr-2.14.1-py313h821d116_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/numpy-2.4.3-py313hb870fc3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/oauth2client-4.1.3-pyhd8ed1ab_1.conda @@ -1640,7 +1640,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libzlib-1.3.2-h8088a28_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/linkify-it-py-2.1.0-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/llvm-openmp-22.1.2-hc7d1edf_0.conda - - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/llvmlite-0.46.0-py313he297ed2_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/llvmlite-0.47.0-py313h691f2cf_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/lsprotocol-2025.0.0-pyhe01879c_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lxml-6.0.2-py313he6cafaa_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lz4-c-1.10.0-h286801f_1.conda @@ -1681,7 +1681,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/nodejs-24.14.1-h396074d_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/notebook-7.5.5-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/notebook-shim-0.2.4-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/osx-arm64/numba-0.64.0-py313h3ca053b_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/numba-0.65.0-py313h3ca053b_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/numexpr-2.14.1-py313h73ed539_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/numpy-2.4.3-py313he4a34aa_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/oauth2client-4.1.3-pyhd8ed1ab_1.conda @@ -1930,6 +1930,1985 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/zstd-1.5.7-hbf9d68e_6.conda - pypi: https://files.pythonhosted.org/packages/4e/5e/4f5fe4b89fde1dc3ed0eb51bd4ce4c0bca406246673d370ea2ad0c58d747/detect_secrets-1.5.0-py3-none-any.whl - pypi: ./ + dev: + channels: + - url: https://conda.anaconda.org/conda-forge/ + indexes: + - https://pypi.org/simple + options: + pypi-prerelease-mode: if-necessary-or-explicit + packages: + linux-64: + - conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-20_gnu.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/accessible-pygments-0.0.5-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/adwaita-icon-theme-49.0-unix_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/agate-1.9.1-pyh707e725_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/aiobotocore-3.3.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/aiohappyeyeballs-2.6.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aiohttp-3.13.5-py313hd6074c6_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/aioitertools-0.13.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/aiosignal-1.4.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/alabaster-1.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/alembic-1.18.4-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.15.3-hb03c661_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/altair-6.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/annotated-doc-0.0.4-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/antlr-python-runtime-4.13.2-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/anyio-4.13.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/arelle-release-2.39.5-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/argon2-cffi-25.1.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/argon2-cffi-bindings-25.1.0-py313h07c4f96_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/arpack-3.9.1-nompi_hf03ea27_102.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/arrow-1.4.0-pyhcf101f3_0.conda + - 
conda: https://conda.anaconda.org/conda-forge/linux-64/astroid-4.1.2-py313h78bf25f_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/asttokens-3.0.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/async-lru-2.3.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/at-spi2-atk-2.38.0-h0630a04_3.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/linux-64/at-spi2-core-2.40.3-h0630a04_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/linux-64/atk-1.0-2.38.0-h04ea711_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/attrs-26.1.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-c-auth-0.10.1-h2d2dd48_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-c-cal-0.9.13-h2c9d079_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-c-common-0.12.6-hb03c661_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-c-compression-0.3.2-h8b1a151_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-c-event-stream-0.6.0-h9b893ba_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-c-http-0.10.12-h4bacb7b_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-c-io-0.26.3-hc87160b_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-c-mqtt-0.15.2-he9ea9c5_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-c-s3-0.11.5-h6d69fc9_5.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-c-sdkutils-0.2.4-h8b1a151_4.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-checksums-0.2.10-h8b1a151_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-crt-cpp-0.37.4-h4c8aef7_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-sdk-cpp-1.11.747-hc3785e1_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/azure-core-cpp-1.16.2-h206d751_0.conda + - 
conda: https://conda.anaconda.org/conda-forge/linux-64/azure-identity-cpp-1.13.3-hed0cdb0_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/azure-storage-blobs-cpp-12.16.0-hdd73cc9_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/azure-storage-common-cpp-12.12.0-ha7a2c86_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/azure-storage-files-datalake-cpp-12.14.0-h52c5a47_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/babel-2.18.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/backoff-2.2.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/backports.zstd-1.3.0-py313h18e8e13_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/bcrypt-5.0.0-py313h843e2db_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/beautifulsoup4-4.14.3-pyha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/bleach-6.3.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/bleach-with-css-6.3.0-hbca2aae_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/blinker-1.9.0-pyhff2d567_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/blosc-1.21.6-he440d0b_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/boto3-1.42.70-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/botocore-1.42.70-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/bottle-0.13.4-pyhe01879c_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/bottleneck-1.6.0-np2py313hc18bace_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/branca-0.8.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/brotli-1.2.0-hed03a55_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.2.0-hb03c661_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.2.0-py313hf159716_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hda65f42_9.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.34.6-hb03c661_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.2.25-hbd8a1cb_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cached-property-1.5.2-hd8ed1ab_1.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/cached_property-1.5.2-pyha770c72_1.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/cachetools-7.0.5-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.4-he90730b_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/catalystcoop.dbfread-3.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/catalystcoop.ferc_xbrl_extractor-1.8.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cattrs-26.1.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/certifi-2026.2.25-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cffi-2.0.0-py313hf46b229_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/chardet-5.2.0-pyhd8ed1ab_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.4.6-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/click-8.3.1-pyh8f84b5b_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/click-aliases-1.0.1-py_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/cloudpickle-3.1.2-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/coloredlogs-14.0-pyhd8ed1ab_3.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/comm-0.2.3-pyhe01879c_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.3.3-py313hc8edb43_4.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/coverage-7.13.5-py313h3dea7bd_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cpython-3.13.12-py313hd8ed1ab_100.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cryptography-46.0.5-py313heb322e3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/curl-8.19.0-hcf29cc6_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhcf101f3_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cyrus-sasl-2.1.28-hac629b4_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/daff-1.4.2-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-cloud-cli-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-dbt-1!0.28.21-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-dg-cli-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-dg-core-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-gcp-1!0.28.21-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-graphql-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-pandas-1!0.28.21-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-pipes-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-postgres-1!0.28.21-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-shared-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-webserver-1.12.21-pyh742d864_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/databricks-sdk-0.102.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/db-dtypes-1.4.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dbt-adapters-1.22.10-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dbt-common-1.37.3-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dbt-core-1.11.7-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dbt-duckdb-1.9.4-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/dbt-extractor-0.6.0-py313h5c7d99a_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dbt-protos-1.0.419-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dbt-semantic-interfaces-0.9.2-pyh3cfb1c2_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/dbus-1.16.2-h24cb091_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/debugpy-1.8.20-py313h5d5ffb9_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/decorator-5.2.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/deepdiff-8.6.2-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/defusedxml-0.7.1-pyhd8ed1ab_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/deprecated-1.3.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dnspython-2.8.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/doc8-2.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/docker-py-7.1.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/docstring_parser-0.17.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/docutils-0.21.2-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/double-conversion-3.4.0-hecca717_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-64/elfutils-0.194-h849f50c_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/email-validator-2.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/email_validator-2.3.0-hd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/epoxy-1.5.10-hb03c661_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/et_xmlfile-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/executing-2.2.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/fastapi-0.135.3-hbd727af_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/fastapi-cli-0.0.23-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/fastapi-core-0.135.3-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.25.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/flask-3.1.3-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/flask-cors-6.0.2-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/folium-0.20.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.17.1-h27c8c51_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2 + - conda: 
https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-hc364b38_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.62.0-py313h3dea7bd_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/fqdn-1.5.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/freetype-2.14.3-ha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/freexl-2.0.0-h9dce30a_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/fribidi-1.0.16-hb03c661_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/frictionless-5.18.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/frozenlist-1.7.0-py313h6b9daa2_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/fsspec-2026.2.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/future-1.0.0-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/gcsfs-2026.2.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/gdal-3.12.3-py313h1ee8c46_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/gdk-pixbuf-2.44.6-h2b0a6b4_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/geoarrow-c-0.3.1-py313h7033f15_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/geoarrow-pyarrow-0.2.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/geoarrow-types-0.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/geopandas-1.1.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/geopandas-base-1.1.3-pyha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/geos-3.14.1-h480dda7_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/gflags-2.2.2-h5888daf_1005.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/giflib-5.2.2-hd590300_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/gitdb-4.0.12-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/github3.py-4.0.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/gitpython-3.1.46-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.86.4-hf516916_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/glog-0.7.1-hbabe93e_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/glpk-5.0-h445213a_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/linux-64/gmp-6.3.0-hac33072_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/gnutls-3.8.11-h18acefa_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-api-core-2.30.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-api-core-grpc-2.30.1-pyh0c4a08c_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-api-python-client-2.193.0-pyh332efcf_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-auth-2.49.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-auth-httplib2-0.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-auth-oauthlib-1.3.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-cloud-bigquery-core-3.41.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-cloud-core-2.5.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/google-cloud-sdk-562.0.0-py313h78bf25f_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-cloud-storage-3.10.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-cloud-storage-control-1.10.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/google-crc32c-1.8.0-py313h74173ec_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/google-resumable-media-2.8.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/googleapis-common-protos-1.73.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/googleapis-common-protos-grpc-1.73.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/gql-4.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/gql-with-requests-4.0.0-hd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/graphene-3.4.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.14-hecca717_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/graphql-core-3.2.8-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/graphql-relay-3.2.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/graphviz-14.1.2-h8b86629_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/greenlet-3.3.2-py313h5d5ffb9_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/grpc-google-iam-v1-0.14.4-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/grpcio-1.78.1-py313h36609a2_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/grpcio-health-checking-1.78.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/grpcio-status-1.78.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/gtk3-3.24.52-ha5ea40c_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/gts-0.7.6-h977cf35_4.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/gunicorn-23.0.0-py313h78bf25f_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/h11-0.16.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/h2-4.3.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/h3-4.3.0-h3e4d06c_1.conda + - 
conda: https://conda.anaconda.org/conda-forge/linux-64/h3-py-4.3.0-py313h5d5ffb9_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-13.2.1-h6083320_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/hicolor-icon-theme-0.17-ha770c72_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/hpack-4.1.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/httpcore-1.0.9-pyh29332c3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/httplib2-0.31.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/httptools-0.7.1-py313h07c4f96_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/httpx-0.28.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/huey-2.6.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/humanfriendly-10.0-pyh707e725_8.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/humanize-4.15.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/hyperframe-6.1.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/hypothesis-6.151.10-pyha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/icu-78.3-h33c6efd_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/idna-3.11-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/igraph-1.0.1-hfe3e89f_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/imagesize-2.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.8.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.5.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/invoke-2.2.1-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/ipykernel-7.2.0-pyha191276_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ipython-9.12.0-pyhecfbec7_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ipython_pygments_lexers-1.1.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ipywidgets-8.1.8-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/isodate-0.7.2-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/isoduration-20.11.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/itsdangerous-2.2.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jaconv-0.5.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jedi-0.19.2-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/jellyfish-1.2.1-py313h5c7d99a_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.6-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jmespath-1.1.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/joblib-1.5.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/jq-1.8.1-h73b1eb8_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/json-c-0.18-h6688a6e_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/json5-0.14.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jsonpointer-3.1.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-4.26.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-specifications-2025.9.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-with-format-nongpl-4.26.0-hcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter-1.1.1-pyhd8ed1ab_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/jupyter-lsp-2.3.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter-resource-usage-1.2.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_client-8.8.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_console-6.6.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_core-5.9.1-pyhc90fa1f_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_events-0.12.0-pyhe01879c_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_server-2.17.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_server_terminals-0.5.4-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab-4.5.6-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab-lsp-5.2.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_pygments-0.3.0-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_server-2.28.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_widgets-3.0.16-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.3-hb9d3cd8_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.5.0-py313hc8edb43_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/krb5-1.22.2-ha1258a1_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/latexcodec-2.0.1-pyh9f0ad1d_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.18-h0c24ade_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45.1-default_hbd61a6d_102.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/leather-0.4.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/lerc-4.1.0-hdb68285_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libabseil-20260107.1-cxx17_h7b12aa8_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarchive-3.8.6-gpl_hc2c16d8_100.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-23.0.1-ha7f89c6_9_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-acero-23.0.1-h635bf11_9_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-compute-23.0.1-h53684a4_9_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-dataset-23.0.1-h635bf11_9_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-substrait-23.0.1-hb4dd7c2_9_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-6_h4a7cf45_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.2.0-hb03c661_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.2.0-hb03c661_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.2.0-hb03c661_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-6_h0358290_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp21.1-21.1.8-default_h99862b1_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libclang13-22.1.0-default_h746c552_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcrc32c-1.1.2-h9c3ff4c_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h7a8fb5f_6.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.19.0-hcf29cc6_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcxx-22.1.2-ha0f52bf_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-64/libcxxabi-22.1.2-h9fd08b6_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.25-h17f619e_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libdrm-2.4.125-hb03c661_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20250104-pl5321h7949ede_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libegl-1.7.0-ha4b6fd6_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libegl-devel-1.7.0-ha4b6fd6_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.5-hecca717_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h3435931_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libfreetype-2.14.3-ha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libfreetype6-2.14.3-h73754d4_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_18.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-15.2.0-h69a702a_18.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgd-2.3.3-h5fbf134_12.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgdal-core-3.12.3-h4f65170_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran-15.2.0-h69a702a_18.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-15.2.0-h68bc16d_18.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgl-1.7.0-ha4b6fd6_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgl-devel-1.7.0-ha4b6fd6_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libglib-2.86.4-h6548e54_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-64/libglvnd-1.7.0-ha4b6fd6_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libglx-1.7.0-ha4b6fd6_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libglx-devel-1.7.0-ha4b6fd6_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-he0feb66_18.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-3.3.0-h25dbb67_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-storage-3.3.0-hdbdcf42_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgrpc-1.78.1-h1d1128b_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libhwy-1.3.0-h4c17acf_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.18-h3b78370_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libidn2-2.3.8-hfac485b_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.1.2-hb03c661_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libjxl-0.11.2-ha09017c_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libkml-1.3.0-haa4a5bd_1022.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.11.0-6_h47877c9_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libllvm21-21.1.8-hf7376ad_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libllvm22-22.1.2-hf7376ad_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.2-hb03c661_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libmicrohttpd-1.0.2-hc2fc477_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-hb03c661_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.68.1-h877daf1_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hb9d3cd8_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-64/libntlm-1.8-hb9d3cd8_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.32-pthreads_h94d23a6_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libopengl-1.7.0-ha4b6fd6_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libopentelemetry-cpp-1.26.0-h9692893_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libopentelemetry-cpp-headers-1.26.0-ha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libparquet-23.0.1-h7376487_9_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libpciaccess-0.18-hb9d3cd8_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.56-h421ea60_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libpq-18.3-h9abb657_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-6.33.5-h2b00c02_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libre2-11-2025.11.05-h0dc7533_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/librsvg-2.62.1-h4c96295_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/librttopo-1.1.0-h46dd2a8_20.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libsodium-1.0.21-h280c20c_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libspatialite-5.1.0-gpl_hab3fe16_120.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.52.0-hf4e2dac_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.1-hcf80075_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h934c35e_18.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-15.2.0-hdf11a46_18.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libtasn1-4.21.0-hb03c661_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libthrift-0.22.0-h454ac66_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.7.1-h9d88235_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libunistring-0.9.10-h7f98852_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/linux-64/libunwind-1.8.3-h65a8314_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libutf8proc-2.11.3-hfe17d71_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.42-h5347b49_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libuv-1.51.0-hb03c661_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libvulkan-loader-1.4.341.0-h5279c79_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.6.0-hd42ef1d_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.17.0-h8a09558_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.13.1-hca5e8e5_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libxml2-16-2.15.2-hca6bf5a_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.15.2-he237659_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libxml2-devel-2.15.2-he237659_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libxslt-1.1.43-h711ed8c_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.2-h25fd6f3_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/linkify-it-py-2.1.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/llvmlite-0.47.0-py313hdd307be_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/loro-1.10.3-py313hdeb11d6_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lsprotocol-2025.0.0-pyhe01879c_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/lxml-6.0.2-py313h4a16004_2.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.10.0-h5888daf_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/lzo-2.10-h280c20c_1002.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mako-1.3.10-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mapclassify-2.10.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/marimo-0.21.1-py313hd5f5364_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/markdown-3.10.2-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/markdown-it-py-4.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/marko-2.2.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/markupsafe-3.0.3-py313h3dea7bd_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mashumaro-3.14-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.10.8-py313h78bf25f_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.10.8-py313h683a580_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/matplotlib-inline-0.2.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/matplotx-0.3.10-pyhd8ed1ab_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/mdit-py-plugins-0.5.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mdurl-0.1.2-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/memray-1.19.2-py313h422961c_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/minizip-4.0.10-h05a5f5f_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mistune-3.2.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mlflow-3.10.1-pyh707e725_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mlflow-skinny-3.10.1-pyh707e725_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/mlflow-ui-3.10.1-pyh707e725_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/more-itertools-10.8.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/msgpack-python-1.1.2-py313h7037e92_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/msgspec-0.20.0-py313h07c4f96_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/multidict-6.7.1-py313h3dea7bd_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/muparser-2.3.5-h5888daf_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mypy_extensions-1.1.0-pyha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/narwhals-2.18.1-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/nbclient-0.10.4-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/nbconvert-7.17.0-h14065e2_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/nbconvert-core-7.17.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/nbconvert-pandoc-7.17.0-hc3985f0_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/nbformat-5.10.4-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/nest-asyncio-1.6.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/nettle-3.10.1-h4a9d5aa_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/networkx-3.6.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/nlohmann_json-3.12.0-h54a6638_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/nodejs-24.14.1-h3d65ac4_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/nomkl-1.0-h5ca1d4c_0.tar.bz2 + - conda: 
https://conda.anaconda.org/conda-forge/noarch/notebook-7.5.5-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/notebook-shim-0.2.4-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/numba-0.65.0-py313h5dce7c4_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/numexpr-2.14.1-py313h24ae7f9_101.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/numpy-2.4.3-py313hf6604e3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/oauth2client-4.1.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/oauthlib-3.3.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/oniguruma-6.9.10-hb9d3cd8_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.4-h55fea9a_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/openldap-2.6.10-hbde042b_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/openpyxl-3.1.5-py313ha4be090_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.6.1-h35e630c_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/opentelemetry-api-1.16.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/opentelemetry-proto-1.40.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/opentelemetry-sdk-1.16.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/opentelemetry-semantic-conventions-0.37b0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/orc-2.3.0-h21090e2_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/orderly-set-5.5.0-pyhe01879c_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/orjson-3.11.8-py313h541fbb8_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/overrides-7.7.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/p11-kit-0.26.2-h3435931_0.conda + - 
conda: https://conda.anaconda.org/conda-forge/noarch/packaging-26.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pandas-2.3.3-py313h08cd8bf_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pandera-base-0.30.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pandera-polars-0.30.1-hd6cc441_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pandoc-3.9.0.2-ha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pandocfilters-1.5.0-pyhd8ed1ab_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/linux-64/pango-1.56.4-hda50119_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/parallel-20260122-ha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/paramiko-4.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/parsedatetime-2.4-py_1.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/parso-0.8.6-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pathlib-abc-0.5.2-pyh9692d8f_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pathspec-0.12.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pbr-7.0.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.47-haa7fec5_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/perl-5.32.1-7_hd590300_perl5.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/petl-1.7.17-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pexpect-4.9.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pillow-12.2.0-py313h80991f8_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pip-26.0.1-pyh145f28c_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pixman-0.46.4-h54a6638_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.9.4-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/plotly-6.6.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pluggy-1.6.0-pyhf9edf01_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/polars-1.39.3-pyh58ad624_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/polars-runtime-32-1.39.3-py310hffdcd12_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/prek-0.3.8-hb17b654_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/prettier-3.8.1-h7e4c9f4_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/prettytable-3.17.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/proj-9.8.0-he0df7b0_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/prometheus-cpp-1.3.0-ha5d0236_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/prometheus_client-0.24.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/prometheus_flask_exporter-0.23.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/prompt-toolkit-3.0.52-pyha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/prompt_toolkit-3.0.52-hd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/propcache-0.3.1-py313h8060acc_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/proto-plus-1.27.2-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/protobuf-6.33.5-py313hf481762_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/psutil-7.2.2-py313h54dd161_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/psycopg2-2.9.10-py313hdc942f6_4.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/psycopg2-binary-2.9.10-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-hb9d3cd8_1002.conda + - 
conda: https://conda.anaconda.org/conda-forge/noarch/ptyprocess-0.7.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pure_eval-0.2.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pyarrow-23.0.1-py313h78bf25f_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pyarrow-core-23.0.1-py313h98bfbea_0_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pyasn1-0.6.3-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pyasn1-modules-0.4.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pybtex-0.25.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pybtex-docutils-1.0.3-pyhcf101f3_4.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.12.5-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pydantic-core-2.41.5-py313h843e2db_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-extra-types-2.11.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-settings-2.13.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pydata-sphinx-theme-0.16.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pygls-2.1.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.20.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pygraphviz-1.14-py313h8a0a71b_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pyicu-2.16.2-py313hbb8943b_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pyjwt-2.12.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pymdown-extensions-10.21.2-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-64/pynacl-1.6.2-py313h5008379_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pyogrio-0.12.1-py313hae45665_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pyopenssl-26.0.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.3.2-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pyproj-3.7.2-py313he648cc1_4.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pyside6-6.11.0-py313hcd51b16_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha55dd90_7.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-9.0.2-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-console-scripts-1.4.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-cov-7.1.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-mock-3.15.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-order-1.3.0-pyh29332c3_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.8.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.13.12-hc97d973_100_cp313.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/python-calamine-0.6.1-py313h5c7d99a_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhe01879c_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-dotenv-1.2.2-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/python-duckdb-1.4.4-py313h7033f15_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-fastjsonschema-2.21.2-pyhe01879c_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-flatbuffers-25.9.23-pyh1e1bc0e_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/python-gil-3.13.12-h4df99d1_100.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/python-igraph-1.0.0-py313h7033f15_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-json-logger-2.0.7-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-multipart-0.0.22-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-slugify-8.0.4-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2025.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.13-8_cp313.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pytimeparse-1.1.8-py_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/pytz-2026.1.post1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pyu2f-0.1.5-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pywin32-on-windows-0.1.0-pyh1179c8e_3.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0.3-py313h3dea7bd_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pyzmq-27.1.0-py312hda471dd_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/qhull-2020.2-h434a139_5.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/qt6-main-6.11.0-pl5321h16c4a6b_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/querystring_parser-1.2.4-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/questionary-2.1.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/re2-2025.11.05-h5301d42_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/readline-8.3-h853b02a_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/readthedocs-sphinx-ext-2.2.5-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/referencing-0.37.0-pyhcf101f3_0.conda + - 
conda: https://conda.anaconda.org/conda-forge/linux-64/regex-2026.3.32-py313h07c4f96_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/requests-2.33.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/requests-oauthlib-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/requests-toolbelt-1.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/responses-0.26.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/restructuredtext_lint-2.0.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/rfc3339-validator-0.1.4-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/rfc3986-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/rfc3986-validator-0.1.1-pyh9f0ad1d_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/rfc3987-syntax-1.1.0-pyhe01879c_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/rich-14.3.3-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/rich-toolkit-0.19.7-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/ripgrep-15.1.0-hdab8a38_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/roman-numerals-4.1.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/rpds-py-0.30.0-py313h843e2db_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/rsa-4.9.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/ruff-0.15.8-h7805a7d_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ruff-lsp-0.0.62-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/s2n-1.7.1-h1cbb8d7_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/s3fs-2026.2.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/s3transfer-0.16.0-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-64/scikit-learn-1.8.0-np2py313h16d504d_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/scipy-1.17.1-py313h4b8bb8b_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/send2trash-2.1.0-pyha191276_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/setuptools-82.0.1-pyh332efcf_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/shapely-2.1.2-py313had47c43_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/shellingham-1.5.4-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/simpleeval-1.0.7-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhe01879c_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/skops-0.13.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/slack-sdk-3.41.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/smmap-5.0.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/snappy-1.2.2-h03e3b7b_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sniffio-1.3.1-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-3.0.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/snowplow-tracker-1.0.4-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sortedcontainers-2.4.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/soupsieve-2.8.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-9.1.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-autoapi-3.8.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-design-0.7.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-issues-6.0.0-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/sphinx-reredirects-1.1.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-bibtex-2.6.5-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-googleanalytics-0.5-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.1.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-mermaid-2.0.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/splink-4.0.16-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/sqlalchemy-2.0.48-py313h54dd161_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sqlglot-28.0.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sqlglot-rs-28.0.0-h5602114_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/sqlglotrs-0.7.3-py313h5a021e7_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.52.0-h04a0ce9_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sqlparse-0.5.4-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/stack_data-0.6.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/starlette-1.0.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/stdlib-list-0.12.0-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/stevedore-5.7.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/stringcase-1.2.0-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/structlog-25.5.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tabulate-0.10.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/taplo-0.9.3-h53e704d_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/termcolor-3.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/terminado-0.18.1-pyhc90fa1f_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/terraform-1.14.7-h76a2195_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/text-unidecode-1.3-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/texttable-1.7.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/textual-8.2.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.6.0-pyhecae5ae_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/timezonefinder-8.2.2-py313h54dd161_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tinycss2-1.4.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h366c992_103.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tomli-2.4.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tomlkit-0.13.2-pyha770c72_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/toposort-1.10-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/tornado-6.5.5-py313h07c4f96_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tqdm-4.67.3-pyh8f84b5b_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.14.3-pyhd8ed1ab_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/truststore-0.10.4-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/ty-0.0.27-h4e94fc0_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typeguard-4.5.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typer-0.24.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/types-pyyaml-6.0.12.20250915-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/types-requests-2.33.0.20260327-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.15.0-h396c80c_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.2-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.15.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typing_inspect-0.9.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typing_utils-0.1.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/typos-1.45.0-hdab8a38_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025c-hc9c84f9_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/uc-micro-py-2.0.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/universal-pathlib-0.3.10-hd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/universal_pathlib-0.3.10-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/uri-template-1.3.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/uriparser-0.9.8-hac33072_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/uritemplate-4.2.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/urllib3-2.6.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/uv-0.11.2-h0f56927_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/uvicorn-0.42.0-pyhc90fa1f_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/uvicorn-standard-0.42.0-h76e4700_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/uvloop-0.22.1-py313h07c4f96_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/validators-0.35.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/watchdog-6.0.0-py313hd5f5364_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/watchfiles-1.1.1-py313h5c7d99a_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/wayland-1.25.0-hd6090a7_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/wcwidth-0.6.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/webcolors-25.10.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/webencodings-0.5.1-pyhd8ed1ab_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/websocket-client-1.9.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/websockets-15.0.1-py313h54dd161_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/werkzeug-3.1.7-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/widgetsnbextension-4.0.15-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/wrapt-2.1.2-py313h07c4f96_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.1-h4f16b4b_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xcb-util-cursor-0.1.6-hb03c661_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-hb711507_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.1-hb711507_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.10-hb711507_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.2-hb711507_0.conda + 
- conda: https://conda.anaconda.org/conda-forge/linux-64/xerces-c-3.3.0-hd9031aa_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.47-hb03c661_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/xlsxwriter-3.2.9-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.2-hb9d3cd8_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.6-he73a12e_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.13-he1eb515_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.12-hb03c661_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxcomposite-0.4.7-hb03c661_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxcursor-1.2.3-hb9d3cd8_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdamage-1.1.6-hb9d3cd8_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.5-hb03c661_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.7-hb03c661_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxfixes-6.0.2-hb03c661_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxi-1.8.2-hb9d3cd8_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxinerama-1.1.6-hecca717_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrandr-1.5.5-hb03c661_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.12-hb9d3cd8_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxtst-1.2.5-hb9d3cd8_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libxxf86vm-1.1.7-hb03c661_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-xorgproto-2025.1-hb03c661_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/xyzservices-2026.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h280c20c_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/yarl-1.23.0-py313h3dea7bd_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/yaspin-3.4.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/zeromq-4.3.5-h41580af_10.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/zip-3.0-hd590300_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/zlib-1.3.2-h25fd6f3_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/zlib-ng-2.3.3-hceb46e0_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb78ec9c_6.conda + - pypi: https://files.pythonhosted.org/packages/4e/5e/4f5fe4b89fde1dc3ed0eb51bd4ce4c0bca406246673d370ea2ad0c58d747/detect_secrets-1.5.0-py3-none-any.whl + - pypi: ./ + osx-64: + - conda: https://conda.anaconda.org/conda-forge/osx-64/_openmp_mutex-4.5-7_kmp_llvm.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/accessible-pygments-0.0.5-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/adwaita-icon-theme-49.0-unix_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/agate-1.9.1-pyh707e725_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/aiobotocore-3.3.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/aiohappyeyeballs-2.6.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aiohttp-3.13.5-py313h6f5309d_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/aioitertools-0.13.0-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/aiosignal-1.4.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/alabaster-1.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/alembic-1.18.4-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/altair-6.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/annotated-doc-0.0.4-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/antlr-python-runtime-4.13.2-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/anyio-4.13.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/appnope-0.1.4-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/arelle-release-2.39.5-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/argon2-cffi-25.1.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/argon2-cffi-bindings-25.1.0-py313hf050af9_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/arpack-3.9.1-nompi_hdfe9103_102.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/arrow-1.4.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/astroid-4.1.2-py313habf4b1d_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/asttokens-3.0.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/async-lru-2.3.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/atk-1.0-2.38.0-h4bec284_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/attrs-26.1.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-c-auth-0.10.1-hfd47d4b_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-c-cal-0.9.13-hea39f9f_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-64/aws-c-common-0.12.6-h8616949_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-c-compression-0.3.2-hb9ea233_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-c-event-stream-0.6.0-ha9bd753_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-c-http-0.10.12-h1037d30_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-c-io-0.26.3-hc95b61d_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-c-mqtt-0.15.2-h6fabf1c_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-c-s3-0.11.5-hb15a67f_5.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-c-sdkutils-0.2.4-h901532c_4.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-checksums-0.2.10-h31279ed_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-crt-cpp-0.37.4-h1135fef_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-sdk-cpp-1.11.747-h17cee85_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/azure-core-cpp-1.16.2-h87f1c7e_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/azure-identity-cpp-1.13.3-h1135191_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/azure-storage-blobs-cpp-12.16.0-h9b4319f_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/azure-storage-common-cpp-12.12.0-h7373072_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/azure-storage-files-datalake-cpp-12.14.0-he1781d6_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/babel-2.18.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/backoff-2.2.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/backports.zstd-1.3.0-py313h591e92b_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/bcrypt-5.0.0-py313hcc225dc_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/beautifulsoup4-4.14.3-pyha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/bleach-6.3.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/bleach-with-css-6.3.0-hbca2aae_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/blinker-1.9.0-pyhff2d567_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/blosc-1.21.6-hd145fbb_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/boto3-1.42.70-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/botocore-1.42.70-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/bottle-0.13.4-pyhe01879c_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/bottleneck-1.6.0-np2py313h4e95564_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/branca-0.8.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/brotli-1.2.0-hf139dec_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/brotli-bin-1.2.0-h8616949_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/brotli-python-1.2.0-py313h8d69aa9_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/bzip2-1.0.8-h500dc9f_9.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/c-ares-1.34.6-hb5e19a0_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.2.25-hbd8a1cb_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cached-property-1.5.2-hd8ed1ab_1.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/cached_property-1.5.2-pyha770c72_1.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/cachetools-7.0.5-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/cairo-1.18.4-h7656bdc_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/catalystcoop.dbfread-3.0.0-pyhd8ed1ab_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/catalystcoop.ferc_xbrl_extractor-1.8.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cattrs-26.1.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/certifi-2026.2.25-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/cffi-2.0.0-py313hf57695f_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/chardet-5.2.0-pyhd8ed1ab_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.4.6-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/click-8.3.1-pyh8f84b5b_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/click-aliases-1.0.1-py_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/cloudpickle-3.1.2-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/coloredlogs-14.0-pyhd8ed1ab_3.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/comm-0.2.3-pyhe01879c_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/contourpy-1.3.3-py313h98b818e_4.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/coverage-7.13.5-py313h035b7d0_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cpython-3.13.12-py313hd8ed1ab_100.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/cryptography-46.0.5-py313h6e3882f_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/curl-8.19.0-h8f0b9e4_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhcf101f3_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/cyrus-sasl-2.1.28-h7cc0300_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/daff-1.4.2-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-1.12.21-pyh742d864_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/dagster-cloud-cli-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-dbt-1!0.28.21-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-dg-cli-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-dg-core-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-gcp-1!0.28.21-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-graphql-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-pandas-1!0.28.21-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-pipes-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-postgres-1!0.28.21-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-shared-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-webserver-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/databricks-sdk-0.102.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/db-dtypes-1.4.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dbt-adapters-1.22.10-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dbt-common-1.37.3-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dbt-core-1.11.7-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dbt-duckdb-1.9.4-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/dbt-extractor-0.6.0-py313h35a1e28_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dbt-protos-1.0.419-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dbt-semantic-interfaces-0.9.2-pyh3cfb1c2_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-64/debugpy-1.8.20-py313h8b5a893_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/decorator-5.2.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/deepdiff-8.6.2-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/defusedxml-0.7.1-pyhd8ed1ab_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/deprecated-1.3.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dnspython-2.8.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/doc8-2.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/docker-py-7.1.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/docstring_parser-0.17.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/docutils-0.21.2-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/email-validator-2.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/email_validator-2.3.0-hd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/epoxy-1.5.10-h8616949_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/et_xmlfile-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/executing-2.2.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/fastapi-0.135.3-hbd727af_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/fastapi-cli-0.0.23-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/fastapi-core-0.135.3-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.25.2-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/flask-3.1.3-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/flask-cors-6.0.2-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/folium-0.20.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/fontconfig-2.17.1-h7a4440b_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-hc364b38_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/fonttools-4.62.0-py313h035b7d0_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/fqdn-1.5.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/freetype-2.14.3-h694c41f_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/freexl-2.0.0-h3183152_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/fribidi-1.0.16-h8616949_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/frictionless-5.18.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/frozenlist-1.7.0-py313haf29b43_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/fsspec-2026.2.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/future-1.0.0-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/gcsfs-2026.2.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/gdal-3.12.3-py313h369b93d_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-64/gdk-pixbuf-2.44.6-hae309b2_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/geoarrow-c-0.3.1-py313hbc4457e_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/geoarrow-pyarrow-0.2.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/geoarrow-types-0.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/geopandas-1.1.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/geopandas-base-1.1.3-pyha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/geos-3.14.1-he483b9e_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/gflags-2.2.2-hac325c4_1005.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/giflib-5.2.2-h10d778d_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/gitdb-4.0.12-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/github3.py-4.0.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/gitpython-3.1.46-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/glib-tools-2.86.4-h8501676_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/glog-0.7.1-h2790a97_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/glpk-5.0-h3cb5acd_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-64/gmp-6.3.0-hf036a51_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-api-core-2.30.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-api-core-grpc-2.30.1-pyh0c4a08c_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-api-python-client-2.193.0-pyh332efcf_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-auth-2.49.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-auth-httplib2-0.3.0-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/google-auth-oauthlib-1.3.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-cloud-bigquery-core-3.41.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-cloud-core-2.5.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/google-cloud-sdk-562.0.0-py313habf4b1d_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-cloud-storage-3.10.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-cloud-storage-control-1.10.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/google-crc32c-1.8.0-py313h49a2f01_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-resumable-media-2.8.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/googleapis-common-protos-1.73.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/googleapis-common-protos-grpc-1.73.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/gql-4.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/gql-with-requests-4.0.0-hd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/graphene-3.4.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/graphite2-1.3.14-h21dd04a_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/graphql-core-3.2.8-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/graphql-relay-3.2.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/graphviz-14.1.2-h44fc223_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/greenlet-3.3.2-py313h5fe49f0_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/grpc-google-iam-v1-0.14.4-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/grpcio-1.78.1-py313h3d05629_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/grpcio-health-checking-1.78.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/grpcio-status-1.78.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/gtk3-3.24.52-hf2d442a_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/gts-0.7.6-h53e17e3_4.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/gunicorn-23.0.0-py313habf4b1d_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/h11-0.16.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/h2-4.3.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/h3-4.4.1-h53ec75d_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/h3-py-4.4.1-py313ha9a7918_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/harfbuzz-13.2.1-hf0bc557_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/hicolor-icon-theme-0.17-h694c41f_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/hpack-4.1.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/httpcore-1.0.9-pyh29332c3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/httplib2-0.31.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/httptools-0.7.1-py313hf050af9_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/httpx-0.28.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/huey-2.6.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/humanfriendly-10.0-pyh707e725_8.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/humanize-4.15.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/hyperframe-6.1.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/hypothesis-6.151.10-pyha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/icu-78.3-h25d91c4_0.conda + - 
conda: https://conda.anaconda.org/conda-forge/noarch/idna-3.11-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/igraph-1.0.1-h049a311_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/imagesize-2.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.8.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.5.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/invoke-2.2.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ipykernel-7.2.0-pyh5552912_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ipython-9.12.0-pyhecfbec7_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ipython_pygments_lexers-1.1.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ipywidgets-8.1.8-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/isodate-0.7.2-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/isoduration-20.11.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/itsdangerous-2.2.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jaconv-0.5.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jedi-0.19.2-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/jellyfish-1.2.1-py313ha265c4a_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.6-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jmespath-1.1.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/joblib-1.5.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/jq-1.8.1-h2287256_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-64/json-c-0.18-hc62ec3d_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/json5-0.14.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jsonpointer-3.1.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-4.26.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-specifications-2025.9.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-with-format-nongpl-4.26.0-hcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter-1.1.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter-lsp-2.3.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter-resource-usage-1.2.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_client-8.8.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_console-6.6.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_core-5.9.1-pyhc90fa1f_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_events-0.12.0-pyhe01879c_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_server-2.17.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_server_terminals-0.5.4-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab-4.5.6-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab-lsp-5.2.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_pygments-0.3.0-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_server-2.28.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_widgets-3.0.16-pyhcf101f3_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-64/kiwisolver-1.5.0-py313h224b87c_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/krb5-1.22.2-h207b36a_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/latexcodec-2.0.1-pyh9f0ad1d_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-64/lcms2-2.18-h90db99b_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/leather-0.4.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/lerc-4.1.0-h35c7297_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libabseil-20260107.1-cxx17_h7ed6875_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libarchive-3.8.6-gpl_h2bf6321_100.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libarrow-23.0.1-h6b6ab80_9_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libarrow-acero-23.0.1-h66151e4_9_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libarrow-compute-23.0.1-h5d4fa73_9_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libarrow-dataset-23.0.1-h66151e4_9_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libarrow-substrait-23.0.1-h613493e_9_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libblas-3.11.0-6_he492b99_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libbrotlicommon-1.2.0-h8616949_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libbrotlidec-1.2.0-h8616949_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libbrotlienc-1.2.0-h8616949_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libcblas-3.11.0-6_h9b27e0a_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libcrc32c-1.1.2-he49afe7_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-64/libcurl-8.19.0-h8f0b9e4_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-64/libcxx-22.1.2-h19cb2f5_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libdeflate-1.25-h517ebb2_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libedit-3.1.20250104-pl5321ha958ccf_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libev-4.33-h10d778d_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libevent-2.1.12-ha90c15b_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libexpat-2.7.5-hcc62823_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libffi-3.5.2-hd1f9c09_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libfreetype-2.14.3-h694c41f_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libfreetype6-2.14.3-h58fbd8d_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libgcc-15.2.0-h08519bb_18.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libgd-2.3.3-hb2c11ec_12.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libgdal-core-3.12.3-h24162b0_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libgfortran-15.2.0-h7e5c614_18.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libgfortran5-15.2.0-hd16e46c_18.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libglib-2.86.4-hec30fc1_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libgoogle-cloud-3.3.0-h10ed7cb_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libgoogle-cloud-storage-3.3.0-hea209c6_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libgrpc-1.78.1-h147dede_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libhwy-1.3.0-hab838a1_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libiconv-1.18-h57a12c2_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libintl-0.25.1-h3184127_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-64/libjpeg-turbo-3.1.2-h8616949_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libjxl-0.11.2-hde0fb83_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libkml-1.3.0-h450b6c2_1022.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/liblapack-3.11.0-6_h859234e_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/liblzma-5.8.2-h11316ed_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libmpdec-4.0.0-hf3981d6_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libnghttp2-1.68.1-h70048d4_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libntlm-1.8-h6e16a3a_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libopenblas-0.3.32-openmp_h9e49c7b_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libopentelemetry-cpp-1.26.0-h7a0a166_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libopentelemetry-cpp-headers-1.26.0-h694c41f_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libparquet-23.0.1-h527dc83_9_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libpng-1.6.56-he930e7c_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libpq-18.3-h94170d9_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libprotobuf-6.33.5-h29d92e8_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libre2-11-2025.11.05-h6e8c311_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/librsvg-2.62.1-h7321050_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/librttopo-1.1.0-h16cd5d8_20.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libsodium-1.0.21-hc6ced15_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libspatialite-5.1.0-gpl_h77f1de8_120.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libsqlite-3.52.0-h77d7759_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-64/libssh2-1.11.1-hed3591d_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libthrift-0.22.0-h687e942_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libtiff-4.7.1-ha0a348c_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libutf8proc-2.11.3-hc282952_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libuv-1.51.0-h58003a5_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libwebp-base-1.6.0-hb807250_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libxcb-1.17.0-hf1f96e2_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libxml2-16-2.15.2-h7a90416_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libxml2-2.15.2-hd552753_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libxml2-devel-2.15.2-hd552753_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libxslt-1.1.43-h486b42e_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libzlib-1.3.2-hbb4bfdb_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/linkify-it-py-2.1.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/llvm-openmp-22.1.2-h0d3cbff_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/llvmlite-0.47.0-py313he3abfad_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/loro-1.10.3-py313h101a90a_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lsprotocol-2025.0.0-pyhe01879c_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/lxml-6.0.2-py313h00bd3da_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/lz4-c-1.10.0-h240833e_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/lzo-2.10-h4132b18_1002.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mako-1.3.10-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mapclassify-2.10.0-pyhd8ed1ab_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-64/marimo-0.21.1-py313h11baec3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/markdown-3.10.2-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/markdown-it-py-4.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/marko-2.2.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/markupsafe-3.0.3-py313h035b7d0_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mashumaro-3.14-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/matplotlib-3.10.8-py313habf4b1d_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/matplotlib-base-3.10.8-py313h4ad75b8_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/matplotlib-inline-0.2.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/matplotx-0.3.10-pyhd8ed1ab_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/mdit-py-plugins-0.5.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mdurl-0.1.2-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/memray-1.19.2-py313ha00f39b_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/minizip-4.0.10-hfb7a1ec_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mistune-3.2.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mlflow-3.10.1-pyh707e725_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mlflow-skinny-3.10.1-pyh707e725_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mlflow-ui-3.10.1-pyh707e725_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/more-itertools-10.8.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/msgpack-python-1.1.2-py313h5eff275_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/msgspec-0.20.0-py313h36bb7f5_2.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-64/multidict-6.7.1-py313h84cef87_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/muparser-2.3.5-hb996559_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mypy_extensions-1.1.0-pyha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/narwhals-2.18.1-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/nbclient-0.10.4-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/nbconvert-7.17.0-h14065e2_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/nbconvert-core-7.17.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/nbconvert-pandoc-7.17.0-hc3985f0_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/nbformat-5.10.4-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/ncurses-6.5-h0622a9a_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/nest-asyncio-1.6.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/networkx-3.6.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/nlohmann_json-3.12.0-h06076ce_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/nodejs-24.14.1-hc6dc384_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/notebook-7.5.5-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/notebook-shim-0.2.4-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/numba-0.65.0-py313h4fc6aae_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/numexpr-2.14.1-py313h821d116_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/numpy-2.4.3-py313hb870fc3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/oauth2client-4.1.3-pyhd8ed1ab_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/oauthlib-3.3.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/oniguruma-6.9.10-h6e16a3a_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/openjpeg-2.5.4-h52bb76a_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/openldap-2.6.10-h2f5043c_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/openpyxl-3.1.5-py313hc34da29_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/openssl-3.6.1-hb6871ef_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/opentelemetry-api-1.16.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/opentelemetry-proto-1.40.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/opentelemetry-sdk-1.16.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/opentelemetry-semantic-conventions-0.37b0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/orc-2.3.0-hb9b210e_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/orderly-set-5.5.0-pyhe01879c_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/orjson-3.11.8-py313h13dbcd0_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/overrides-7.7.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-26.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/pandas-2.3.3-py313h2f264a9_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pandera-base-0.30.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pandera-polars-0.30.1-hd6cc441_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/pandoc-3.9.0.2-h694c41f_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pandocfilters-1.5.0-pyhd8ed1ab_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-64/pango-1.56.4-hf280016_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-64/parallel-20260122-h694c41f_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/paramiko-4.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/parsedatetime-2.4-py_1.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/parso-0.8.6-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pathlib-abc-0.5.2-pyh9692d8f_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pathspec-0.12.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pbr-7.0.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/pcre2-10.47-h13923f0_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/perl-5.32.1-7_h10d778d_perl5.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/petl-1.7.17-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pexpect-4.9.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/pillow-12.2.0-py313h23d381d_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pip-26.0.1-pyh145f28c_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/pixman-0.46.4-ha059160_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.9.4-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/plotly-6.6.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pluggy-1.6.0-pyhf9edf01_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/polars-1.39.3-pyh58ad624_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/polars-runtime-32-1.39.3-py310h428a0da_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/prek-0.3.8-h19f9e61_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/prettier-3.8.1-h07b0e94_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/prettytable-3.17.0-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-64/proj-9.8.0-he69a98e_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/prometheus-cpp-1.3.0-h7802330_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/prometheus_client-0.24.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/prometheus_flask_exporter-0.23.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/prompt-toolkit-3.0.52-pyha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/prompt_toolkit-3.0.52-hd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/propcache-0.3.1-py313h717bdf5_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/proto-plus-1.27.2-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/protobuf-6.33.5-py313hc1d2497_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/psutil-7.2.2-py313h16366db_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/psycopg2-2.9.10-py313h250ff00_4.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/psycopg2-binary-2.9.10-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/pthread-stubs-0.4-h00291cd_1002.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ptyprocess-0.7.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pure_eval-0.2.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/pyarrow-23.0.1-py313habf4b1d_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/pyarrow-core-23.0.1-py313h345cca6_0_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pyasn1-0.6.3-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pyasn1-modules-0.4.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pybtex-0.25.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pybtex-docutils-1.0.3-pyhcf101f3_4.conda + - 
conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.12.5-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/pydantic-core-2.41.5-py313hcc225dc_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-extra-types-2.11.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-settings-2.13.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pydata-sphinx-theme-0.16.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pygls-2.1.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.20.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/pygraphviz-1.14-py313hd8bc1fb_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/pyicu-2.16.2-py313h68ab0b4_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pyjwt-2.12.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pymdown-extensions-10.21.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/pynacl-1.6.2-py313hf61a874_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/pyobjc-core-12.1-py313h07bcf3a_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/pyobjc-framework-cocoa-12.1-py313hf669bc3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/pyogrio-0.12.1-py313h8e1be7a_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pyopenssl-26.0.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.3.2-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/pyproj-3.7.2-py313haf973d7_4.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha55dd90_7.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-9.0.2-pyhcf101f3_0.conda + - 
conda: https://conda.anaconda.org/conda-forge/noarch/pytest-console-scripts-1.4.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-cov-7.1.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-mock-3.15.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-order-1.3.0-pyh29332c3_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.8.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/python-3.13.12-h894a449_100_cp313.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/python-calamine-0.6.1-py313ha265c4a_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhe01879c_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-dotenv-1.2.2-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/python-duckdb-1.4.4-py313hf4a977d_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-fastjsonschema-2.21.2-pyhe01879c_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-flatbuffers-25.9.23-pyh1e1bc0e_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-gil-3.13.12-h4df99d1_100.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/python-igraph-1.0.0-py313hc4a83b5_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-json-logger-2.0.7-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-multipart-0.0.22-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-slugify-8.0.4-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2025.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.13-8_cp313.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pytimeparse-1.1.8-py_0.tar.bz2 + - conda: 
https://conda.anaconda.org/conda-forge/noarch/pytz-2026.1.post1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pyu2f-0.1.5-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pywin32-on-windows-0.1.0-pyh1179c8e_3.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-64/pyyaml-6.0.3-py313h7c6a591_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/pyzmq-27.1.0-py312h2ac7433_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/qhull-2020.2-h3c5361c_5.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/querystring_parser-1.2.4-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/questionary-2.1.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/re2-2025.11.05-h77e0585_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/readline-8.3-h68b038d_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/readthedocs-sphinx-ext-2.2.5-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/referencing-0.37.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/regex-2026.3.32-py313hf59fe81_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/requests-2.33.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/requests-oauthlib-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/requests-toolbelt-1.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/responses-0.26.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/restructuredtext_lint-2.0.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/rfc3339-validator-0.1.4-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/rfc3986-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/rfc3986-validator-0.1.1-pyh9f0ad1d_0.tar.bz2 + - 
conda: https://conda.anaconda.org/conda-forge/noarch/rfc3987-syntax-1.1.0-pyhe01879c_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/rich-14.3.3-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/rich-toolkit-0.19.7-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/ripgrep-15.1.0-h009cd8f_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/roman-numerals-4.1.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/rpds-py-0.30.0-py313hcc225dc_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/rsa-4.9.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/ruff-0.15.8-h16586dd_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ruff-lsp-0.0.62-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/s3fs-2026.2.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/s3transfer-0.16.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/scikit-learn-1.8.0-np2py313he2891f2_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/scipy-1.17.1-py313h9cbb6b6_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/send2trash-2.1.0-pyh5552912_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/setuptools-82.0.1-pyh332efcf_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/shapely-2.1.2-py313h210a477_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/shellingham-1.5.4-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/simpleeval-1.0.7-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhe01879c_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/skops-0.13.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/slack-sdk-3.41.0-pyhcf101f3_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/smmap-5.0.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/snappy-1.2.2-h01f5ddf_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sniffio-1.3.1-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-3.0.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/snowplow-tracker-1.0.4-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sortedcontainers-2.4.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/soupsieve-2.8.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-9.1.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-autoapi-3.8.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-design-0.7.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-issues-6.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-reredirects-1.1.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-bibtex-2.6.5-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-googleanalytics-0.5-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.1.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-mermaid-2.0.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-2.0.0-pyhd8ed1ab_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/splink-4.0.16-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/sqlalchemy-2.0.48-py313h22ab4a2_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sqlglot-28.0.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sqlglot-rs-28.0.0-h5602114_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/sqlglotrs-0.7.3-py313hd090e6f_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/sqlite-3.52.0-hd4d344e_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sqlparse-0.5.4-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/stack_data-0.6.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/starlette-1.0.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/stdlib-list-0.12.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/stevedore-5.7.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/stringcase-1.2.0-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/structlog-25.5.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tabulate-0.10.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/taplo-0.9.3-hf3953a5_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/termcolor-3.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/terminado-0.18.1-pyhc90fa1f_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/terraform-1.14.7-hdaada87_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/text-unidecode-1.3-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/texttable-1.7.0-pyhd8ed1ab_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/textual-8.2.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.6.0-pyhecae5ae_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/timezonefinder-8.2.2-py313h22ab4a2_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tinycss2-1.4.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/tk-8.6.13-h7142dee_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tomli-2.4.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tomlkit-0.13.2-pyha770c72_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/toposort-1.10-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/tornado-6.5.5-py313hf59fe81_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tqdm-4.67.3-pyh8f84b5b_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.14.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/truststore-0.10.4-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/ty-0.0.27-h479939e_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typeguard-4.5.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typer-0.24.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/types-pyyaml-6.0.12.20250915-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/types-requests-2.33.0.20260327-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.15.0-h396c80c_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.2-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.15.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typing_inspect-0.9.0-pyhd8ed1ab_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/typing_utils-0.1.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/typos-1.45.0-h009cd8f_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025c-hc9c84f9_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/uc-micro-py-2.0.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/universal-pathlib-0.3.10-hd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/universal_pathlib-0.3.10-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/uri-template-1.3.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/uriparser-0.9.8-h6aefe2f_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/uritemplate-4.2.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/urllib3-2.6.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/uv-0.11.2-hbc54fe3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/uvicorn-0.42.0-pyhc90fa1f_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/uvicorn-standard-0.42.0-h76e4700_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/uvloop-0.22.1-py313hf050af9_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/validators-0.35.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/watchdog-6.0.0-py313h22ab4a2_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/watchfiles-1.1.1-py313ha265c4a_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/wcwidth-0.6.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/webcolors-25.10.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/webencodings-0.5.1-pyhd8ed1ab_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/websocket-client-1.9.0-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-64/websockets-15.0.1-py313h6865ccc_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/werkzeug-3.1.7-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/widgetsnbextension-4.0.15-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/wrapt-2.1.2-py313hf59fe81_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/xerces-c-3.3.0-ha8d0d41_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/xlsxwriter-3.2.9-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/xorg-libxau-1.0.12-h8616949_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/xorg-libxdmcp-1.1.5-h8616949_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/xyzservices-2026.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/yaml-0.2.5-h4132b18_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/yarl-1.23.0-py313h035b7d0_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/yaspin-3.4.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/zeromq-4.3.5-h27d9b8f_10.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/zip-3.0-h0dc2134_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/zlib-1.3.2-hbb4bfdb_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/zlib-ng-2.3.3-h8bce59a_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/zstd-1.5.7-h3eecb57_6.conda + - pypi: https://files.pythonhosted.org/packages/4e/5e/4f5fe4b89fde1dc3ed0eb51bd4ce4c0bca406246673d370ea2ad0c58d747/detect_secrets-1.5.0-py3-none-any.whl + - pypi: ./ + osx-arm64: + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/_openmp_mutex-4.5-7_kmp_llvm.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda + - 
conda: https://conda.anaconda.org/conda-forge/noarch/accessible-pygments-0.0.5-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/adwaita-icon-theme-49.0-unix_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/agate-1.9.1-pyh707e725_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/aiobotocore-3.3.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/aiohappyeyeballs-2.6.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aiohttp-3.13.5-py313h53c0e3e_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/aioitertools-0.13.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/aiosignal-1.4.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/alabaster-1.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/alembic-1.18.4-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/altair-6.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/annotated-doc-0.0.4-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/antlr-python-runtime-4.13.2-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/anyio-4.13.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/appnope-0.1.4-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/arelle-release-2.39.5-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/argon2-cffi-25.1.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/argon2-cffi-bindings-25.1.0-py313h6535dbc_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/arpack-3.9.1-nompi_h1f29f7c_102.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/arrow-1.4.0-pyhcf101f3_0.conda + - 
conda: https://conda.anaconda.org/conda-forge/osx-arm64/astroid-4.1.2-py313h8f79df9_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/asttokens-3.0.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/async-lru-2.3.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/atk-1.0-2.38.0-hd03087b_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/attrs-26.1.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-auth-0.10.1-hcb83491_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-cal-0.9.13-h6ee9776_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-common-0.12.6-hc919400_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-compression-0.3.2-h3e7f9b5_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-event-stream-0.6.0-h351c84d_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-http-0.10.12-h95cdebe_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-io-0.26.3-h4137820_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-mqtt-0.15.2-h69e7467_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-s3-0.11.5-ha5d16b2_5.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-sdkutils-0.2.4-h16f91aa_4.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-checksums-0.2.10-h3e7f9b5_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-crt-cpp-0.37.4-h5505c15_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-sdk-cpp-1.11.747-had22720_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/azure-core-cpp-1.16.2-he5ae378_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/azure-identity-cpp-1.13.3-h810541e_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/azure-storage-blobs-cpp-12.16.0-hc57151b_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/azure-storage-common-cpp-12.12.0-he467506_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/azure-storage-files-datalake-cpp-12.14.0-hf8a9d22_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/babel-2.18.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/backoff-2.2.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/backports.zstd-1.3.0-py313h48bb75e_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/bcrypt-5.0.0-py313h2c089d5_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/beautifulsoup4-4.14.3-pyha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/bleach-6.3.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/bleach-with-css-6.3.0-hbca2aae_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/blinker-1.9.0-pyhff2d567_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/blosc-1.21.6-h7dd00d9_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/boto3-1.42.70-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/botocore-1.42.70-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/bottle-0.13.4-pyhe01879c_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/bottleneck-1.6.0-np2py313hc22c943_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/branca-0.8.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/brotli-1.2.0-h7d5ae5b_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/brotli-bin-1.2.0-hc919400_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/brotli-python-1.2.0-py313hde1f3bb_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/bzip2-1.0.8-hd037594_9.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/c-ares-1.34.6-hc919400_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.2.25-hbd8a1cb_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cached-property-1.5.2-hd8ed1ab_1.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/cached_property-1.5.2-pyha770c72_1.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/cachetools-7.0.5-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/cairo-1.18.4-he0f2337_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/catalystcoop.dbfread-3.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/catalystcoop.ferc_xbrl_extractor-1.8.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cattrs-26.1.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/certifi-2026.2.25-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/cffi-2.0.0-py313h224173a_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/chardet-5.2.0-pyhd8ed1ab_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.4.6-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/click-8.3.1-pyh8f84b5b_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/click-aliases-1.0.1-py_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/cloudpickle-3.1.2-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/coloredlogs-14.0-pyhd8ed1ab_3.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/comm-0.2.3-pyhe01879c_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/contourpy-1.3.3-py313h2af2deb_4.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/coverage-7.13.5-py313h65a2061_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cpython-3.13.12-py313hd8ed1ab_100.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/cryptography-46.0.5-py313he3f6fad_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/curl-8.19.0-hd5a2499_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhcf101f3_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/cyrus-sasl-2.1.28-hb961e35_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/daff-1.4.2-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-cloud-cli-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-dbt-1!0.28.21-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-dg-cli-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-dg-core-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-gcp-1!0.28.21-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-graphql-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-pandas-1!0.28.21-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-pipes-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-postgres-1!0.28.21-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-shared-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dagster-webserver-1.12.21-pyh742d864_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/databricks-sdk-0.102.0-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/db-dtypes-1.4.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dbt-adapters-1.22.10-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dbt-common-1.37.3-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dbt-core-1.11.7-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dbt-duckdb-1.9.4-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/dbt-extractor-0.6.0-py313h6e3aefc_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dbt-protos-1.0.419-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dbt-semantic-interfaces-0.9.2-pyh3cfb1c2_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/debugpy-1.8.20-py313h1188861_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/decorator-5.2.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/deepdiff-8.6.2-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/defusedxml-0.7.1-pyhd8ed1ab_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/deprecated-1.3.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/dnspython-2.8.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/doc8-2.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/docker-py-7.1.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/docstring_parser-0.17.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/docutils-0.21.2-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/email-validator-2.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/email_validator-2.3.0-hd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/epoxy-1.5.10-hc919400_2.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/et_xmlfile-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/executing-2.2.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/fastapi-0.135.3-hbd727af_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/fastapi-cli-0.0.23-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/fastapi-core-0.135.3-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.25.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/flask-3.1.3-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/flask-cors-6.0.2-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/folium-0.20.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/fontconfig-2.17.1-h2b252f5_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-hc364b38_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/fonttools-4.62.0-py313h65a2061_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/fqdn-1.5.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/freetype-2.14.3-hce30654_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/freexl-2.0.0-h3ab3353_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/fribidi-1.0.16-hc919400_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/frictionless-5.18.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/frozenlist-1.7.0-py313hf28abc0_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/fsspec-2026.2.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/future-1.0.0-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/gcsfs-2026.2.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/gdal-3.12.3-py313h543f8f2_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/gdk-pixbuf-2.44.6-h4e57454_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/geoarrow-c-0.3.1-py313h6deaedc_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/geoarrow-pyarrow-0.2.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/geoarrow-types-0.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/geopandas-1.1.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/geopandas-base-1.1.3-pyha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/geos-3.14.1-h5afe852_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/gflags-2.2.2-hf9b8971_1005.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/giflib-5.2.2-h93a5062_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/gitdb-4.0.12-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/github3.py-4.0.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/gitpython-3.1.46-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/glib-tools-2.86.4-h60c1bae_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/glog-0.7.1-heb240a5_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/glpk-5.0-h6d7a090_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/gmp-6.3.0-h7bae524_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-api-core-2.30.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-api-core-grpc-2.30.1-pyh0c4a08c_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-api-python-client-2.193.0-pyh332efcf_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-auth-2.49.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-auth-httplib2-0.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-auth-oauthlib-1.3.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-cloud-bigquery-core-3.41.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-cloud-core-2.5.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/google-cloud-sdk-562.0.0-py313h8f79df9_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-cloud-storage-3.10.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-cloud-storage-control-1.10.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/google-crc32c-1.8.0-py313h11ab6f4_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/google-resumable-media-2.8.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/googleapis-common-protos-1.73.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/googleapis-common-protos-grpc-1.73.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/gql-4.0.0-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/gql-with-requests-4.0.0-hd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/graphene-3.4.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/graphite2-1.3.14-hec049ff_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/graphql-core-3.2.8-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/graphql-relay-3.2.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/graphviz-14.1.2-hec8c438_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/greenlet-3.3.2-py313h1188861_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/grpc-google-iam-v1-0.14.4-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/grpcio-1.78.1-py313hfa4fce0_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/grpcio-health-checking-1.78.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/grpcio-status-1.78.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/gtk3-3.24.52-hc0f3e19_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/gts-0.7.6-he42f4ea_4.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/gunicorn-23.0.0-py313h8f79df9_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/h11-0.16.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/h2-4.3.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/h3-4.4.1-h248ca61_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/h3-py-4.4.1-py313hc37fe24_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/harfbuzz-13.2.1-h3103d1b_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/hicolor-icon-theme-0.17-hce30654_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/hpack-4.1.0-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/httpcore-1.0.9-pyh29332c3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/httplib2-0.31.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/httptools-0.7.1-py313h6535dbc_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/httpx-0.28.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/huey-2.6.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/humanfriendly-10.0-pyh707e725_8.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/humanize-4.15.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/hyperframe-6.1.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/hypothesis-6.151.10-pyha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/icu-78.3-hef89b57_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/idna-3.11-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/igraph-1.0.1-h1ee73af_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/imagesize-2.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.8.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.5.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/invoke-2.2.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ipykernel-7.2.0-pyh5552912_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ipython-9.12.0-pyhecfbec7_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ipython_pygments_lexers-1.1.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ipywidgets-8.1.8-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/isodate-0.7.2-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/isoduration-20.11.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/itsdangerous-2.2.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jaconv-0.5.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jedi-0.19.2-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/jellyfish-1.2.1-py313h0b74987_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.6-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jmespath-1.1.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/joblib-1.5.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/jq-1.8.1-hbc156a2_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/json-c-0.18-he4178ee_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/json5-0.14.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jsonpointer-3.1.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-4.26.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-specifications-2025.9.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-with-format-nongpl-4.26.0-hcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter-1.1.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter-lsp-2.3.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter-resource-usage-1.2.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_client-8.8.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_console-6.6.3-pyhd8ed1ab_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/jupyter_core-5.9.1-pyhc90fa1f_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_events-0.12.0-pyhe01879c_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_server-2.17.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_server_terminals-0.5.4-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab-4.5.6-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab-lsp-5.2.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_pygments-0.3.0-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_server-2.28.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyterlab_widgets-3.0.16-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/kiwisolver-1.5.0-py313h2af2deb_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/krb5-1.22.2-h385eeb1_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/latexcodec-2.0.1-pyh9f0ad1d_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lcms2-2.18-hdfa7624_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/leather-0.4.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lerc-4.1.0-h1eee2c3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libabseil-20260107.1-cxx17_h2062a1b_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libarchive-3.8.6-gpl_h6fbacd7_100.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libarrow-23.0.1-h2124f06_9_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libarrow-acero-23.0.1-hee8fe31_9_cpu.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/libarrow-compute-23.0.1-h3b6a98a_9_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libarrow-dataset-23.0.1-hee8fe31_9_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libarrow-substrait-23.0.1-h05be00f_9_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libblas-3.11.0-6_h51639a9_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libbrotlicommon-1.2.0-hc919400_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libbrotlidec-1.2.0-hc919400_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libbrotlienc-1.2.0-hc919400_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcblas-3.11.0-6_hb0561ab_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcrc32c-1.1.2-hbdafb3b_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcurl-8.19.0-hd5a2499_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-22.1.2-h55c6f16_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libdeflate-1.25-hc11a715_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libedit-3.1.20250104-pl5321hafb1f1b_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libev-4.33-h93a5062_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libevent-2.1.12-h2757513_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libexpat-2.7.5-hf6b4638_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libffi-3.5.2-hcf2aa1b_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libfreetype-2.14.3-hce30654_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libfreetype6-2.14.3-hdfa99f5_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libgcc-15.2.0-hcbb3090_18.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/libgd-2.3.3-h05bcc79_12.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libgdal-core-3.12.3-h38a4fdb_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libgfortran-15.2.0-h07b0088_18.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libgfortran5-15.2.0-hdae7583_18.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libglib-2.86.4-he378b5c_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libgoogle-cloud-3.3.0-he41eb1d_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libgoogle-cloud-storage-3.3.0-ha114238_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libgrpc-1.78.1-h3e3f78d_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libhwy-1.3.0-h48b13b8_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libiconv-1.18-h23cfdf5_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libintl-0.25.1-h493aca8_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libjpeg-turbo-3.1.2-hc919400_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libjxl-0.11.2-h913acd8_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libkml-1.3.0-hc33e383_1022.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/liblapack-3.11.0-6_hd9741b5_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/liblzma-5.8.2-h8088a28_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libmpdec-4.0.0-h84a0fba_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libnghttp2-1.68.1-h8f3e76b_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libntlm-1.8-h5505292_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libopenblas-0.3.32-openmp_he657e61_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libopentelemetry-cpp-1.26.0-h08d5cc3_0.conda + - 
conda: https://conda.anaconda.org/conda-forge/osx-arm64/libopentelemetry-cpp-headers-1.26.0-hce30654_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libparquet-23.0.1-h16c0493_9_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libpng-1.6.56-h132b30e_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libpq-18.3-hd341ff2_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libprotobuf-6.33.5-h4a5acfd_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libre2-11-2025.11.05-h4c27e2a_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/librsvg-2.62.1-he8aa2a2_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/librttopo-1.1.0-ha909e78_20.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsodium-1.0.21-h1a92334_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libspatialite-5.1.0-gpl_hc59e0ec_120.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsqlite-3.52.0-h1ae2325_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libssh2-1.11.1-h1590b86_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libthrift-0.22.0-h14a376c_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libtiff-4.7.1-h4030677_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libutf8proc-2.11.3-h2431656_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libuv-1.51.0-h6caf38d_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libwebp-base-1.6.0-h07db88b_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libxcb-1.17.0-hdb1d25a_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libxml2-16-2.15.2-h5ef1a60_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libxml2-2.15.2-h8d039ee_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libxml2-devel-2.15.2-h8d039ee_0.conda + - 
conda: https://conda.anaconda.org/conda-forge/osx-arm64/libxslt-1.1.43-hb2570ba_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libzlib-1.3.2-h8088a28_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/linkify-it-py-2.1.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/llvm-openmp-22.1.2-hc7d1edf_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/llvmlite-0.47.0-py313h691f2cf_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/loro-1.10.3-py313h1634cc5_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lsprotocol-2025.0.0-pyhe01879c_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lxml-6.0.2-py313he6cafaa_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lz4-c-1.10.0-h286801f_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lzo-2.10-h925e9cb_1002.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mako-1.3.10-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mapclassify-2.10.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/marimo-0.21.1-py313h6fa1262_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/markdown-3.10.2-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/markdown-it-py-4.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/marko-2.2.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/markupsafe-3.0.3-py313h65a2061_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mashumaro-3.14-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/matplotlib-3.10.8-py313h39782a4_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/matplotlib-base-3.10.8-py313h58042b9_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/matplotlib-inline-0.2.1-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/matplotx-0.3.10-pyhd8ed1ab_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/mdit-py-plugins-0.5.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mdurl-0.1.2-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/memray-1.19.2-py313hd8878d0_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/minizip-4.0.10-hff1a8ea_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mistune-3.2.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mlflow-3.10.1-pyh707e725_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mlflow-skinny-3.10.1-pyh707e725_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mlflow-ui-3.10.1-pyh707e725_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/more-itertools-10.8.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/msgpack-python-1.1.2-py313ha61f8ec_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/msgspec-0.20.0-py313h0997733_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/multidict-6.7.1-py313haf6918d_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/muparser-2.3.5-h11e0b38_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mypy_extensions-1.1.0-pyha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/narwhals-2.18.1-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/nbclient-0.10.4-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/nbconvert-7.17.0-h14065e2_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/nbconvert-core-7.17.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/nbconvert-pandoc-7.17.0-hc3985f0_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/nbformat-5.10.4-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ncurses-6.5-h5e97a16_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/nest-asyncio-1.6.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/networkx-3.6.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/nlohmann_json-3.12.0-h784d473_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/nodejs-24.14.1-h396074d_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/notebook-7.5.5-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/notebook-shim-0.2.4-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/numba-0.65.0-py313h3ca053b_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/numexpr-2.14.1-py313h73ed539_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/numpy-2.4.3-py313he4a34aa_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/oauth2client-4.1.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/oauthlib-3.3.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/oniguruma-6.9.10-h5505292_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/openjpeg-2.5.4-hd9e9057_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/openldap-2.6.10-hf7f56bc_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/openpyxl-3.1.5-py313he4f8f71_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/openssl-3.6.1-hd24854e_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/opentelemetry-api-1.16.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/opentelemetry-proto-1.40.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/opentelemetry-sdk-1.16.0-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/opentelemetry-semantic-conventions-0.37b0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/orc-2.3.0-hd11884d_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/orderly-set-5.5.0-pyhe01879c_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/orjson-3.11.8-py313hf195ed2_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/overrides-7.7.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-26.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pandas-2.3.3-py313h7d16b84_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pandera-base-0.30.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pandera-polars-0.30.1-hd6cc441_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pandoc-3.9.0.2-hce30654_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pandocfilters-1.5.0-pyhd8ed1ab_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pango-1.56.4-hf80efc4_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/parallel-20260122-hce30654_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/paramiko-4.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/parsedatetime-2.4-py_1.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/parso-0.8.6-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pathlib-abc-0.5.2-pyh9692d8f_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pathspec-0.12.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pbr-7.0.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pcre2-10.47-h30297fc_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/perl-5.32.1-7_h4614cfb_perl5.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/petl-1.7.17-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pexpect-4.9.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pillow-12.2.0-py313h45e5a15_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pip-26.0.1-pyh145f28c_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pixman-0.46.4-h81086ad_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.9.4-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/plotly-6.6.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pluggy-1.6.0-pyhf9edf01_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/polars-1.39.3-pyh58ad624_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/polars-runtime-32-1.39.3-py310h216a1ac_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/prek-0.3.8-h6fdd925_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/prettier-3.8.1-h9907cc9_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/prettytable-3.17.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/proj-9.8.0-hfb14a63_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/prometheus-cpp-1.3.0-h0967b3e_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/prometheus_client-0.24.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/prometheus_flask_exporter-0.23.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/prompt-toolkit-3.0.52-pyha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/prompt_toolkit-3.0.52-hd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/propcache-0.3.1-py313ha9b7d5b_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/proto-plus-1.27.2-pyhcf101f3_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/protobuf-6.33.5-py313h691911b_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/psutil-7.2.2-py313h6688731_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/psycopg2-2.9.10-py313h85ad9ef_4.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/psycopg2-binary-2.9.10-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pthread-stubs-0.4-hd74edd7_1002.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ptyprocess-0.7.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pure_eval-0.2.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyarrow-23.0.1-py313h39782a4_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyarrow-core-23.0.1-py313h23b330d_0_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pyasn1-0.6.3-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pyasn1-modules-0.4.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pybtex-0.25.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pybtex-docutils-1.0.3-pyhcf101f3_4.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.12.5-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pydantic-core-2.41.5-py313h2c089d5_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-extra-types-2.11.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-settings-2.13.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pydata-sphinx-theme-0.16.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pygls-2.1.1-pyhcf101f3_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/pygments-2.20.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pygraphviz-1.14-py313hef38a24_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyicu-2.16.2-py313h8798f4d_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pyjwt-2.12.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pymdown-extensions-10.21.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pynacl-1.6.2-py313h6940bce_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyobjc-core-12.1-py313h40b429f_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyobjc-framework-cocoa-12.1-py313hcc5defa_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyogrio-0.12.1-py313he6d61f9_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pyopenssl-26.0.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.3.2-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyproj-3.7.2-py313h9902f63_4.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha55dd90_7.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-9.0.2-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-console-scripts-1.4.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-cov-7.1.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-mock-3.15.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-order-1.3.0-pyh29332c3_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.8.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/python-3.13.12-h20e6be0_100_cp313.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/python-calamine-0.6.1-py313h0b74987_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhe01879c_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-dotenv-1.2.2-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/python-duckdb-1.4.4-py313h6deaedc_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-fastjsonschema-2.21.2-pyhe01879c_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-flatbuffers-25.9.23-pyh1e1bc0e_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-gil-3.13.12-h4df99d1_100.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/python-igraph-1.0.0-py313h0e822ff_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-json-logger-2.0.7-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-multipart-0.0.22-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-slugify-8.0.4-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2025.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.13-8_cp313.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pytimeparse-1.1.8-py_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/pytz-2026.1.post1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pyu2f-0.1.5-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pywin32-on-windows-0.1.0-pyh1179c8e_3.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyyaml-6.0.3-py313h65a2061_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyzmq-27.1.0-py312h022ad19_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/qhull-2020.2-h420ef59_5.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/querystring_parser-1.2.4-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/questionary-2.1.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/re2-2025.11.05-ha480c28_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/readline-8.3-h46df422_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/readthedocs-sphinx-ext-2.2.5-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/referencing-0.37.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/regex-2026.3.32-py313h0997733_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/requests-2.33.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/requests-oauthlib-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/requests-toolbelt-1.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/responses-0.26.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/restructuredtext_lint-2.0.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/rfc3339-validator-0.1.4-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/rfc3986-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/rfc3986-validator-0.1.1-pyh9f0ad1d_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/rfc3987-syntax-1.1.0-pyhe01879c_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/rich-14.3.3-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/rich-toolkit-0.19.7-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ripgrep-15.1.0-h748bcf4_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/roman-numerals-4.1.0-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/rpds-py-0.30.0-py313h2c089d5_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/rsa-4.9.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ruff-0.15.8-hc5c3a1d_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ruff-lsp-0.0.62-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/s3fs-2026.2.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/s3transfer-0.16.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/scikit-learn-1.8.0-np2py313h3b23316_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/scipy-1.17.1-py313hc753a45_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/send2trash-2.1.0-pyh5552912_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/setuptools-82.0.1-pyh332efcf_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/shapely-2.1.2-py313h10b2fc2_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/shellingham-1.5.4-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/simpleeval-1.0.7-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhe01879c_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/skops-0.13.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/slack-sdk-3.41.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/smmap-5.0.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/snappy-1.2.2-hada39a4_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sniffio-1.3.1-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-3.0.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/snowplow-tracker-1.0.4-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/sortedcontainers-2.4.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/soupsieve-2.8.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-9.1.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-autoapi-3.8.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-design-0.7.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-issues-6.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-reredirects-1.1.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-bibtex-2.6.5-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-googleanalytics-0.5-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.1.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-mermaid-2.0.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-2.0.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/splink-4.0.16-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/sqlalchemy-2.0.48-py313h6688731_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sqlglot-28.0.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sqlglot-rs-28.0.0-h5602114_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/sqlglotrs-0.7.3-py313hb4875b5_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/sqlite-3.52.0-h77b7338_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/sqlparse-0.5.4-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/stack_data-0.6.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/starlette-1.0.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/stdlib-list-0.12.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/stevedore-5.7.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/stringcase-1.2.0-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/structlog-25.5.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tabulate-0.10.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/taplo-0.9.3-hdf53557_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/termcolor-3.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/terminado-0.18.1-pyhc90fa1f_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/terraform-1.14.7-h01237fd_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/text-unidecode-1.3-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/texttable-1.7.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/textual-8.2.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.6.0-pyhecae5ae_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/timezonefinder-8.2.2-py313h6688731_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tinycss2-1.4.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/tk-8.6.13-h010d191_3.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/tomli-2.4.1-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tomlkit-0.13.2-pyha770c72_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/toposort-1.10-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/tornado-6.5.5-py313h0997733_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tqdm-4.67.3-pyh8f84b5b_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.14.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/truststore-0.10.4-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ty-0.0.27-hdfcc030_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typeguard-4.5.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typer-0.24.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/types-pyyaml-6.0.12.20250915-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/types-requests-2.33.0.20260327-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.15.0-h396c80c_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.2-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.15.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typing_inspect-0.9.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typing_utils-0.1.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/typos-1.45.0-h748bcf4_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025c-hc9c84f9_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/uc-micro-py-2.0.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/universal-pathlib-0.3.10-hd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/universal_pathlib-0.3.10-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/uri-template-1.3.0-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/uriparser-0.9.8-h00cdb27_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/uritemplate-4.2.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/urllib3-2.6.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/uv-0.11.2-h2a61971_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/uvicorn-0.42.0-pyhc90fa1f_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/uvicorn-standard-0.42.0-h76e4700_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/uvloop-0.22.1-py313h6535dbc_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/validators-0.35.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/watchdog-6.0.0-py313h6688731_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/watchfiles-1.1.1-py313h0b74987_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/wcwidth-0.6.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/webcolors-25.10.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/webencodings-0.5.1-pyhd8ed1ab_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/websocket-client-1.9.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/websockets-15.0.1-py313h5b5ffa7_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/werkzeug-3.1.7-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/widgetsnbextension-4.0.15-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/wrapt-2.1.2-py313h0997733_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/xerces-c-3.3.0-h25f632f_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/xlsxwriter-3.2.9-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/xorg-libxau-1.0.12-hc919400_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/xorg-libxdmcp-1.1.5-hc919400_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/xyzservices-2026.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/yaml-0.2.5-h925e9cb_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/yarl-1.23.0-py313h65a2061_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/yaspin-3.4.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/zeromq-4.3.5-h4818236_10.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/zip-3.0-hb547adb_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/zlib-1.3.2-h8088a28_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/zlib-ng-2.3.3-hed4e4f5_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/zstd-1.5.7-hbf9d68e_6.conda + - pypi: https://files.pythonhosted.org/packages/4e/5e/4f5fe4b89fde1dc3ed0eb51bd4ce4c0bca406246673d370ea2ad0c58d747/detect_secrets-1.5.0-py3-none-any.whl + - pypi: ./ packages: - conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-20_gnu.conda build_number: 20 @@ -2168,6 +4147,17 @@ packages: - pkg:pypi/alembic?source=hash-mapping size: 184763 timestamp: 1770806831769 +- conda: https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.15.3-hb03c661_0.conda + sha256: d88aa7ae766cf584e180996e92fef2aa7d8e0a0a5ab1d4d49c32390c1b5fff31 + md5: dcdc58c15961dbf17a0621312b01f5cb + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + license: LGPL-2.1-or-later + license_family: GPL + purls: [] + size: 584660 + timestamp: 1768327524772 - conda: 
https://conda.anaconda.org/conda-forge/noarch/altair-6.0.0-pyhd8ed1ab_1.conda sha256: c89567b3805b37393fa36f43c5f992c11b28a0f56cf3ce85c91826c78298c882 md5: 36a31df7e76e845fb87f8d0a2c8c5ebf @@ -3896,8 +5886,8 @@ packages: timestamp: 1766416416791 - pypi: ./ name: catalystcoop-pudl - version: 2026.3.1.dev43 - sha256: 619d2db2853518b1a5b6dae49e7a5834e97f3f142be7320978a9100e4c485821 + version: 2026.3.1.dev196 + sha256: 8adb6c3ed1e80bfb42cd5ed5bb47f01519781140b709bf2b7e7dc14ad2a21b44 requires_python: '>=3.13,<3.14.0a0' - conda: https://conda.anaconda.org/conda-forge/noarch/catalystcoop.dbfread-3.0.0-pyhd8ed1ab_1.conda sha256: 7a706fefa47027ab14cac7572dbddadaf9a12fc13a1222ad1e75f43f8fd9dcfb @@ -5022,6 +7012,18 @@ packages: - pkg:pypi/docutils?source=hash-mapping size: 402700 timestamp: 1733217860944 +- conda: https://conda.anaconda.org/conda-forge/linux-64/double-conversion-3.4.0-hecca717_0.conda + sha256: 40cdd1b048444d3235069d75f9c8e1f286db567f6278a93b4f024e5642cfaecc + md5: dbe3ec0f120af456b3477743ffd99b74 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + - libstdcxx >=14 + license: BSD-3-Clause + license_family: BSD + purls: [] + size: 71809 + timestamp: 1765193127016 - conda: https://conda.anaconda.org/conda-forge/linux-64/elfutils-0.194-h849f50c_0.conda sha256: f71eae7dc8ff9392d225d2d529691b2db16289b7d8009646eeb1adf0caf3937b md5: 6da1f998c8ea85ba7692afbb5db72fb9 @@ -7665,6 +9667,43 @@ packages: - pkg:pypi/joblib?source=hash-mapping size: 226448 timestamp: 1765794135253 +- conda: https://conda.anaconda.org/conda-forge/linux-64/jq-1.8.1-h73b1eb8_0.conda + sha256: ab26cb11ad0d10f5c6637d925b044c74a3eacb5825686d3720313b3cb6d40cef + md5: 2714e43bfc035f7ef26796632aa1b523 + depends: + - oniguruma 6.9.* + - libgcc >=13 + - __glibc >=2.17,<3.0.a0 + - oniguruma >=6.9.10,<6.10.0a0 + license: MIT + license_family: MIT + purls: [] + size: 313184 + timestamp: 1751447310552 +- conda: https://conda.anaconda.org/conda-forge/osx-64/jq-1.8.1-h2287256_0.conda + sha256: 
971ec2b98b491bc9419bb8d97006dc521e2e06d7466f2da37612796fd38066ff + md5: f76d7d452699d8208be98516ab18df96 + depends: + - oniguruma 6.9.* + - __osx >=10.13 + - oniguruma >=6.9.10,<6.10.0a0 + license: MIT + license_family: MIT + purls: [] + size: 331126 + timestamp: 1751447338102 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/jq-1.8.1-hbc156a2_0.conda + sha256: 4c66141af08a9b1019345e800c5b1bcf005614167e35edee9977034371acce4a + md5: ab406f399e1becf7b51b74510e332f3d + depends: + - oniguruma 6.9.* + - __osx >=11.0 + - oniguruma >=6.9.10,<6.10.0a0 + license: MIT + license_family: MIT + purls: [] + size: 338907 + timestamp: 1751447320937 - conda: https://conda.anaconda.org/conda-forge/linux-64/json-c-0.18-h6688a6e_0.conda sha256: 09e706cb388d3ea977fabcee8e28384bdaad8ce1fc49340df5f868a2bd95a7da md5: 38f5dbc9ac808e31c00650f7be1db93f @@ -8865,6 +10904,32 @@ packages: purls: [] size: 18863 timestamp: 1774504433388 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp21.1-21.1.8-default_h99862b1_3.conda + sha256: de512ce246faec2d4f7766774769921a85b5aa053a74abd2f8c97ad50b393aac + md5: 24a2802074d26aecfdbc9b3f1d8168d1 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + - libllvm21 >=21.1.8,<21.2.0a0 + - libstdcxx >=14 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + purls: [] + size: 21066639 + timestamp: 1770190428756 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libclang13-22.1.0-default_h746c552_0.conda + sha256: 4a9dd814492a129f2ff40cd4ab0b942232c9e3c6dbc0d0aaf861f1f65e99cc7d + md5: 140459a7413d8f6884eb68205ce39a0d + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + - libllvm22 >=22.1.0,<22.2.0a0 + - libstdcxx >=14 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + purls: [] + size: 12817500 + timestamp: 1772101411287 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcrc32c-1.1.2-h9c3ff4c_0.tar.bz2 sha256: fd1d153962764433fe6233f34a72cdeed5dcf8a883a85769e8295ce940b5b0c5 md5: 
c965a5aa0d5c1c37ffc62dff36e28400 @@ -10158,6 +12223,38 @@ packages: purls: [] size: 18863 timestamp: 1774504467905 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libllvm21-21.1.8-hf7376ad_0.conda + sha256: 91bb4f5be1601b40b4995911d785e29387970f0b3c80f33f7f9028f95335399f + md5: 1a2708a460884d6861425b7f9a7bef99 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + - libstdcxx >=14 + - libxml2 + - libxml2-16 >=2.14.6 + - libzlib >=1.3.1,<2.0a0 + - zstd >=1.5.7,<1.6.0a0 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + purls: [] + size: 44333366 + timestamp: 1765959132513 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libllvm22-22.1.2-hf7376ad_0.conda + sha256: eda0013a9979d142f520747e3621749c493f5fbc8f9d13a52ac7a2b699338e7c + md5: 7147b0792a803cd5b9929ce5d48f7818 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + - libstdcxx >=14 + - libxml2 + - libxml2-16 >=2.14.6 + - libzlib >=1.3.2,<2.0a0 + - zstd >=1.5.7,<1.6.0a0 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + purls: [] + size: 44217146 + timestamp: 1774480335347 - conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.2-hb03c661_0.conda sha256: 755c55ebab181d678c12e49cced893598f2bab22d582fbbf4d8b83c18be207eb md5: c7c83eecbb72d88b940c249af56c8b17 @@ -10368,6 +12465,16 @@ packages: purls: [] size: 4308797 timestamp: 1774472508546 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libopengl-1.7.0-ha4b6fd6_2.conda + sha256: 215086c108d80349e96051ad14131b751d17af3ed2cb5a34edd62fa89bfe8ead + md5: 7df50d44d4a14d6c31a2c54f2cd92157 + depends: + - __glibc >=2.17,<3.0.a0 + - libglvnd 1.7.0 ha4b6fd6_2 + license: LicenseRef-libglvnd + purls: [] + size: 50757 + timestamp: 1731330993524 - conda: https://conda.anaconda.org/conda-forge/linux-64/libopentelemetry-cpp-1.26.0-h9692893_0.conda sha256: 5126b75e7733de31e261aa275c0a1fd38b25fdfff23e7d7056ebd6ca76d11532 md5: c360be6f9e0947b64427603e91f9651f @@ -11163,6 +13270,22 @@ packages: purls: [] 
size: 421195 timestamp: 1753948426421 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libvulkan-loader-1.4.341.0-h5279c79_0.conda + sha256: a68280d57dfd29e3d53400409a39d67c4b9515097eba733aa6fe00c880620e2b + md5: 31ad065eda3c2d88f8215b1289df9c89 + depends: + - __glibc >=2.17,<3.0.a0 + - libstdcxx >=14 + - libgcc >=14 + - xorg-libx11 >=1.8.12,<2.0a0 + - xorg-libxrandr >=1.5.5,<2.0a0 + constrains: + - libvulkan-headers 1.4.341.0.* + license: Apache-2.0 + license_family: APACHE + purls: [] + size: 199795 + timestamp: 1770077125520 - conda: https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.6.0-hd42ef1d_0.conda sha256: 3aed21ab28eddffdaf7f804f49be7a7d701e8f0e46c856d801270b470820a37b md5: aea31d2e5b1091feca96fcfe945c3cf9 @@ -11521,14 +13644,14 @@ packages: purls: [] size: 285695 timestamp: 1774733561929 -- conda: https://conda.anaconda.org/conda-forge/linux-64/llvmlite-0.46.0-py313hdd307be_0.conda - sha256: 0e1bc6ee1c7885cc26c37fcd1a2095169a4e4e148860c600d3f685b6a4f32322 - md5: d99ac09b331711fd12e16323ca8d96e4 +- conda: https://conda.anaconda.org/conda-forge/linux-64/llvmlite-0.47.0-py313hdd307be_0.conda + sha256: f9bf452cb3e68cc1fed41005c0ec3d2024c712e3f43e582f39695d68c1c3b2d0 + md5: f7d77de51327ba26e1ff4a50c5d74dab depends: - __glibc >=2.17,<3.0.a0 - libgcc >=14 - libstdcxx >=14 - - libzlib >=1.3.1,<2.0a0 + - libzlib >=1.3.2,<2.0a0 - python >=3.13,<3.14.0a0 - python_abi 3.13.* *_cp313 - zstd >=1.5.7,<1.6.0a0 @@ -11536,15 +13659,15 @@ packages: license_family: BSD purls: - pkg:pypi/llvmlite?source=hash-mapping - size: 34130706 - timestamp: 1765280056189 -- conda: https://conda.anaconda.org/conda-forge/osx-64/llvmlite-0.46.0-py313h590e1ab_0.conda - sha256: f1549261f0f2f24c2dd2c7a613b465c0c3e4e1158c43a72224c228aa0b5cb76f - md5: ab9fe8b3937e90b22a18554c3d961e97 + size: 34128718 + timestamp: 1775031357014 +- conda: https://conda.anaconda.org/conda-forge/osx-64/llvmlite-0.47.0-py313he3abfad_0.conda + sha256: 
51867f155c8242646fc5b4580de7598a7d213ba3ae5b6cfb01f13f2ae772f7b2 + md5: 9deecb01e0e469143823dd1113833042 depends: - - __osx >=10.13 + - __osx >=11.0 - libcxx >=19 - - libzlib >=1.3.1,<2.0a0 + - libzlib >=1.3.2,<2.0a0 - python >=3.13,<3.14.0a0 - python_abi 3.13.* *_cp313 - zstd >=1.5.7,<1.6.0a0 @@ -11552,15 +13675,15 @@ packages: license_family: BSD purls: - pkg:pypi/llvmlite?source=hash-mapping - size: 26010458 - timestamp: 1765280511277 -- conda: https://conda.anaconda.org/conda-forge/osx-arm64/llvmlite-0.46.0-py313he297ed2_0.conda - sha256: d59fdc5a5682e3f6c17f1c8dc73019afaf6724f4ecd10878515438ca35683269 - md5: 81f05ab2abc842253505133ffa652bf5 + size: 25984923 + timestamp: 1775031955654 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/llvmlite-0.47.0-py313h691f2cf_0.conda + sha256: 07528d37b11a25044e1e0f1b848754a101fce99fdbd52c64d0f806968860b7f0 + md5: bab9e531e5796cfa3aa0bbf77dda3af2 depends: - __osx >=11.0 - libcxx >=19 - - libzlib >=1.3.1,<2.0a0 + - libzlib >=1.3.2,<2.0a0 - python >=3.13,<3.14.0a0 - python >=3.13,<3.14.0a0 *_cp313 - python_abi 3.13.* *_cp313 @@ -11569,8 +13692,54 @@ packages: license_family: BSD purls: - pkg:pypi/llvmlite?source=hash-mapping - size: 24338921 - timestamp: 1765280468997 + size: 24334985 + timestamp: 1775032301948 +- conda: https://conda.anaconda.org/conda-forge/linux-64/loro-1.10.3-py313hdeb11d6_1.conda + sha256: c4b3126d07a05e153696548b1f424a32d104e28362da3ecd6266798e5895a207 + md5: 3a32d5cf4b03bb61987b6a5f676c778a + depends: + - python + - __glibc >=2.17,<3.0.a0 + - python_abi 3.13.* *_cp313 + constrains: + - __glibc >=2.17 + license: MIT + license_family: MIT + purls: + - pkg:pypi/loro?source=hash-mapping + size: 2750699 + timestamp: 1768757686080 +- conda: https://conda.anaconda.org/conda-forge/osx-64/loro-1.10.3-py313h101a90a_1.conda + sha256: 305be8b4400f1c371b074d6a419c52eee32f733ce20e805cf78393a9adccf347 + md5: 2ffc540a2bbeff4e028b2b6da4ce0670 + depends: + - python + - __osx >=10.13 + - python_abi 3.13.* 
*_cp313 + constrains: + - __osx >=10.13 + license: MIT + license_family: MIT + purls: + - pkg:pypi/loro?source=hash-mapping + size: 2686289 + timestamp: 1768757781113 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/loro-1.10.3-py313h1634cc5_1.conda + sha256: 4c6113c6537ffefe96d529ec776fc3db7efac26309513f8ced9ca60a21185fb3 + md5: 12dc43a0cb5ace8bd611d0645955867c + depends: + - python + - __osx >=11.0 + - python 3.13.* *_cp313 + - python_abi 3.13.* *_cp313 + constrains: + - __osx >=11.0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/loro?source=hash-mapping + size: 2488963 + timestamp: 1768757810454 - conda: https://conda.anaconda.org/conda-forge/noarch/lsprotocol-2025.0.0-pyhe01879c_0.conda sha256: c530344ab48b6bf44a441f742e99898c481fba8ef9b96037adf261beda0f936f md5: f0680112562ae328ef9ce60545879cf9 @@ -11727,9 +13896,97 @@ packages: license: BSD-3-Clause license_family: BSD purls: - - pkg:pypi/mapclassify?source=hash-mapping - size: 810830 - timestamp: 1752271625200 + - pkg:pypi/mapclassify?source=hash-mapping + size: 810830 + timestamp: 1752271625200 +- conda: https://conda.anaconda.org/conda-forge/linux-64/marimo-0.21.1-py313hd5f5364_0.conda + sha256: d1e34cedea76596c15114a0600e5a304b34ac65a57c29beb54612decb1bb6b25 + md5: dc1e1ce9e8c87cd10844826b5a393850 + depends: + - python + - click >=8.0,<9 + - jedi >=0.18.0 + - markdown >=3.6,<4 + - pymdown-extensions >=10.15,<11 + - pygments >=2.19,<3 + - tomlkit >=0.12.0 + - pyyaml >=6.0.1 + - uvicorn >=0.22.0 + - starlette >=0.37.2 + - websockets >=14.2.0 + - loro >=1.10.0 + - docutils >=0.16.0 + - psutil >=5.0 + - itsdangerous >=2.0.0 + - narwhals >=2.0.0 + - packaging + - msgspec >=0.20.0 + - python_abi 3.13.* *_cp313 + license: Apache-2.0 + license_family: APACHE + purls: + - pkg:pypi/marimo?source=hash-mapping + size: 33875610 + timestamp: 1773863601938 +- conda: https://conda.anaconda.org/conda-forge/osx-64/marimo-0.21.1-py313h11baec3_0.conda + sha256: 
96d58373d22d2a2b9f1700474c6283bb83c6ba4d93ab74cd76865ac775ada994 + md5: 1b864a69cdd43560d743df7657f139d7 + depends: + - python + - click >=8.0,<9 + - jedi >=0.18.0 + - markdown >=3.6,<4 + - pymdown-extensions >=10.15,<11 + - pygments >=2.19,<3 + - tomlkit >=0.12.0 + - pyyaml >=6.0.1 + - uvicorn >=0.22.0 + - starlette >=0.37.2 + - websockets >=14.2.0 + - loro >=1.10.0 + - docutils >=0.16.0 + - psutil >=5.0 + - itsdangerous >=2.0.0 + - narwhals >=2.0.0 + - packaging + - msgspec >=0.20.0 + - python_abi 3.13.* *_cp313 + license: Apache-2.0 + license_family: APACHE + purls: + - pkg:pypi/marimo?source=hash-mapping + size: 33874605 + timestamp: 1773863869295 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/marimo-0.21.1-py313h6fa1262_0.conda + sha256: 2c25aa1e254867a552ca5c8a324327791caaf5f855c26497fd4a2acb7bdbb4d6 + md5: e31e2ca1cf65b42d1342c901e5796f6a + depends: + - python + - click >=8.0,<9 + - jedi >=0.18.0 + - markdown >=3.6,<4 + - pymdown-extensions >=10.15,<11 + - pygments >=2.19,<3 + - tomlkit >=0.12.0 + - pyyaml >=6.0.1 + - uvicorn >=0.22.0 + - starlette >=0.37.2 + - websockets >=14.2.0 + - loro >=1.10.0 + - docutils >=0.16.0 + - psutil >=5.0 + - itsdangerous >=2.0.0 + - narwhals >=2.0.0 + - packaging + - msgspec >=0.20.0 + - python 3.13.* *_cp313 + - python_abi 3.13.* *_cp313 + license: Apache-2.0 + license_family: APACHE + purls: + - pkg:pypi/marimo?source=hash-mapping + size: 33879804 + timestamp: 1773863722824 - conda: https://conda.anaconda.org/conda-forge/noarch/markdown-3.10.2-pyhcf101f3_0.conda sha256: 20e0892592a3e7c683e3d66df704a9425d731486a97c34fc56af4da1106b2b6b md5: ba0a9221ce1063f31692c07370d062f3 @@ -11825,6 +14082,46 @@ packages: - pkg:pypi/mashumaro?source=hash-mapping size: 72490 timestamp: 1729775610028 +- conda: https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.10.8-py313h78bf25f_0.conda + sha256: ad3eb40a91d456620936c88ea4eb2700ca24e474acd9498fdad831a87771399e + md5: 85bce686dd57910d533807562204e16b + depends: + - 
matplotlib-base >=3.10.8,<3.10.9.0a0 + - pyside6 >=6.7.2 + - python >=3.13,<3.14.0a0 + - python_abi 3.13.* *_cp313 + - tornado >=5 + license: PSF-2.0 + license_family: PSF + purls: [] + size: 17429 + timestamp: 1763055377972 +- conda: https://conda.anaconda.org/conda-forge/osx-64/matplotlib-3.10.8-py313habf4b1d_0.conda + sha256: cea48c750f812eaf7c8b1edaff9d4b30bdad99f28f4421f1ab49e24c74db360d + md5: 37dffad2937d7c8b7fc47003ddd31eac + depends: + - matplotlib-base >=3.10.8,<3.10.9.0a0 + - python >=3.13,<3.14.0a0 + - python_abi 3.13.* *_cp313 + - tornado >=5 + license: PSF-2.0 + license_family: PSF + purls: [] + size: 17433 + timestamp: 1763055798218 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/matplotlib-3.10.8-py313h39782a4_0.conda + sha256: bdbac057835e29adeb32c4e937455f7caefd7723909b11cb9dc1d7675d1cdc4f + md5: bae471007cbebf097a19e851c219d56a + depends: + - matplotlib-base >=3.10.8,<3.10.9.0a0 + - python >=3.13,<3.14.0a0 + - python_abi 3.13.* *_cp313 + - tornado >=5 + license: PSF-2.0 + license_family: PSF + purls: [] + size: 17522 + timestamp: 1763056165099 - conda: https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.10.8-py313h683a580_0.conda sha256: b1117aa2c1d11ca70d1704054cdc8801cbcf2dfb846c565531edd417ddd82559 md5: ffe67570e1a9192d2f4c189b27f75f89 @@ -11924,6 +14221,19 @@ packages: - pkg:pypi/matplotlib-inline?source=hash-mapping size: 15175 timestamp: 1761214578417 +- conda: https://conda.anaconda.org/conda-forge/noarch/matplotx-0.3.10-pyhd8ed1ab_0.tar.bz2 + sha256: a432d6ec4f4f6554cbdb0a439ace94763601448bf3f962736974c1a872110636 + md5: 41995de5fec0178e1b9cbfdbdaeeddbf + depends: + - matplotlib + - networkx + - python >=3.7 + license: MIT + license_family: MIT + purls: + - pkg:pypi/matplotx?source=hash-mapping + size: 26379 + timestamp: 1661194994609 - conda: https://conda.anaconda.org/conda-forge/noarch/mdit-py-plugins-0.5.0-pyhd8ed1ab_0.conda sha256: 123cc004e2946879708cdb6a9eff24acbbb054990d6131bb94bca7a374ebebfc md5: 
1997a083ef0b4c9331f9191564be275e @@ -12218,6 +14528,47 @@ packages: - pkg:pypi/msgpack?source=hash-mapping size: 91725 timestamp: 1762504404391 +- conda: https://conda.anaconda.org/conda-forge/linux-64/msgspec-0.20.0-py313h07c4f96_2.conda + sha256: 46dae9f07e5ec0a3576d7c245bcf538dcd8d851ebd78eb97f43e6f218cda4af6 + md5: 7107f9b15e25a2ca8f2d3ed2fff91bac + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + - python >=3.13,<3.14.0a0 + - python_abi 3.13.* *_cp313 + license: BSD-3-Clause + license_family: BSD + purls: + - pkg:pypi/msgspec?source=hash-mapping + size: 219152 + timestamp: 1768737684822 +- conda: https://conda.anaconda.org/conda-forge/osx-64/msgspec-0.20.0-py313h36bb7f5_2.conda + sha256: 802638b8ac3ea9846efe414cfa34a87eb51c0120c57c1929955f7890e4cb7bfa + md5: f58680cf464604af1e0fc5a4bfef6177 + depends: + - __osx >=10.13 + - python >=3.13,<3.14.0a0 + - python_abi 3.13.* *_cp313 + license: BSD-3-Clause + license_family: BSD + purls: + - pkg:pypi/msgspec?source=hash-mapping + size: 217761 + timestamp: 1768737866896 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/msgspec-0.20.0-py313h0997733_2.conda + sha256: 1ee9c9054853d1a1bd9b6f3ceb713d79cd4df37424c95b69e4e62a5d2a69497b + md5: d930bfbd3e138604a88061ca2406d3a4 + depends: + - __osx >=11.0 + - python >=3.13,<3.14.0a0 + - python >=3.13,<3.14.0a0 *_cp313 + - python_abi 3.13.* *_cp313 + license: BSD-3-Clause + license_family: BSD + purls: + - pkg:pypi/msgspec?source=hash-mapping + size: 215227 + timestamp: 1768738426275 - conda: https://conda.anaconda.org/conda-forge/linux-64/multidict-6.7.1-py313h3dea7bd_0.conda sha256: 3d277c0a9e237dc4c64f0b6414f3cf3e95806b2f5d03dec9c50f0ad0db5b7df1 md5: 4f3e7bf5a9fc60a7d39047ba9e84c84c @@ -12615,15 +14966,15 @@ packages: - pkg:pypi/notebook-shim?source=hash-mapping size: 16817 timestamp: 1733408419340 -- conda: https://conda.anaconda.org/conda-forge/linux-64/numba-0.64.0-py313h5dce7c4_0.conda - sha256: 3c6e9f28b5d1d987041011c0b5a1052e730df6c682b47e8a7fd7b6f00040d562 
- md5: 90caff1954fb5cacc0b3e75f5066ae7c +- conda: https://conda.anaconda.org/conda-forge/linux-64/numba-0.65.0-py313h5dce7c4_0.conda + sha256: fe66980a110c5fba2a1dcf840466f195094aa59a597636db88bd8d124f061ed8 + md5: 5a910c790b8e14b7464696ab893c3a81 depends: - __glibc >=2.17,<3.0.a0 - _openmp_mutex >=4.5 - libgcc >=14 - libstdcxx >=14 - - llvmlite >=0.46.0,<0.47.0a0 + - llvmlite >=0.47.0,<0.48.0a0 - numpy >=1.22.3,<2.5 - numpy >=1.23,<3 - python >=3.13,<3.14.0a0 @@ -12631,69 +14982,69 @@ packages: constrains: - cuda-version >=11.2 - scipy >=1.0 - - libopenblas !=0.3.6 + - cuda-python >=11.6 - cudatoolkit >=11.2 - tbb >=2021.6.0 - - cuda-python >=11.6 + - libopenblas !=0.3.6 license: BSD-2-Clause license_family: BSD purls: - pkg:pypi/numba?source=compressed-mapping - size: 5764718 - timestamp: 1772481814929 -- conda: https://conda.anaconda.org/conda-forge/osx-64/numba-0.64.0-py313h4fc6aae_0.conda - sha256: 05cf14b53d85c7bec07beeb644ff73ad53088c61d5b83fd4d2c9f5d0697d892e - md5: cf7b4f0e369bd47903c552a22a390955 + size: 5763220 + timestamp: 1775076099963 +- conda: https://conda.anaconda.org/conda-forge/osx-64/numba-0.65.0-py313h4fc6aae_0.conda + sha256: 5ec5b4cdf42c251a223d3acee8b1b1a44d67588dd8a611f01de92c4b1255262f + md5: 58f965ae65099d38010a3f11f1f6a379 depends: - __osx >=11.0 - libcxx >=19 - llvm-openmp >=19.1.7 - - llvm-openmp >=22.1.0 - - llvmlite >=0.46.0,<0.47.0a0 + - llvm-openmp >=22.1.2 + - llvmlite >=0.47.0,<0.48.0a0 - numpy >=1.22.3,<2.5 - numpy >=1.23,<3 - python >=3.13,<3.14.0a0 - python_abi 3.13.* *_cp313 constrains: - - cuda-version >=11.2 + - tbb >=2021.6.0 - libopenblas !=0.3.6 - - cuda-python >=11.6 - cudatoolkit >=11.2 - scipy >=1.0 - - tbb >=2021.6.0 + - cuda-python >=11.6 + - cuda-version >=11.2 license: BSD-2-Clause license_family: BSD purls: - - pkg:pypi/numba?source=hash-mapping - size: 5730801 - timestamp: 1772482474350 -- conda: https://conda.anaconda.org/conda-forge/osx-arm64/numba-0.64.0-py313h3ca053b_0.conda - sha256: 
a7d1456b321218d532d73a0d47ef97f42f8729b09d579ae1bed3a9645f5deb38 - md5: 8c539246e98c032220a3e7c0f48f04b8 + - pkg:pypi/numba?source=compressed-mapping + size: 5719465 + timestamp: 1775076792930 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/numba-0.65.0-py313h3ca053b_0.conda + sha256: fa48f39543892a6d47e24b87212ea5cdd957fbb874333664603a1416406c9be3 + md5: 417d74f768a702d947ce6b1b845572da depends: - __osx >=11.0 - libcxx >=19 - llvm-openmp >=19.1.7 - - llvm-openmp >=22.1.0 - - llvmlite >=0.46.0,<0.47.0a0 + - llvm-openmp >=22.1.2 + - llvmlite >=0.47.0,<0.48.0a0 - numpy >=1.22.3,<2.5 - numpy >=1.23,<3 - python >=3.13,<3.14.0a0 - python >=3.13,<3.14.0a0 *_cp313 - python_abi 3.13.* *_cp313 constrains: - - libopenblas >=0.3.18,!=0.3.20 - - cuda-python >=11.6 - - scipy >=1.0 - - cudatoolkit >=11.2 - cuda-version >=11.2 - tbb >=2021.6.0 + - libopenblas >=0.3.18,!=0.3.20 + - cudatoolkit >=11.2 + - scipy >=1.0 + - cuda-python >=11.6 license: BSD-2-Clause license_family: BSD purls: - - pkg:pypi/numba?source=hash-mapping - size: 5739127 - timestamp: 1772482294462 + - pkg:pypi/numba?source=compressed-mapping + size: 5725451 + timestamp: 1775076429950 - conda: https://conda.anaconda.org/conda-forge/linux-64/numexpr-2.14.1-py313h24ae7f9_101.conda sha256: 26917aa008b9753ec0e4658521ee6ef144414f49db65e2ce83fbf316914f318b md5: b7e46fb2704458afc67fb95773528967 @@ -12834,6 +15185,37 @@ packages: - pkg:pypi/oauthlib?source=hash-mapping size: 102059 timestamp: 1750415349440 +- conda: https://conda.anaconda.org/conda-forge/linux-64/oniguruma-6.9.10-hb9d3cd8_0.conda + sha256: bbff8a60f70d5ebab138b564554f28258472e1e63178614562d4feee29d10da2 + md5: 6ce853cb231f18576d2db5c2d4cb473e + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=13 + license: BSD-2-Clause + license_family: BSD + purls: [] + size: 248670 + timestamp: 1735727084819 +- conda: https://conda.anaconda.org/conda-forge/osx-64/oniguruma-6.9.10-h6e16a3a_0.conda + sha256: 
c8ecd1cb39e75677235daddc6ead10055a0ef66b2293118ed77adc621b2ffbcc + md5: 1de37bb098b5b39ad79027d1767b02dd + depends: + - __osx >=10.13 + license: BSD-2-Clause + license_family: BSD + purls: [] + size: 224022 + timestamp: 1735727100676 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/oniguruma-6.9.10-h5505292_0.conda + sha256: cedcd880e316240cbb35a1275990bfed1da36dba4a4f714edf95237f03d48665 + md5: 045afd0b8e35a71bfbe95345146592c4 + depends: + - __osx >=11.0 + license: BSD-2-Clause + license_family: BSD + purls: [] + size: 223354 + timestamp: 1735727101839 - conda: https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.4-h55fea9a_0.conda sha256: 3900f9f2dbbf4129cf3ad6acf4e4b6f7101390b53843591c53b00f034343bc4d md5: 11b3379b191f63139e29c0d19dee24cd @@ -13794,6 +16176,21 @@ packages: - pkg:pypi/platformdirs?source=compressed-mapping size: 25646 timestamp: 1773199142345 +- conda: https://conda.anaconda.org/conda-forge/noarch/plotly-6.6.0-pyhd8ed1ab_0.conda + sha256: c418d325359fc7a0074cea7f081ef1bce26e114d2da8a0154c5d27ecc87a08e7 + md5: 3e9427ee186846052e81fadde8ebe96a + depends: + - narwhals >=1.15.1 + - packaging + - python >=3.10 + constrains: + - ipywidgets >=7.6 + license: MIT + license_family: MIT + purls: + - pkg:pypi/plotly?source=compressed-mapping + size: 5251872 + timestamp: 1772628857717 - conda: https://conda.anaconda.org/conda-forge/noarch/pluggy-1.6.0-pyhf9edf01_1.conda sha256: e14aafa63efa0528ca99ba568eaf506eb55a0371d12e6250aaaa61718d2eb62e md5: d7585b6550ad04c8c5e21097ada2888e @@ -14818,6 +17215,19 @@ packages: - pkg:pypi/pyjwt?source=hash-mapping size: 32247 timestamp: 1773482160904 +- conda: https://conda.anaconda.org/conda-forge/noarch/pymdown-extensions-10.21.2-pyhd8ed1ab_0.conda + sha256: f0513cb5a1ad722025771e7c5a45fa163dfab965d608a82d058cde47abb02d48 + md5: 8e693ba2664eaa3306d7084642e2e4a2 + depends: + - markdown >=3.6 + - python >=3.10 + - pyyaml + license: MIT + license_family: MIT + purls: + - 
pkg:pypi/pymdown-extensions?source=compressed-mapping + size: 173156 + timestamp: 1774803071462 - conda: https://conda.anaconda.org/conda-forge/linux-64/pynacl-1.6.2-py313h5008379_1.conda sha256: 51e80a7bef95025ad47a92acb69ee0e78f01e107655c86fe76abcde2ac688166 md5: c4426edfc5514a2c9be6871557bce52b @@ -15059,6 +17469,30 @@ packages: - pkg:pypi/pyproj?source=hash-mapping size: 523924 timestamp: 1773003238662 +- conda: https://conda.anaconda.org/conda-forge/linux-64/pyside6-6.11.0-py313hcd51b16_1.conda + sha256: d1fee2b83e5ae3a9543054cde1e28b6520e23ceedac49e6e7b86535ce23d3345 + md5: aafbb057823020eda1e5db662bc354a9 + depends: + - python + - qt6-main 6.11.0.* + - libgcc >=14 + - libstdcxx >=14 + - __glibc >=2.17,<3.0.a0 + - qt6-main >=6.11.0,<6.12.0a0 + - python_abi 3.13.* *_cp313 + - libegl >=1.7.0,<2.0a0 + - libxslt >=1.1.43,<2.0a0 + - libgl >=1.7.0,<2.0a0 + - libopengl >=1.7.0,<2.0a0 + - libxml2 + - libxml2-16 >=2.14.6 + - libvulkan-loader >=1.4.341.0,<2.0a0 + - libclang13 >=21.1.8 + license: LGPL-3.0-only + purls: + - pkg:pypi/pyside6?source=compressed-mapping + size: 13208635 + timestamp: 1775055046353 - conda: https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha55dd90_7.conda sha256: ba3b032fa52709ce0d9fd388f63d330a026754587a2f461117cac9ab73d8d0d8 md5: 461219d1a5bd61342293efa2c0c90eac @@ -15667,6 +18101,80 @@ packages: purls: [] size: 516376 timestamp: 1720814307311 +- conda: https://conda.anaconda.org/conda-forge/linux-64/qt6-main-6.11.0-pl5321h16c4a6b_2.conda + sha256: e83dabfeb6209c863a1555edad81b12b08a953a0d952359bf3f0be3250ab12ce + md5: c6ba2de6b22dedf2f20eba3bde1dbe8e + depends: + - libxcb + - xcb-util + - xcb-util-wm + - xcb-util-keysyms + - xcb-util-image + - xcb-util-renderutil + - xcb-util-cursor + - libgcc >=14 + - __glibc >=2.17,<3.0.a0 + - libstdcxx >=14 + - libllvm21 >=21.1.8,<21.2.0a0 + - zstd >=1.5.7,<1.6.0a0 + - libwebp-base >=1.6.0,<2.0a0 + - xorg-libxcomposite >=0.4.7,<1.0a0 + - double-conversion >=3.4.0,<3.5.0a0 + - pcre2 
>=10.47,<10.48.0a0 + - xorg-libxrandr >=1.5.5,<2.0a0 + - libxkbcommon >=1.13.1,<2.0a0 + - libvulkan-loader >=1.4.341.0,<2.0a0 + - libzlib >=1.3.2,<2.0a0 + - libclang-cpp21.1 >=21.1.8,<21.2.0a0 + - xcb-util-keysyms >=0.4.1,<0.5.0a0 + - xcb-util-wm >=0.4.2,<0.5.0a0 + - xorg-libxxf86vm >=1.1.7,<2.0a0 + - icu >=78.3,<79.0a0 + - libxml2 + - libxml2-16 >=2.14.6 + - xcb-util-image >=0.4.0,<0.5.0a0 + - xcb-util-renderutil >=0.3.10,<0.4.0a0 + - libglib >=2.86.4,<3.0a0 + - libcups >=2.3.3,<2.4.0a0 + - libpq >=18.3,<19.0a0 + - xorg-libice >=1.1.2,<2.0a0 + - libgl >=1.7.0,<2.0a0 + - harfbuzz >=13.2.1 + - xorg-libxdamage >=1.1.6,<2.0a0 + - xorg-libsm >=1.2.6,<2.0a0 + - xorg-libxcursor >=1.2.3,<2.0a0 + - krb5 >=1.22.2,<1.23.0a0 + - dbus >=1.16.2,<2.0a0 + - libdrm >=2.4.125,<2.5.0a0 + - libjpeg-turbo >=3.1.2,<4.0a0 + - fontconfig >=2.17.1,<3.0a0 + - fonts-conda-ecosystem + - xcb-util-cursor >=0.1.6,<0.2.0a0 + - alsa-lib >=1.2.15.3,<1.3.0a0 + - libxcb >=1.17.0,<2.0a0 + - openssl >=3.5.5,<4.0a0 + - libegl >=1.7.0,<2.0a0 + - libpng >=1.6.56,<1.7.0a0 + - wayland >=1.25.0,<2.0a0 + - libbrotlicommon >=1.2.0,<1.3.0a0 + - libbrotlienc >=1.2.0,<1.3.0a0 + - libbrotlidec >=1.2.0,<1.3.0a0 + - libfreetype >=2.14.3 + - libfreetype6 >=2.14.3 + - xorg-libxext >=1.3.7,<2.0a0 + - xcb-util >=0.4.1,<0.5.0a0 + - xorg-libx11 >=1.8.13,<2.0a0 + - xorg-libxtst >=1.2.5,<2.0a0 + - libclang13 >=21.1.8 + - libsqlite >=3.52.0,<4.0a0 + - libtiff >=4.7.1,<4.8.0a0 + constrains: + - qt ==6.11.0 + license: LGPL-3.0-only + license_family: LGPL + purls: [] + size: 60993086 + timestamp: 1774634904948 - conda: https://conda.anaconda.org/conda-forge/noarch/querystring_parser-1.2.4-pyhd8ed1ab_2.conda sha256: 776af394dd2ff149d3df0568c02d7ec2c5350b6d8cb68f7c0250cf7b237c4099 md5: 57962900dda3df6b61e545023469c930 @@ -15973,6 +18481,43 @@ packages: - pkg:pypi/rich-toolkit?source=compressed-mapping size: 32484 timestamp: 1771977622605 +- conda: https://conda.anaconda.org/conda-forge/linux-64/ripgrep-15.1.0-hdab8a38_0.conda + 
sha256: a745b0d0ca5ae53757e42d893a61a5034ed2ad4791728e376dbc5c6a4f9c3eb0 + md5: 1f9739b74aab91a4c9c7aea7a6987dbb + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + constrains: + - __glibc >=2.17 + license: MIT + license_family: MIT + purls: [] + size: 1737412 + timestamp: 1761210874723 +- conda: https://conda.anaconda.org/conda-forge/osx-64/ripgrep-15.1.0-h009cd8f_0.conda + sha256: 2454a0ea20e28fea47b77191cbb59246e99bb4141092c17c5162aa0bf56d5a78 + md5: 42dbb90abf031307dadb43d76a43f2b5 + depends: + - __osx >=11.0 + constrains: + - __osx >=10.13 + license: MIT + license_family: MIT + purls: [] + size: 1591574 + timestamp: 1773824665697 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/ripgrep-15.1.0-h748bcf4_0.conda + sha256: cf467a20ee5d00536b06b08e502b2b780536a1627b5b44593d57dbd6c2aac393 + md5: 9e11fa21d0627ad52f2edfe90a363208 + depends: + - __osx >=11.0 + constrains: + - __osx >=11.0 + license: MIT + license_family: MIT + purls: [] + size: 1492744 + timestamp: 1773825226555 - conda: https://conda.anaconda.org/conda-forge/noarch/roman-numerals-4.1.0-pyhd8ed1ab_0.conda sha256: 30f3c04fcfb64c44d821d392a4a0b8915650dbd900c8befc20ade8fde8ec6aa2 md5: 0dc48b4b570931adc8641e55c6c17fe4 @@ -17356,6 +19901,58 @@ packages: - pkg:pypi/truststore?source=hash-mapping size: 24279 timestamp: 1766494826559 +- conda: https://conda.anaconda.org/conda-forge/linux-64/ty-0.0.27-h4e94fc0_0.conda + noarch: python + sha256: 59c76b8a7842375aca17bd6edd287fba2183e805883699dbdf3b75cde2db2cdd + md5: c881453ee50a7254c2c75480510788f6 + depends: + - python + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + - _python_abi3_support 1.* + - cpython >=3.10 + constrains: + - __glibc >=2.17 + license: MIT + license_family: MIT + purls: + - pkg:pypi/ty?source=compressed-mapping + size: 9357690 + timestamp: 1775024964098 +- conda: https://conda.anaconda.org/conda-forge/osx-64/ty-0.0.27-h479939e_0.conda + noarch: python + sha256: bb199525eec613690adea21fa3b97d69e94f35048a498dcee6818fa26cecb788 + md5: 
454253d22ee206b2ff88db32e340af64 + depends: + - python + - __osx >=11.0 + - _python_abi3_support 1.* + - cpython >=3.10 + constrains: + - __osx >=10.13 + license: MIT + license_family: MIT + purls: + - pkg:pypi/ty?source=hash-mapping + size: 9055315 + timestamp: 1775025029340 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/ty-0.0.27-hdfcc030_0.conda + noarch: python + sha256: e67c690200dc808f99d0bb6d61ffb61c2a94bde4c8174b2116c2c2b2172760b6 + md5: 322449902a1690b22ef65d5be43f50ab + depends: + - python + - __osx >=11.0 + - _python_abi3_support 1.* + - cpython >=3.10 + constrains: + - __osx >=11.0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/ty?source=compressed-mapping + size: 8453321 + timestamp: 1775025004282 - conda: https://conda.anaconda.org/conda-forge/noarch/typeguard-4.5.1-pyhd8ed1ab_0.conda sha256: 39d8ae33c43cdb8f771373e149b0b4fae5a08960ac58dcca95b2f1642bb17448 md5: 260af1b0a94f719de76b4e14094e9a3b @@ -17994,6 +20591,78 @@ packages: - pkg:pypi/wrapt?source=hash-mapping size: 85132 timestamp: 1772795528446 +- conda: https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.1-h4f16b4b_2.conda + sha256: ad8cab7e07e2af268449c2ce855cbb51f43f4664936eff679b1f3862e6e4b01d + md5: fdc27cb255a7a2cc73b7919a968b48f0 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=13 + - libxcb >=1.17.0,<2.0a0 + license: MIT + license_family: MIT + purls: [] + size: 20772 + timestamp: 1750436796633 +- conda: https://conda.anaconda.org/conda-forge/linux-64/xcb-util-cursor-0.1.6-hb03c661_0.conda + sha256: c2be9cae786fdb2df7c2387d2db31b285cf90ab3bfabda8fa75a596c3d20fc67 + md5: 4d1fc190b99912ed557a8236e958c559 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + - libxcb >=1.13 + - libxcb >=1.17.0,<2.0a0 + - xcb-util-image >=0.4.0,<0.5.0a0 + - xcb-util-renderutil >=0.3.10,<0.4.0a0 + license: MIT + license_family: MIT + purls: [] + size: 20829 + timestamp: 1763366954390 +- conda: 
https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-hb711507_2.conda + sha256: 94b12ff8b30260d9de4fd7a28cca12e028e572cbc504fd42aa2646ec4a5bded7 + md5: a0901183f08b6c7107aab109733a3c91 + depends: + - libgcc-ng >=12 + - libxcb >=1.16,<2.0.0a0 + - xcb-util >=0.4.1,<0.5.0a0 + license: MIT + license_family: MIT + purls: [] + size: 24551 + timestamp: 1718880534789 +- conda: https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.1-hb711507_0.conda + sha256: 546e3ee01e95a4c884b6401284bb22da449a2f4daf508d038fdfa0712fe4cc69 + md5: ad748ccca349aec3e91743e08b5e2b50 + depends: + - libgcc-ng >=12 + - libxcb >=1.16,<2.0.0a0 + license: MIT + license_family: MIT + purls: [] + size: 14314 + timestamp: 1718846569232 +- conda: https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.10-hb711507_0.conda + sha256: 2d401dadc43855971ce008344a4b5bd804aca9487d8ebd83328592217daca3df + md5: 0e0cbe0564d03a99afd5fd7b362feecd + depends: + - libgcc-ng >=12 + - libxcb >=1.16,<2.0.0a0 + license: MIT + license_family: MIT + purls: [] + size: 16978 + timestamp: 1718848865819 +- conda: https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.2-hb711507_0.conda + sha256: 31d44f297ad87a1e6510895740325a635dd204556aa7e079194a0034cdd7e66a + md5: 608e0ef8256b81d04456e8d211eee3e8 + depends: + - libgcc-ng >=12 + - libxcb >=1.16,<2.0.0a0 + license: MIT + license_family: MIT + purls: [] + size: 51689 + timestamp: 1718844051451 - conda: https://conda.anaconda.org/conda-forge/linux-64/xerces-c-3.3.0-hd9031aa_1.conda sha256: 605980121ad3ee9393a9b53fb0996929c9732f8fc6b9f796d25244ca6fa23032 md5: 66a1db55ecdb7377d2b91f54cd56eafa diff --git a/pyproject.toml b/pyproject.toml index d00f7a4d12..d58ccd0b6c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -179,6 +179,20 @@ zip = ">=3.0" "catalystcoop.pudl" = { path = ".", editable = true } detect-secrets = ">=1.5" +[tool.pixi.feature.dev.dependencies] +jq = ">=1.8" # Primarily for agent skills use +marimo = ">=0.21" 
+matplotx = ">=0.3.10" +plotly = ">=6.6" +ripgrep = ">=15" # Primarily for agent skills use +ty = ">=0.0.25" # Experimental, also for agent skills use + +[tool.pixi.environments] +dev = { features = ["dev"], solve-group = "default" } + +######################################################################################## +# Conda package build configuration. This is used by pixi to build the conda package +######################################################################################## [tool.pixi.package] version = "v0.0.0" @@ -191,9 +205,9 @@ channels = ["conda-forge"] python = ">=3.13,<3.14.0" [tool.pixi.package.host-dependencies] -hatchling = ">=1.16" +hatchling = ">=1.29" hatch-vcs = ">=0.5" -uv = ">=0.10" +uv = ">=0.11" # This is a list of the minimal set of run-time dependencies required for PUDL to # be installed and function properly. These should be kept in sync with the more @@ -258,11 +272,19 @@ universal-pathlib = ">=0.3.10" urllib3 = ">=2.4" xlsxwriter = ">=3.2" +######################################################################################## +######################################################################################## + [tool.pixi.activation.env] # Reusable variables for pixi tasks -ETL_FAST_YML = "src/pudl/package_data/settings/etl_fast.yml" -ETL_FULL_YML = "src/pudl/package_data/settings/etl_full.yml" - +DG_FULL_CONFIG = "src/pudl/package_data/settings/dg_full.yml" +DG_NIGHTLY_CONFIG = "src/pudl/package_data/settings/dg_nightly.yml" + +######################################################################################## +# Pixi task definitions. These are used by pixi to run various tasks in a consistent +# environment. They can be run with `pixi run `, and can depend on each other +# to create complex workflows. 
See the pixi documentation for more details: +######################################################################################## [tool.pixi.tasks] # Documentation tasks [tool.pixi.tasks.docs-clean] @@ -332,8 +354,29 @@ cmd = "pytest --cov-fail-under=0 --doctest-modules src/pudl test/unit" description = "Run unit tests and doctests" [tool.pixi.tasks.pytest-integration] -cmd = "pytest --cov-fail-under=0 --etl-settings $ETL_FAST_YML test/integration" -description = "Run integration tests with fast ETL settings" +cmd = "pytest --cov-fail-under=0 test/integration" +description = "Run integration tests with the default settings" + +[tool.pixi.tasks.pytest-unit-nightly] +cmd = "pytest -n auto --no-cov --dg-config $DG_NIGHTLY_CONFIG test/unit" +description = "Run nightly-build unit tests with the nightly Dagster config" + +[tool.pixi.tasks.pytest-integration-nightly] +cmd = "pytest -n auto --no-cov --live-pudl-output --dg-config $DG_NIGHTLY_CONFIG --ignore=test/integration/data_validation_test.py test/integration" +description = "Run nightly-build integration tests against live nightly-style outputs" + +[tool.pixi.tasks.pytest-data-validation-nightly] +cmd = "pytest -n auto --no-cov --live-pudl-output --dg-config $DG_NIGHTLY_CONFIG test/integration/data_validation_test.py" +description = "Run nightly-build data validation tests against live nightly-style outputs" + +[tool.pixi.tasks.pytest-nightly] +cmd = "true" +depends-on = [ + "pytest-unit-nightly", + "pytest-integration-nightly", + "pytest-data-validation-nightly", +] +description = "Run the nightly-build pytest stages using the nightly Dagster config" [tool.pixi.tasks.pytest-ci] cmd = "coverage report" @@ -345,46 +388,64 @@ depends-on = [ ] description = "Run all CI tests and report test coverage" -[tool.pixi.tasks.pytest-integration-full] -cmd = "pytest --live-dbs --no-cov --etl-settings $ETL_FULL_YML test/integration" -description = "Run integration tests against local full ETL outputs" - 
[tool.pixi.tasks.pytest-jupyter] -cmd = "pytest --live-dbs test/integration/jupyter_notebooks_test.py" +cmd = "pytest --live-pudl-output test/integration/jupyter_notebooks_test.py" description = "Test Jupyter notebooks" - -# Build the FERC 1 and PUDL DBs, ignoring foreign key constraints. +# Build the FERC 1 and PUDL DBs. # Identify any plant or utility IDs in the DBs that haven't yet been mapped # These outputs should be turned into normal dagster assets. See issue #4338 [tool.pixi.tasks.unmapped-ids] cmd = """ pytest \ --save-unmapped-ids - --ignore-foreign-key-constraints \ - --etl-settings $ETL_FULL_YML \ + --dg-config $DG_FULL_CONFIG \ test/integration/glue_test.py """ description = "Identify unmapped plant and utility IDs" -# ETL tasks -[tool.pixi.tasks.ferc] +# Dagster tasks +[tool.pixi.tasks.ferc-to-sqlite-clean] cmd = """ bash -c 'shopt -s nullglob; rm -f \ $PUDL_OUTPUT/ferc*.sqlite \ $PUDL_OUTPUT/ferc*.duckdb \ $PUDL_OUTPUT/ferc*_xbrl_datapackage.json \ - $PUDL_OUTPUT/ferc*_xbrl_taxonomy_metadata.json' && \ -coverage run --append ferc_to_sqlite $ETL_FULL_YML + $PUDL_OUTPUT/ferc*_xbrl_taxonomy_metadata.json' """ +description = "Remove raw FERC SQLite databases and related outputs" + +[tool.pixi.tasks.ferc-to-sqlite] +cmd = "dg launch --job ferc_to_sqlite --config $DG_FULL_CONFIG" +depends-on = ["ferc-to-sqlite-clean"] description = "Build FERC databases from scratch" +[tool.pixi.tasks.pudl-clean] +cmd = "bash -c 'shopt -s nullglob; rm -f $PUDL_OUTPUT/pudl.sqlite'" +description = "Remove PUDL SQLite database" + [tool.pixi.tasks.pudl] cmd = """ -bash -c 'shopt -s nullglob; rm -f $PUDL_OUTPUT/pudl.sqlite' && \ alembic upgrade head && \ -coverage run --append pudl_etl $ETL_FULL_YML +dg launch --job pudl --config $DG_FULL_CONFIG +""" +depends-on = ["pudl-clean"] +description = "Run the full PUDL ETL without rebuilding raw FERC SQLite databases" + +[tool.pixi.tasks.pudl-with-ferc-to-sqlite] +cmd = """ +alembic upgrade head && \ +dg launch --job 
pudl_with_ferc_to_sqlite --config $DG_FULL_CONFIG """ -description = "Run full PUDL ETL pipeline" +depends-on = ["ferc-to-sqlite-clean", "pudl-clean"] +description = "Run the full PUDL ETL including rebuilding raw FERC SQLite databases" + +[tool.pixi.tasks.pudl-with-ferc-to-sqlite-nightly] +cmd = """ +alembic upgrade head && \ +dg launch --job pudl_with_ferc_to_sqlite --config $DG_NIGHTLY_CONFIG +""" +depends-on = ["ferc-to-sqlite-clean", "pudl-clean"] +description = "Run the full PUDL ETL (including FERC) using the nightly Dagster config" # Code quality tasks [tool.pixi.tasks.prek-run] @@ -416,12 +477,10 @@ description = "Install skills from lockfile" [project.scripts] auto_match_utilities = "pudl.scripts.auto_match_utilities:main" -ferc_to_sqlite = "pudl.ferc_to_sqlite.cli:main" metadata_to_rst = "pudl.convert.metadata_to_rst:metadata_to_rst" pudl_check_fks = "pudl.etl.check_foreign_keys:pudl_check_fks" pudl_datastore = "pudl.workspace.datastore:pudl_datastore" pudl_deploy = "pudl.scripts.deploy:pudl_deploy" -pudl_etl = "pudl.etl.cli:pudl_etl" pudl_service_territories = "pudl.analysis.service_territory:pudl_service_territories" dbt_helper = "pudl.scripts.dbt_helper:dbt_helper" generate_pudl_duckdb = "pudl.scripts.generate_pudl_duckdb:main" @@ -614,10 +673,7 @@ exclude_lines = [ ] [tool.dagster] -modules = [ - { type = "module", name = "pudl.etl" }, - { type = "module", name = "pudl.ferc_to_sqlite" }, -] +modules = [{ type = "module", name = "pudl.etl" }] [tool.dg] directory_type = "project" @@ -625,7 +681,7 @@ directory_type = "project" [tool.dg.project] root_module = "pudl" code_location_target_module = "pudl.definitions" -registry_modules = ["pudl.etl", "pudl.ferc_to_sqlite"] +registry_modules = ["pudl.etl"] [tool.typos.default] # Ignore spellchecking all lines with # spellchecker:ignore @@ -672,6 +728,8 @@ extend-exclude = [ "*.bib", "*.csv", "*.html", + "*.json", + ".github/skills/pudl-dev/references/datapackage.schema.json", "src/pudl/metadata/codes.py", 
"migrations/", "notebooks/", @@ -680,3 +738,9 @@ extend-exclude = [ "src/pudl/transform/params/ferc1.py", "src/pudl/package_data/ferc1/*_categories.yaml", ] + +[tool.ty.environment] +python = ".pixi/envs/default/bin/python" +python-version = "3.13" +python-platform = "all" +root = ["./src"] diff --git a/skills-lock.json b/skills-lock.json index 4d1a06bd9c..af90233614 100644 --- a/skills-lock.json +++ b/skills-lock.json @@ -4,7 +4,7 @@ "dagster-expert": { "source": "dagster-io/skills", "sourceType": "github", - "computedHash": "dc32f5b0fe71cd56f39cb56382d2c6cfb89b212241d17204db302952f372c3ea" + "computedHash": "0e7d57afa353697147cf056011fb8b0eb8263dbfac96640beaf549b4c9c37532" }, "dignified-python": { "source": "dagster-io/skills", diff --git a/src/pudl/__init__.py b/src/pudl/__init__.py index 52b7f632f3..eb40a35ce8 100644 --- a/src/pudl/__init__.py +++ b/src/pudl/__init__.py @@ -1,12 +1,21 @@ """The Public Utility Data Liberation (PUDL) Project.""" +import warnings + +from dagster import PreviewWarning + from pudl.logging_helpers import configure_root_logger -from . import ( +warnings.filterwarnings( + action="once", + message=r"Specifying a partitions_def on an AssetCheckSpec is currently in preview.*", + category=PreviewWarning, +) + +from . 
import ( # noqa: E402 analysis, convert, extract, - ferc_to_sqlite, glue, helpers, io_managers, diff --git a/src/pudl/__main__.py b/src/pudl/__main__.py deleted file mode 100644 index 8f238a831d..0000000000 --- a/src/pudl/__main__.py +++ /dev/null @@ -1,8 +0,0 @@ -"""Entrypoint module for the PUDL ETL script.""" - -import sys - -from pudl.etl.cli import pudl_etl - -if __name__ == "__main__": - sys.exit(pudl_etl()) diff --git a/src/pudl/analysis/timeseries_cleaning.py b/src/pudl/analysis/timeseries_cleaning.py index d300401b76..752c8414ae 100644 --- a/src/pudl/analysis/timeseries_cleaning.py +++ b/src/pudl/analysis/timeseries_cleaning.py @@ -2011,7 +2011,7 @@ def _flag_timeseries_matrix( ) @asset( - required_resource_keys={"dataset_settings"}, + required_resource_keys={"etl_settings"}, ins={ "matrix": AssetIn(cleaned_timeseries_matrix_asset), "flags": AssetIn(flags_asset), @@ -2118,7 +2118,7 @@ def _simulate_flags( ) @asset( - required_resource_keys={"dataset_settings"}, + required_resource_keys={"etl_settings"}, ins={ "aligned_df": AssetIn(aligned_input_asset), "matrix": AssetIn(simulated_timeseries_matrix_asset), diff --git a/src/pudl/dbt_wrapper.py b/src/pudl/dbt_wrapper.py index de0d0fe56a..c414f5d30d 100644 --- a/src/pudl/dbt_wrapper.py +++ b/src/pudl/dbt_wrapper.py @@ -44,6 +44,17 @@ def format_failure_contexts(self) -> str: return "\n=====\n".join(ctx.pretty_print() for ctx in self.failure_contexts) +def install_dbt_deps(dbt: dbtRunner | None = None) -> dbtRunner: + """Ensure dbt package dependencies are installed in the project directory.""" + if dbt is None: + dbt = dbtRunner() + + with chdir(DBT_DIR): + dbt.invoke(["deps"]) + + return dbt + + def __get_failed_nodes(results: RunExecutionResult) -> list[GenericTestNode]: """Get test node output from tests that failed.""" return [res.node for res in results if res.status == TestStatus.Fail] @@ -132,10 +143,9 @@ def build_with_context( cli_args = ["--target", dbt_target, "--select", node_selection] if 
node_exclusion is not None: cli_args += ["--exclude", node_exclusion] - dbt = dbtRunner() + dbt = install_dbt_deps() with chdir(DBT_DIR): - dbt.invoke(["deps"]) dbt.invoke(["seed"]) build_output: dbtRunnerResult = dbt.invoke(["build"] + cli_args) build_results = cast(RunExecutionResult, build_output.result) diff --git a/src/pudl/definitions.py b/src/pudl/definitions.py index 6409329ffe..632f0fcdd6 100644 --- a/src/pudl/definitions.py +++ b/src/pudl/definitions.py @@ -9,9 +9,5 @@ import dagster as dg import pudl.etl -import pudl.ferc_to_sqlite -defs: dg.Definitions = dg.Definitions.merge( - pudl.etl.defs, - pudl.ferc_to_sqlite.defs, -) +defs: dg.Definitions = pudl.etl.defs diff --git a/src/pudl/deploy/ferceqr.py b/src/pudl/deploy/ferceqr.py index cda00adc34..2eb9b2a09c 100644 --- a/src/pudl/deploy/ferceqr.py +++ b/src/pudl/deploy/ferceqr.py @@ -1,4 +1,4 @@ -"""Define tooling for monitoring the ferceqr_etl job during batch builds. +"""Define tooling for monitoring the ferceqr job during batch builds. In this module we define a Dagster Sensor that will monitor the status of a ``ferceqr`` backfill. 
This sensor will only run if the environment variable diff --git a/src/pudl/etl/__init__.py b/src/pudl/etl/__init__.py index b35d45ca3c..b1b631db77 100644 --- a/src/pudl/etl/__init__.py +++ b/src/pudl/etl/__init__.py @@ -3,13 +3,18 @@ import importlib.resources import itertools import os +from collections.abc import Mapping, Sequence +from typing import Any import dagster as dg import pudl +from pudl.analysis.ml_tools import get_ml_models_config from pudl.deploy import ferceqr from pudl.etl.asset_checks import asset_check_from_schema from pudl.io_managers import ( + FercDbfSQLiteConfigurableIOManager, + FercXbrlSQLiteConfigurableIOManager, ferc1_dbf_sqlite_io_manager, ferc1_xbrl_sqlite_io_manager, ferc714_xbrl_sqlite_io_manager, @@ -19,14 +24,16 @@ ) from pudl.metadata import PUDL_PACKAGE from pudl.resources import ( - dataset_settings, + RuntimeSettings, datastore, - ferc_to_sqlite_settings, + etl_settings, + zenodo_dois, ) -from pudl.settings import EtlSettings +from pudl.settings import load_packaged_etl_settings from . 
import ( eia_bulk_elec_assets, + ferc_to_sqlite_assets, glue_assets, static_assets, ) @@ -34,6 +41,7 @@ logger = pudl.logging_helpers.get_logger(__name__) raw_module_groups = { + "raw_ferc_to_sqlite": [ferc_to_sqlite_assets], "raw_censuspep": [pudl.extract.censuspep], "raw_eia176": [pudl.extract.eia176], "raw_eia191": [pudl.extract.eia191], @@ -198,16 +206,17 @@ def _get_keys_from_assets( default_resources = { "datastore": datastore, + "zenodo_dois": zenodo_dois, "pudl_io_manager": pudl_mixed_format_io_manager, "ferc1_dbf_sqlite_io_manager": ferc1_dbf_sqlite_io_manager, "ferc1_xbrl_sqlite_io_manager": ferc1_xbrl_sqlite_io_manager, "ferc714_xbrl_sqlite_io_manager": ferc714_xbrl_sqlite_io_manager, - "dataset_settings": dataset_settings, - "ferc_to_sqlite_settings": ferc_to_sqlite_settings, + "etl_settings": etl_settings, + "runtime_settings": RuntimeSettings(), "parquet_io_manager": parquet_io_manager, "geoparquet_io_manager": geoparquet_io_manager, "ferceqr_extract_settings": pudl.extract.ferceqr.ExtractSettings( - archive=os.getenv( # Default to read directly from GCS if local path not specified + ferceqr_archive_uri=os.getenv( # Default to read directly from GCS if local path not specified "FERCEQR_ARCHIVE_PATH", "gs://archives.catalyst.coop/ferceqr/published" ) ), @@ -223,72 +232,162 @@ def _get_keys_from_assets( }, ] +default_execution_config = { + "execution": { + "config": { + "multiprocess": { + "max_concurrent": 0, + "tag_concurrency_limits": default_tag_concurrency_limits, + }, + }, + }, +} +default_pudl_job_config = default_execution_config | get_ml_models_config() -default_config = pudl.helpers.get_dagster_execution_config( - tag_concurrency_limits=default_tag_concurrency_limits -) -default_config |= pudl.analysis.ml_tools.get_ml_models_config() - - -def load_dataset_settings_from_file(setting_filename: str) -> dict: - """Load dataset settings from a settings file in `pudl.package_data.settings`. - Args: - setting_filename: name of settings file. 
+def load_etl_run_config_from_file(setting_filename: str) -> dict: + """Load ETL run config from a packaged settings profile. - Returns: - Dictionary of dataset settings. + The settings file path is resolved via ``importlib.resources`` so the config + works correctly regardless of the current working directory. """ - dataset_settings = EtlSettings.from_yaml( + etl_settings = load_packaged_etl_settings(setting_filename) + if etl_settings.ferc_to_sqlite_settings is None: + raise ValueError("Missing ferc_to_sqlite_settings in ETL settings file.") + + etl_settings_path = str( importlib.resources.files("pudl.package_data.settings") / f"{setting_filename}.yml" - ).datasets.model_dump() - - return dataset_settings - - -defs: dg.Definitions = dg.Definitions( - assets=default_assets, - asset_checks=default_asset_checks, - resources=default_resources, - jobs=[ - dg.define_asset_job( - name="etl_full", - description="This job executes all years of all assets.", - config=default_config - | { - "resources": { - "dataset_settings": { - "config": load_dataset_settings_from_file("etl_full") - } - } + ) + + return { + "resources": { + "etl_settings": {"config": {"etl_settings_path": etl_settings_path}}, + "runtime_settings": { + "config": {}, }, - selection=dg.AssetSelection.all() - - dg.AssetSelection.groups("raw_ferceqr", "core_ferceqr"), + } + } + + +default_jobs = [ + dg.define_asset_job( + name="pudl", + description=( + "This job executes the main PUDL ETL without refreshing the FERC-to-SQLite " + "prerequisites." 
), - dg.define_asset_job( - name="etl_fast", - config=default_config - | { - "resources": { - "dataset_settings": { - "config": load_dataset_settings_from_file("etl_fast") - } - } - }, - description="This job executes the most recent year of each asset.", - selection=dg.AssetSelection.all() - - dg.AssetSelection.groups("raw_ferceqr", "core_ferceqr"), + config=default_pudl_job_config | load_etl_run_config_from_file("etl_full"), + selection=dg.AssetSelection.all() + - dg.AssetSelection.groups( + "raw_ferc_to_sqlite", + "raw_ferceqr", + "core_ferceqr", ), - dg.define_asset_job( - name="ferceqr_etl", - description="This job executes the ferceqr ETL.", - config=pudl.helpers.get_dagster_execution_config( - tag_concurrency_limits=default_tag_concurrency_limits - ), - selection=dg.AssetSelection.groups("raw_ferceqr", "core_ferceqr"), + ), + dg.define_asset_job( + name="ferc_to_sqlite", + description="This job refreshes the FERC-to-SQLite prerequisite assets only.", + config=default_execution_config | load_etl_run_config_from_file("etl_full"), + selection=dg.AssetSelection.groups("raw_ferc_to_sqlite"), + ), + dg.define_asset_job( + name="pudl_with_ferc_to_sqlite", + description=( + "This job executes the main PUDL ETL including the FERC-to-SQLite " + "prerequisites (default: full settings profile)." 
), - ], - sensors=[ferceqr.ferceqr_sensor], -) + config=default_pudl_job_config | load_etl_run_config_from_file("etl_full"), + selection=dg.AssetSelection.all() + - dg.AssetSelection.groups("raw_ferceqr", "core_ferceqr"), + ), + dg.define_asset_job( + name="ferceqr", + description="This job processes the FERC EQR data.", + config={ + "execution": { + "config": { + "multiprocess": { + "max_concurrent": 0, + "tag_concurrency_limits": default_tag_concurrency_limits, + }, + }, + }, + }, + selection=dg.AssetSelection.groups("raw_ferceqr", "core_ferceqr"), + ), +] + +default_sensors = [ferceqr.ferceqr_sensor] + + +def build_defs( + *, + resource_overrides: Mapping[str, Any] | None = None, + asset_overrides: Sequence[Any] | None = None, + asset_check_overrides: Sequence[dg.AssetChecksDefinition] | None = None, + job_overrides: Sequence[Any] | None = None, + sensor_overrides: Sequence[dg.SensorDefinition] | None = None, +) -> dg.Definitions: + """Build a fresh PUDL ``Definitions`` object with optional overrides. + + This allows tests and other callers to reuse the canonical ETL asset graph while + supplying concrete resources or a narrowed set of definitions without mutating the + module-level production ``defs`` object. + """ + resources = dict(default_resources) + if resource_overrides: + resources.update(resource_overrides) + + etl_settings_override = resource_overrides.get("etl_settings") + zenodo_dois_override = resources["zenodo_dois"] + + # Temporary workaround for nested resource dependencies in the FERC IO managers. + # These IO managers embed ``etl_settings`` as a Dagster resource dependency at + # instantiation time, so overriding the top-level ``etl_settings`` resource alone + # is not sufficient — the IO managers must be rebuilt against the same resource + # instance. A follow-up PR will remove the nested dependency from the IO managers, + # after which this rebuild block can be deleted. See Issue #5118. 
+ if etl_settings_override is not None: + if "ferc1_dbf_sqlite_io_manager" not in resource_overrides: + resources["ferc1_dbf_sqlite_io_manager"] = ( + FercDbfSQLiteConfigurableIOManager( + etl_settings=etl_settings_override, + zenodo_dois=zenodo_dois_override, + db_name=ferc1_dbf_sqlite_io_manager.db_name, + ) + ) + + if "ferc1_xbrl_sqlite_io_manager" not in resource_overrides: + resources["ferc1_xbrl_sqlite_io_manager"] = ( + FercXbrlSQLiteConfigurableIOManager( + etl_settings=etl_settings_override, + zenodo_dois=zenodo_dois_override, + db_name=ferc1_xbrl_sqlite_io_manager.db_name, + ) + ) + + if "ferc714_xbrl_sqlite_io_manager" not in resource_overrides: + resources["ferc714_xbrl_sqlite_io_manager"] = ( + FercXbrlSQLiteConfigurableIOManager( + etl_settings=etl_settings_override, + zenodo_dois=zenodo_dois_override, + db_name=ferc714_xbrl_sqlite_io_manager.db_name, + ) + ) + + return dg.Definitions( + assets=list(default_assets if asset_overrides is None else asset_overrides), + asset_checks=list( + default_asset_checks + if asset_check_overrides is None + else asset_check_overrides + ), + resources=resources, + jobs=list(default_jobs if job_overrides is None else job_overrides), + sensors=list(default_sensors if sensor_overrides is None else sensor_overrides), + ) + + +defs: dg.Definitions = build_defs() """A collection of dagster assets, resources, IO managers, and jobs for the PUDL ETL.""" diff --git a/src/pudl/etl/cli.py b/src/pudl/etl/cli.py deleted file mode 100644 index 93d28cffa0..0000000000 --- a/src/pudl/etl/cli.py +++ /dev/null @@ -1,183 +0,0 @@ -"""A command line interface (CLI) to the main PUDL ETL functionality.""" - -import pathlib -import sys -from collections.abc import Callable - -import click -import fsspec -from dagster import ( - DagsterInstance, - JobDefinition, - build_reconstructable_job, - execute_job, -) - -import pudl -from pudl.etl import defs -from pudl.helpers import get_dagster_execution_config -from pudl.settings import EtlSettings 
-from pudl.workspace.setup import PudlPaths - -logger = pudl.logging_helpers.get_logger(__name__) - - -def pudl_etl_job_factory( - logfile: str | None = None, - loglevel: str = "INFO", - base_job: str = "etl_full", -) -> Callable[[], JobDefinition]: - """Factory for parameterizing a reconstructable pudl_etl job. - - Args: - loglevel: The log level for the job's execution. - logfile: Path to a log file for the job's execution. - base_job: Name of the Dagster ETL job to execute. - - Returns: - The job definition to be executed. - """ - - def get_pudl_etl_job(): - """Create an pudl_etl_job wrapped by to be wrapped by reconstructable.""" - pudl.logging_helpers.configure_root_logger(logfile=logfile, loglevel=loglevel) - return defs.get_job_def(base_job) - - return get_pudl_etl_job - - -@click.command( - context_settings={"help_option_names": ["-h", "--help"]}, -) -@click.argument( - "etl_settings_yml", - type=click.Path( - exists=True, - dir_okay=False, - resolve_path=True, - path_type=pathlib.Path, - ), -) -@click.option( - "--dagster-workers", - default=0, - type=int, - help="Max number of processes Dagster can launch. Defaults to the number of CPUs.", -) -@click.option( - "--cloud-cache-path", - type=str, - default="s3://pudl.catalyst.coop/zenodo", - help=( - "Load cached inputs from cloud object storage (S3 or GCS). This is typically " - "much faster and more reliable than downloading from Zenodo directly. By " - "default we read from the cache in PUDL's free, public AWS Open Data Registry " - "bucket." 
- ), -) -@click.option( - "--logfile", - help="If specified, write logs to this file.", - type=click.Path( - exists=False, - resolve_path=True, - path_type=pathlib.Path, - ), -) -@click.option( - "--loglevel", - default="INFO", - type=click.Choice( - ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], case_sensitive=False - ), -) -def pudl_etl( - etl_settings_yml: pathlib.Path, - dagster_workers: int, - cloud_cache_path: str, - logfile: pathlib.Path | None, - loglevel: str, -): - """Use Dagster to run the PUDL ETL, as specified by the file ETL_SETTINGS_YML.""" - # Display logged output from the PUDL package: - logfile_str = str(logfile) if logfile is not None else None - pudl.logging_helpers.configure_root_logger(logfile=logfile_str, loglevel=loglevel) - etl_settings = EtlSettings.from_yaml(str(etl_settings_yml)) - if etl_settings.datasets is None: - raise click.BadParameter( - "No datasets were configured in the ETL settings file.", - param_hint="etl_settings_yml", - ) - - if etl_settings.datasets.epacems is None or etl_settings.datasets.epacems.disabled: - raise click.BadParameter( - "EPA CEMS is now always included in the ETL. " - "Set datasets.epacems with disabled: false in your ETL settings file.", - param_hint="etl_settings_yml", - ) - - dataset_settings_config = etl_settings.datasets.model_dump() - - pudl_etl_reconstructable_job = build_reconstructable_job( - "pudl.etl.cli", - "pudl_etl_job_factory", - reconstructable_kwargs={ - "loglevel": loglevel, - "logfile": logfile_str, - }, - ) - run_config = { - "resources": { - "dataset_settings": {"config": dataset_settings_config}, - "datastore": { - "config": { - "cloud_cache_path": cloud_cache_path, - }, - }, - }, - } - - # Limit the number of concurrent workers when launch assets that use a lot of memory. 
- tag_concurrency_limits = [ - { - "key": "memory-use", - "value": "high", - "limit": 4, - }, - ] - - run_config.update( - get_dagster_execution_config( - num_workers=dagster_workers, - tag_concurrency_limits=tag_concurrency_limits, - ) - ) - - result = execute_job( - pudl_etl_reconstructable_job, - instance=DagsterInstance.get(), - run_config=run_config, - ) - - # Workaround to reliably getting full stack trace - if not result.success: - for event in result.all_events: - if event.event_type_value == "STEP_FAILURE": - event_error = getattr(event.event_specific_data, "error", None) - if event_error is not None: - raise Exception(event_error) - raise Exception("ETL failed but no step error details were available.") - else: - logger.info("ETL job completed successfully, publishing outputs.") - for output_path in etl_settings.publish_destinations: - logger.info(f"Publishing outputs to {output_path}") - fs, _, _ = fsspec.get_fs_token_paths(output_path) - fs.put( - PudlPaths().output_dir, # type: ignore[call-arg] - output_path, - recursive=True, - ) - - -if __name__ == "__main__": - sys.exit(pudl_etl()) diff --git a/src/pudl/etl/ferc_to_sqlite_assets.py b/src/pudl/etl/ferc_to_sqlite_assets.py new file mode 100644 index 0000000000..bbdeffcbcb --- /dev/null +++ b/src/pudl/etl/ferc_to_sqlite_assets.py @@ -0,0 +1,159 @@ +"""Dagster asset definitions for granular FERC-to-SQLite extraction.""" + +import dagster as dg + +import pudl +from pudl.extract.ferc import ( + Ferc1DbfExtractor, + Ferc2DbfExtractor, + Ferc6DbfExtractor, + Ferc60DbfExtractor, +) +from pudl.extract.xbrl import FercXbrlDatastore, convert_form +from pudl.ferc_sqlite_provenance import build_ferc_sqlite_provenance_metadata +from pudl.settings import XbrlFormNumber +from pudl.workspace.setup import PudlPaths + +logger = pudl.logging_helpers.get_logger(__name__) + + +def dbf_to_sqlite_asset_factory( + *, key: dg.AssetKey, dataset: str, extractor_class +) -> dg.AssetsDefinition: + """Create a DBF-to-SQLite 
prerequisite asset for a specific FERC dataset.""" + + @dg.asset( + key=key, + group_name="raw_ferc_to_sqlite", + required_resource_keys={ + "etl_settings", + "datastore", + "runtime_settings", + "zenodo_dois", + }, + tags={"dataset": dataset, "data_format": "dbf"}, + ) + def _asset(context) -> dg.MaterializeResult[str]: + extractor_class( + datastore=context.resources.datastore, + settings=context.resources.etl_settings.ferc_to_sqlite, + output_path=PudlPaths().output_dir, + ).execute() + return dg.MaterializeResult( + value="complete", + metadata=build_ferc_sqlite_provenance_metadata( + db_name=f"{dataset}_dbf", + etl_settings=context.resources.etl_settings, + zenodo_dois=context.resources.zenodo_dois, + sqlite_path=PudlPaths().sqlite_db_path(f"{dataset}_dbf"), + status="complete", + ), + ) + + return _asset + + +def xbrl_to_sqlite_asset_factory( + *, key: dg.AssetKey, form: XbrlFormNumber +) -> dg.AssetsDefinition: + """Create an XBRL-to-SQLite prerequisite asset for a specific FERC form.""" + + @dg.asset( + key=key, + group_name="raw_ferc_to_sqlite", + required_resource_keys={ + "etl_settings", + "datastore", + "runtime_settings", + "zenodo_dois", + }, + tags={"dataset": f"ferc{form.value}", "data_format": "xbrl"}, + ) + def _asset(context) -> dg.MaterializeResult[str]: + runtime_settings = context.resources.runtime_settings + settings = context.resources.etl_settings.get_xbrl_dataset_settings(form) + if settings is None or not settings.years: + logger.info( + f"Skipping dataset ferc{form.value}_xbrl: no config or no years configured." 
+ ) + return dg.MaterializeResult( + value="skipped", + metadata={ + "pudl_ferc_sqlite_status": dg.MetadataValue.text("skipped"), + }, + ) + + output_path = PudlPaths().output_dir + sqlite_path = PudlPaths().sqlite_db_path(f"ferc{form.value}_xbrl") + if sqlite_path.exists(): + sqlite_path.unlink() + duckdb_path = PudlPaths().duckdb_db_path(f"ferc{form.value}_xbrl") + if duckdb_path.exists(): + duckdb_path.unlink() + + convert_form( + settings, + form, + FercXbrlDatastore(context.resources.datastore), + output_path=output_path, + sqlite_path=sqlite_path, + duckdb_path=duckdb_path, + batch_size=runtime_settings.xbrl_batch_size, + workers=runtime_settings.xbrl_num_workers, + loglevel=runtime_settings.xbrl_loglevel, + ) + return dg.MaterializeResult( + value="complete", + metadata=build_ferc_sqlite_provenance_metadata( + db_name=f"ferc{form.value}_xbrl", + etl_settings=context.resources.etl_settings, + zenodo_dois=context.resources.zenodo_dois, + sqlite_path=sqlite_path, + status="complete", + ), + ) + + return _asset + + +raw_ferc1_dbf__sqlite = dbf_to_sqlite_asset_factory( + key=dg.AssetKey("raw_ferc1_dbf__sqlite"), + dataset="ferc1", + extractor_class=Ferc1DbfExtractor, +) +raw_ferc2_dbf__sqlite = dbf_to_sqlite_asset_factory( + key=dg.AssetKey("raw_ferc2_dbf__sqlite"), + dataset="ferc2", + extractor_class=Ferc2DbfExtractor, +) +raw_ferc6_dbf__sqlite = dbf_to_sqlite_asset_factory( + key=dg.AssetKey("raw_ferc6_dbf__sqlite"), + dataset="ferc6", + extractor_class=Ferc6DbfExtractor, +) +raw_ferc60_dbf__sqlite = dbf_to_sqlite_asset_factory( + key=dg.AssetKey("raw_ferc60_dbf__sqlite"), + dataset="ferc60", + extractor_class=Ferc60DbfExtractor, +) + +raw_ferc1_xbrl__sqlite = xbrl_to_sqlite_asset_factory( + key=dg.AssetKey("raw_ferc1_xbrl__sqlite"), + form=XbrlFormNumber.FORM1, +) +raw_ferc2_xbrl__sqlite = xbrl_to_sqlite_asset_factory( + key=dg.AssetKey("raw_ferc2_xbrl__sqlite"), + form=XbrlFormNumber.FORM2, +) +raw_ferc6_xbrl__sqlite = xbrl_to_sqlite_asset_factory( + 
key=dg.AssetKey("raw_ferc6_xbrl__sqlite"), + form=XbrlFormNumber.FORM6, +) +raw_ferc60_xbrl__sqlite = xbrl_to_sqlite_asset_factory( + key=dg.AssetKey("raw_ferc60_xbrl__sqlite"), + form=XbrlFormNumber.FORM60, +) +raw_ferc714_xbrl__sqlite = xbrl_to_sqlite_asset_factory( + key=dg.AssetKey("raw_ferc714_xbrl__sqlite"), + form=XbrlFormNumber.FORM714, +) diff --git a/src/pudl/etl/glue_assets.py b/src/pudl/etl/glue_assets.py index 0835373a7f..7fb18f881d 100644 --- a/src/pudl/etl/glue_assets.py +++ b/src/pudl/etl/glue_assets.py @@ -24,7 +24,7 @@ if "core_epa__assn_eia_epacamd" not in table_name }, can_subset=True, - required_resource_keys={"datastore", "dataset_settings"}, + required_resource_keys={"datastore", "etl_settings"}, ) def create_glue_tables(context): """Extract, transform and load CSVs for the FERC-EIA Glue tables. @@ -40,7 +40,7 @@ def create_glue_tables(context): """ # TODO 2024-09-23: double check if these settings are actually # doing anything for the FERC-EIA glue... doesn't look like it. 
- dataset_settings = context.resources.dataset_settings + dataset_settings = context.resources.etl_settings.dataset_settings # grab the glue tables for ferc1 & eia glue_dfs = pudl.glue.ferc1_eia.glue( ferc1=dataset_settings.glue.ferc1, @@ -88,7 +88,7 @@ def raw_pudl__assn_eia_epacamd(context) -> pd.DataFrame: return pd.concat(year_matches, ignore_index=True) -@asset(required_resource_keys={"dataset_settings"}, io_manager_key="pudl_io_manager") +@asset(required_resource_keys={"etl_settings"}, io_manager_key="pudl_io_manager") def core_epa__assn_eia_epacamd( context, raw_pudl__assn_eia_epacamd: pd.DataFrame, @@ -192,7 +192,7 @@ def core_epa__assn_eia_epacamd( .dropna(subset=["plant_id_eia"]) .pipe(correct_epa_eia_plant_id_mapping) ) - dataset_settings = context.resources.dataset_settings + dataset_settings = context.resources.etl_settings.dataset_settings processing_all_eia_years = ( dataset_settings.eia.eia860.years == dataset_settings.eia.eia860.data_source.working_partitions["years"] diff --git a/src/pudl/etl/static_assets.py b/src/pudl/etl/static_assets.py index e6cf2579f7..d6a2f795ad 100644 --- a/src/pudl/etl/static_assets.py +++ b/src/pudl/etl/static_assets.py @@ -49,12 +49,12 @@ def _read_static_encoding_tables( for table_name in Package.get_etl_group_tables("static_pudl") }, can_subset=True, - required_resource_keys={"dataset_settings", "datastore"}, + required_resource_keys={"etl_settings", "datastore"}, ) def static_pudl_tables(context): """Read static tables compiled as part of PUDL and not from any agency dataset.""" ds = context.resources.datastore - dataset_settings = context.resources.dataset_settings + dataset_settings = context.resources.etl_settings.dataset_settings static_pudl_tables_dict = { "core_pudl__codes_subdivisions": POLITICAL_SUBDIVISIONS, diff --git a/src/pudl/extract/dbf.py b/src/pudl/extract/dbf.py index 41d80ba1a6..b962dae3c2 100644 --- a/src/pudl/extract/dbf.py +++ b/src/pudl/extract/dbf.py @@ -19,7 +19,7 @@ import pudl import 
pudl.logging_helpers from pudl.metadata.classes import DataSource -from pudl.settings import FercToSqliteSettings, GenericDatasetSettings +from pudl.settings import FercDbfToSqliteSettings, FercToSqliteSettings from pudl.workspace.datastore import Datastore from pudl.workspace.setup import PudlPaths @@ -46,7 +46,7 @@ def __init__(self, table_name: str): def add_column( self, col_name: str, - col_type: sa.types.TypeEngine, + col_type: type[sa.types.TypeEngine] | sa.types.TypeEngine, short_name: str | None = None, ): """Adds a new column to this table schema.""" @@ -56,7 +56,9 @@ def add_column( if short_name is not None: self._short_name_map[short_name] = col_name - def get_columns(self) -> Iterator[tuple[str, sa.types.TypeEngine]]: + def get_columns( + self, + ) -> Iterator[tuple[str, type[sa.types.TypeEngine] | sa.types.TypeEngine]]: """Iterates over the (column_name, column_type) pairs.""" for col_name in self._columns: yield (col_name, self._column_types[col_name]) @@ -94,7 +96,7 @@ def __init__( dbc_path: Path, table_file_map: dict[str, str], partition: dict[str, Any], - field_parser: FieldParser, + field_parser: type[FieldParser], ): """Constructs new instance of FercDbfArchive.""" self.zipfile = zipfile @@ -162,7 +164,7 @@ def get_table_schema(self, table_name: str) -> DbfTableSchema: dbf = self.get_table_dbf(table_name) dbf_fields = [field for field in dbf.fields if field.name != "_NullFlags"] if len(dbf_fields) != len(table_columns): - return ValueError( + raise ValueError( f"Number of DBF fields in {table_name} does not match what was " f"found in the DBC index file for {self.partition}." ) @@ -294,7 +296,7 @@ def __init__( self: Self, datastore: Datastore, dataset: str, - field_parser: FieldParser = FercFieldParser, + field_parser: type[FieldParser] = FercFieldParser, ): """Create a new instance of FercDbfReader. @@ -440,7 +442,7 @@ def __init__( settings: generic settings object for this extrctor. 
output_path: directory where the output databases should be stored. """ - self.settings: GenericDatasetSettings = self.get_settings(settings) + self.settings: FercDbfToSqliteSettings = self.get_settings(settings) self.output_path = output_path self.datastore = datastore self.dbf_reader = self.get_dbf_reader(datastore) @@ -450,9 +452,9 @@ def __init__( def get_settings( self, global_settings: FercToSqliteSettings - ) -> GenericDatasetSettings: + ) -> FercDbfToSqliteSettings: """Returns dataset relevant settings from the global_settings.""" - return NotImplemented( + raise NotImplementedError( "get_settings() needs to extract dataset specific settings." ) @@ -472,7 +474,7 @@ def get_dagster_op(cls) -> Callable: @op( name=f"{cls.DATASET}_dbf", required_resource_keys={ - "ferc_to_sqlite_settings", + "etl_settings", "datastore", "runtime_settings", }, @@ -482,7 +484,7 @@ def inner_method(context) -> None: """Instantiates dbf extractor and runs it.""" dbf_extractor = cls( datastore=context.resources.datastore, - settings=context.resources.ferc_to_sqlite_settings, + settings=context.resources.etl_settings.ferc_to_sqlite, output_path=PudlPaths().output_dir, ) dbf_extractor.execute() @@ -494,8 +496,8 @@ def execute(self): logger.info( f"Running dbf extraction for {self.DATASET} with settings: {self.settings}" ) - if self.settings.disabled: - logger.warning(f"Dataset {self.DATASET} extraction is disabled, skipping") + if not self.settings.years: + logger.warning(f"Dataset {self.DATASET} has no years configured, skipping") return self.delete_schema() diff --git a/src/pudl/extract/eia860.py b/src/pudl/extract/eia860.py index 33d47f1720..35baef481f 100644 --- a/src/pudl/extract/eia860.py +++ b/src/pudl/extract/eia860.py @@ -104,7 +104,7 @@ def get_dtypes(page, **partition): for table_name in sorted(RAW_EIA860_TABLE_NAMES) }, can_subset=True, - required_resource_keys={"datastore", "dataset_settings"}, + required_resource_keys={"datastore", "etl_settings"}, ) def 
extract_eia860(context, raw_eia860__all_dfs): """Extract raw EIA data from excel sheets into dataframes. @@ -115,7 +115,7 @@ def extract_eia860(context, raw_eia860__all_dfs): Returns: A tuple of extracted EIA dataframes. """ - eia_settings = context.resources.dataset_settings.eia + eia_settings = context.resources.etl_settings.dataset_settings.eia ds = context.resources.datastore selected_outputs = set(context.selected_output_names) diff --git a/src/pudl/extract/eia860m.py b/src/pudl/extract/eia860m.py index a6d703db5e..4b95c68d5a 100644 --- a/src/pudl/extract/eia860m.py +++ b/src/pudl/extract/eia860m.py @@ -92,10 +92,10 @@ def append_eia860m( return eia860_raw_dfs -@asset(required_resource_keys={"datastore", "dataset_settings"}) +@asset(required_resource_keys={"datastore", "etl_settings"}) def raw_eia860m__all_dfs(context): """Extract raw EIA 860M data from excel sheets into dict of dataframes.""" - eia_settings = context.resources.dataset_settings.eia + eia_settings = context.resources.etl_settings.dataset_settings.eia ds = context.resources.datastore eia860m_extractor = Extractor(ds=ds) diff --git a/src/pudl/extract/eia930.py b/src/pudl/extract/eia930.py index 12657280c2..7bd2ab8960 100644 --- a/src/pudl/extract/eia930.py +++ b/src/pudl/extract/eia930.py @@ -25,27 +25,27 @@ logger = pudl.logging_helpers.get_logger(__name__) -@asset(required_resource_keys={"datastore", "dataset_settings"}) +@asset(required_resource_keys={"datastore", "etl_settings"}) def raw_eia930__balance(context) -> ParquetData: """Raw balance page.""" return extract_page( datastore=context.resources.datastore, page="balance", - half_years=context.resources.dataset_settings.eia.eia930.half_years, + half_years=context.resources.etl_settings.dataset_settings.eia.eia930.half_years, ) -@asset(required_resource_keys={"datastore", "dataset_settings"}) +@asset(required_resource_keys={"datastore", "etl_settings"}) def raw_eia930__interchange(context) -> ParquetData: """Raw interchange page.""" return 
extract_page( datastore=context.resources.datastore, page="interchange", - half_years=context.resources.dataset_settings.eia.eia930.half_years, + half_years=context.resources.etl_settings.dataset_settings.eia.eia930.half_years, ) -@asset(required_resource_keys={"datastore", "dataset_settings"}) +@asset(required_resource_keys={"datastore", "etl_settings"}) def raw_eia930__subregion(context) -> ParquetData: """Raw subregion page - only exists after 2018h2.""" return extract_page( @@ -53,7 +53,7 @@ def raw_eia930__subregion(context) -> ParquetData: page="subregion", half_years=[ h - for h in context.resources.dataset_settings.eia.eia930.half_years + for h in context.resources.etl_settings.dataset_settings.eia.eia930.half_years if h >= "2018half2" ], ) diff --git a/src/pudl/extract/eiaaeo.py b/src/pudl/extract/eiaaeo.py index 66fc260a66..941805ad2b 100644 --- a/src/pudl/extract/eiaaeo.py +++ b/src/pudl/extract/eiaaeo.py @@ -421,7 +421,7 @@ def get_table(self, table_number: int, report_year: int) -> pd.DataFrame: "raw_eiaaeo__electric_power_projections_regional": AssetOut(is_required=False), }, can_subset=True, - required_resource_keys={"datastore", "dataset_settings"}, + required_resource_keys={"datastore", "etl_settings"}, ) def raw_eiaaeo(context: AssetExecutionContext): """Extract tables from EIA's Annual Energy Outlook. 
@@ -453,7 +453,7 @@ def raw_eiaaeo(context: AssetExecutionContext): ds = context.resources.datastore # Extract for all years specified in the settings - report_years = context.resources.dataset_settings.eia.eiaaeo.years + report_years = context.resources.etl_settings.dataset_settings.eia.eiaaeo.years selected = context.op_execution_context.selected_output_names for asset_name in selected: diff --git a/src/pudl/extract/extractor.py b/src/pudl/extract/extractor.py index 2dabde4287..676ec68ae3 100644 --- a/src/pudl/extract/extractor.py +++ b/src/pudl/extract/extractor.py @@ -387,7 +387,7 @@ def partitions_from_settings_factory(name: str) -> OpDefinition: @op( out=DynamicOut(), - required_resource_keys={"dataset_settings"}, + required_resource_keys={"etl_settings"}, name=f"{name}_partitions_from_settings", ) def partitions_from_settings(context) -> DynamicOutput: @@ -402,9 +402,9 @@ def partitions_from_settings(context) -> DynamicOutput: https://docs.dagster.io/_apidocs/dynamic#dagster.DynamicOut """ if "eia" in name: # Account for nested settings if EIA - partition_settings = context.resources.dataset_settings.eia + partition_settings = context.resources.etl_settings.dataset_settings.eia else: - partition_settings = context.resources.dataset_settings + partition_settings = context.resources.etl_settings.dataset_settings # Get year/year_quarter/half_year partition data_settings = getattr(partition_settings, name) # Get dataset settings diff --git a/src/pudl/extract/ferc1.py b/src/pudl/extract/ferc1.py index 8940c55547..4dcb7e4d02 100644 --- a/src/pudl/extract/ferc1.py +++ b/src/pudl/extract/ferc1.py @@ -68,7 +68,7 @@ import json from itertools import chain from pathlib import Path -from typing import Any, Literal +from typing import Any, Literal, TypedDict import pandas as pd import sqlalchemy as sa @@ -76,7 +76,6 @@ AssetKey, AssetSpec, asset, - build_init_resource_context, build_input_context, ) @@ -88,17 +87,35 @@ deduplicate_by_year, ) from pudl.io_managers import 
( - FercDBFSQLiteIOManager, - FercXBRLSQLiteIOManager, + FercDbfSQLiteIOManager, + FercXbrlSQLiteIOManager, ferc1_dbf_sqlite_io_manager, ferc1_xbrl_sqlite_io_manager, ) -from pudl.settings import DatasetsSettings, FercToSqliteSettings, GenericDatasetSettings +from pudl.settings import ( + DatasetsSettings, + EtlSettings, + FercDbfToSqliteSettings, + FercToSqliteSettings, +) from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) -TABLE_NAME_MAP_FERC1: dict[str, dict[str, str]] = { +FERC1_DBF_SQLITE_ASSET_KEY = AssetKey("raw_ferc1_dbf__sqlite") +FERC1_XBRL_SQLITE_ASSET_KEY = AssetKey("raw_ferc1_xbrl__sqlite") + +RawFercTableName = str | list[str] + + +class RawTableMapping(TypedDict): + """Mapping between normalized PUDL table and raw DBF/XBRL table names.""" + + dbf: RawFercTableName + xbrl: RawFercTableName + + +TABLE_NAME_MAP_FERC1: dict[str, RawTableMapping] = { "core_ferc1__yearly_steam_plants_fuel_sched402": { "dbf": "f1_fuel", "xbrl": "steam_electric_generating_plant_statistics_large_plants_fuel_statistics_402", @@ -208,7 +225,7 @@ } """A mapping of PUDL DB table names to their XBRL and DBF source table names.""" -XBRL_META_ONLY_FERC1 = { +XBRL_META_ONLY_FERC1: dict[str, RawTableMapping] = { "nuclear_fuel_materials_ferc1": { "dbf": "f1_nuclear_fuel", "xbrl": "nuclear_fuel_materials_202", @@ -238,9 +255,12 @@ class Ferc1DbfExtractor(FercDbfExtractor): def get_settings( self, global_settings: FercToSqliteSettings - ) -> GenericDatasetSettings: + ) -> FercDbfToSqliteSettings: """Returns settings for FERC Form 1 DBF dataset.""" - return global_settings.ferc1_dbf_to_sqlite_settings + settings = global_settings.ferc1_dbf_to_sqlite_settings + if settings is None: + raise ValueError("ferc1_dbf_to_sqlite_settings must be configured") + return settings def finalize_schema(self, meta: sa.MetaData) -> sa.MetaData: """Modifies schema before it's written to sqlite database. 
@@ -290,7 +310,7 @@ def add_missing_respondents(self): # are identified in the PUDL_RIDS map, others are still unknown. records = [] for rid in missing_ids: - entry = {"respondent_id": rid} + entry: dict[str, int | str] = {"respondent_id": rid} known_name = self.PUDL_RIDS.get(rid, None) if known_name: entry["respondent_name"] = f"{known_name} (PUDL determined)" @@ -359,9 +379,10 @@ def create_raw_ferc1_assets() -> list[AssetSpec]: ) dbf_table_names = tuple(set(flattened_dbfs)) raw_ferc1_dbf_assets = [ - AssetSpec(key=AssetKey(f"raw_ferc1_dbf__{table_name}")).with_io_manager_key( - "ferc1_dbf_sqlite_io_manager" - ) + AssetSpec( + key=AssetKey(f"raw_ferc1_dbf__{table_name}"), + deps=[FERC1_DBF_SQLITE_ASSET_KEY], + ).with_io_manager_key("ferc1_dbf_sqlite_io_manager") for table_name in dbf_table_names ] @@ -375,9 +396,10 @@ def create_raw_ferc1_assets() -> list[AssetSpec]: ) xbrl_table_names = tuple(set(xbrls_with_periods)) raw_ferc1_xbrl_assets = [ - AssetSpec(key=AssetKey(f"raw_ferc1_xbrl__{table_name}")).with_io_manager_key( - "ferc1_xbrl_sqlite_io_manager" - ) + AssetSpec( + key=AssetKey(f"raw_ferc1_xbrl__{table_name}"), + deps=[FERC1_XBRL_SQLITE_ASSET_KEY], + ).with_io_manager_key("ferc1_xbrl_sqlite_io_manager") for table_name in xbrl_table_names ] return raw_ferc1_dbf_assets + raw_ferc1_xbrl_assets @@ -391,7 +413,7 @@ def create_raw_ferc1_assets() -> list[AssetSpec]: # asset name. -@asset +@asset(deps=[FERC1_XBRL_SQLITE_ASSET_KEY]) def raw_ferc1_xbrl__metadata_json( context, ) -> dict[str, dict[str, list[dict[str, Any]]]]: @@ -406,7 +428,10 @@ def raw_ferc1_xbrl__metadata_json( filings. If there is no instant/duration table, an empty list is returned instead. 
""" - metadata_path = PudlPaths().output_dir / "ferc1_xbrl_taxonomy_metadata.json" + metadata_path = ( + PudlPaths().output_dir # type: ignore[call-arg] + / "ferc1_xbrl_taxonomy_metadata.json" + ) with Path.open(metadata_path) as f: xbrl_meta_all = json.load(f) @@ -442,7 +467,7 @@ def squash_period(xbrl_table: str | list[str], period, xbrl_meta_all): # Ferc extraction functions for devtool notebook testing def extract_dbf_generic( table_names: list[str], - io_manager: FercDBFSQLiteIOManager, + io_manager: FercDbfSQLiteIOManager, dataset_settings: DatasetsSettings, ) -> pd.DataFrame: """Combine multiple raw dbf tables into one. @@ -461,7 +486,7 @@ def extract_dbf_generic( context = build_input_context( asset_key=AssetKey(table_name), upstream_output=None, - resources={"dataset_settings": dataset_settings}, + resources={"etl_settings": EtlSettings(datasets=dataset_settings)}, ) tables.append(io_manager.load_input(context)) return pd.concat(tables) @@ -469,7 +494,7 @@ def extract_dbf_generic( def extract_xbrl_generic( table_names: list[str], - io_manager: FercXBRLSQLiteIOManager, + io_manager: FercXbrlSQLiteIOManager, dataset_settings: DatasetsSettings, period: Literal["duration", "instant"], ) -> pd.DataFrame: @@ -491,7 +516,7 @@ def extract_xbrl_generic( context = build_input_context( asset_key=AssetKey(full_xbrl_table_name), upstream_output=None, - resources={"dataset_settings": dataset_settings}, + resources={"etl_settings": EtlSettings(datasets=dataset_settings)}, ) tables.append(io_manager.load_input(context)) return pd.concat(tables) @@ -514,10 +539,9 @@ def extract_dbf(dataset_settings: DatasetsSettings) -> dict[str, pd.DataFrame]: """ ferc1_dbf_raw_dfs = {} - io_manager_init_context = build_init_resource_context( - resources={"dataset_settings": dataset_settings} + io_manager = ferc1_dbf_sqlite_io_manager.model_copy( + update={"etl_settings": EtlSettings(datasets=dataset_settings)} ) - io_manager = ferc1_dbf_sqlite_io_manager(io_manager_init_context) for 
table_name, raw_table_mapping in TABLE_NAME_MAP_FERC1.items(): dbf_table_or_tables = raw_table_mapping["dbf"] @@ -550,10 +574,9 @@ def extract_xbrl( """ ferc1_xbrl_raw_dfs = {} - io_manager_init_context = build_init_resource_context( - resources={"dataset_settings": dataset_settings} + io_manager = ferc1_xbrl_sqlite_io_manager.model_copy( + update={"etl_settings": EtlSettings(datasets=dataset_settings)} ) - io_manager = ferc1_xbrl_sqlite_io_manager(io_manager_init_context) for table_name, raw_table_mapping in TABLE_NAME_MAP_FERC1.items(): xbrl_table_or_tables = raw_table_mapping["xbrl"] diff --git a/src/pudl/extract/ferc2.py b/src/pudl/extract/ferc2.py index 44ee47fd63..0ba380a625 100644 --- a/src/pudl/extract/ferc2.py +++ b/src/pudl/extract/ferc2.py @@ -22,7 +22,7 @@ add_key_constraints, deduplicate_by_year, ) -from pudl.settings import FercToSqliteSettings, GenericDatasetSettings +from pudl.settings import FercDbfToSqliteSettings, FercToSqliteSettings logger = pudl.logging_helpers.get_logger(__name__) @@ -35,8 +35,8 @@ class Ferc2DbfExtractor(FercDbfExtractor): def get_settings( self: Self, global_settings: FercToSqliteSettings - ) -> GenericDatasetSettings: - """Returns settings for FERC Form 1 DBF dataset.""" + ) -> FercDbfToSqliteSettings: + """Returns settings for FERC Form 2 DBF dataset.""" return global_settings.ferc2_dbf_to_sqlite_settings def finalize_schema(self: Self, meta: sa.MetaData) -> sa.MetaData: diff --git a/src/pudl/extract/ferc6.py b/src/pudl/extract/ferc6.py index f1fa81bda3..a08a522bf7 100644 --- a/src/pudl/extract/ferc6.py +++ b/src/pudl/extract/ferc6.py @@ -9,7 +9,7 @@ add_key_constraints, deduplicate_by_year, ) -from pudl.settings import FercToSqliteSettings, GenericDatasetSettings +from pudl.settings import FercDbfToSqliteSettings, FercToSqliteSettings class Ferc6DbfExtractor(FercDbfExtractor): @@ -20,7 +20,7 @@ class Ferc6DbfExtractor(FercDbfExtractor): def get_settings( self, global_settings: FercToSqliteSettings - ) -> 
GenericDatasetSettings: + ) -> FercDbfToSqliteSettings: """Returns settings for FERC Form 6 DBF dataset.""" return global_settings.ferc6_dbf_to_sqlite_settings diff --git a/src/pudl/extract/ferc60.py b/src/pudl/extract/ferc60.py index 910ca85aa5..51b733bdf7 100644 --- a/src/pudl/extract/ferc60.py +++ b/src/pudl/extract/ferc60.py @@ -9,7 +9,7 @@ add_key_constraints, deduplicate_by_year, ) -from pudl.settings import FercToSqliteSettings, GenericDatasetSettings +from pudl.settings import FercDbfToSqliteSettings, FercToSqliteSettings class Ferc60DbfExtractor(FercDbfExtractor): @@ -20,7 +20,7 @@ class Ferc60DbfExtractor(FercDbfExtractor): def get_settings( self, global_settings: FercToSqliteSettings - ) -> GenericDatasetSettings: + ) -> FercDbfToSqliteSettings: """Returns settings for FERC Form 60 DBF dataset.""" return global_settings.ferc60_dbf_to_sqlite_settings diff --git a/src/pudl/extract/ferc714.py b/src/pudl/extract/ferc714.py index 02cca8aa14..5891d41120 100644 --- a/src/pudl/extract/ferc714.py +++ b/src/pudl/extract/ferc714.py @@ -14,6 +14,8 @@ logger = pudl.logging_helpers.get_logger(__name__) +FERC714_XBRL_SQLITE_ASSET_KEY = AssetKey("raw_ferc714_xbrl__sqlite") + FERC714_CSV_ENCODING: OrderedDict[str, dict[str, str]] = OrderedDict( { "yearly_id_certification": { @@ -93,7 +95,7 @@ def raw_ferc714_csv_asset_factory(table_name: str) -> AssetsDefinition: @asset( name=f"raw_ferc714_csv__{table_name}", - required_resource_keys={"datastore", "dataset_settings"}, + required_resource_keys={"datastore", "etl_settings"}, compute_kind="pandas", ) def _extract_raw_ferc714_csv(context): @@ -103,7 +105,7 @@ def _extract_raw_ferc714_csv(context): context: dagster keyword that provides access to resources and config. 
""" ds = context.resources.datastore - ferc714_settings = context.resources.dataset_settings.ferc714 + ferc714_settings = context.resources.etl_settings.dataset_settings.ferc714 years = ", ".join(map(str, ferc714_settings.csv_years)) logger.info( @@ -124,7 +126,7 @@ def _extract_raw_ferc714_csv(context): return _extract_raw_ferc714_csv -@asset +@asset(deps=[FERC714_XBRL_SQLITE_ASSET_KEY]) def raw_ferc714_xbrl__metadata_json( context, ) -> dict[str, dict[str, list[dict[str, Any]]]]: @@ -191,9 +193,10 @@ def create_raw_ferc714_xbrl_assets() -> list[AssetSpec]: ) xbrl_table_names = tuple(set(xbrls_with_periods)) raw_ferc714_xbrl_assets = [ - AssetSpec(key=AssetKey(f"raw_ferc714_xbrl__{table_name}")).with_io_manager_key( - "ferc714_xbrl_sqlite_io_manager" - ) + AssetSpec( + key=AssetKey(f"raw_ferc714_xbrl__{table_name}"), + deps=[FERC714_XBRL_SQLITE_ASSET_KEY], + ).with_io_manager_key("ferc714_xbrl_sqlite_io_manager") for table_name in xbrl_table_names ] return raw_ferc714_xbrl_assets diff --git a/src/pudl/extract/gridpathratoolkit.py b/src/pudl/extract/gridpathratoolkit.py index a178cad6fa..cac596c286 100644 --- a/src/pudl/extract/gridpathratoolkit.py +++ b/src/pudl/extract/gridpathratoolkit.py @@ -62,7 +62,7 @@ def raw_gridpathratoolkit_asset_factory(part: str) -> AssetsDefinition: """ asset_kwargs = { "name": f"raw_gridpathratoolkit__{part}", - "required_resource_keys": {"datastore", "dataset_settings"}, + "required_resource_keys": {"datastore", "etl_settings"}, "compute_kind": "Python", } if part == "aggregated_extended_solar_capacity": @@ -75,7 +75,9 @@ def _extract(context): Args: context: dagster keyword that provides access to resources and config. 
""" - gpratk_settings = context.resources.dataset_settings.gridpathratoolkit + gpratk_settings = ( + context.resources.etl_settings.dataset_settings.gridpathratoolkit + ) ds = context.resources.datastore csv_parts = [ "daily_weather", diff --git a/src/pudl/extract/sec10k.py b/src/pudl/extract/sec10k.py index 8d656e5e71..f19c297726 100644 --- a/src/pudl/extract/sec10k.py +++ b/src/pudl/extract/sec10k.py @@ -54,10 +54,12 @@ def raw_sec10k_asset_factory(table) -> dg.AssetsDefinition: @dg.asset( group_name="raw_sec10k", name=table, - required_resource_keys={"datastore", "dataset_settings"}, + required_resource_keys={"datastore", "etl_settings"}, ) def sec10k_asset(context) -> pd.DataFrame: - sec10k_settings: Sec10kSettings = context.resources.dataset_settings.sec10k + sec10k_settings: Sec10kSettings = ( + context.resources.etl_settings.dataset_settings.sec10k + ) ds: Datastore = context.resources.datastore if table in sec10k_settings.tables: return extract(ds=ds, table=table, years=sec10k_settings.years) diff --git a/src/pudl/extract/vcerare.py b/src/pudl/extract/vcerare.py index f4708e9a6c..c6f9bd45e0 100644 --- a/src/pudl/extract/vcerare.py +++ b/src/pudl/extract/vcerare.py @@ -57,7 +57,7 @@ def _clean_column_names( outs={table_name: AssetOut() for table_name in VCERARE_PAGES.values()}, required_resource_keys={ "datastore", - "dataset_settings", + "etl_settings", }, ) def extract_vcerare( @@ -67,7 +67,7 @@ def extract_vcerare( extracted_tables = defaultdict(dict) # Loop through all years in settings and extract - for year in context.resources.dataset_settings.vcerare.years: + for year in context.resources.etl_settings.dataset_settings.vcerare.years: partitions = {"year": year} # Extract each raw table, clean column names, then offload to parquet @@ -94,7 +94,7 @@ def extract_vcerare( return tuple(extracted_tables.values()) -@asset(required_resource_keys={"datastore", "dataset_settings"}) +@asset(required_resource_keys={"datastore", "etl_settings"}) def 
raw_vcerare__lat_lon_fips(context) -> pd.DataFrame: """Extract lat/lon to FIPS and county mapping CSV. @@ -102,7 +102,7 @@ def raw_vcerare__lat_lon_fips(context) -> pd.DataFrame: its extraction is controlled by a boolean in the ETL run. """ ds = context.resources.datastore - partition_settings = context.resources.dataset_settings.vcerare + partition_settings = context.resources.etl_settings.dataset_settings.vcerare if partition_settings.fips: return pd.read_csv( BytesIO(ds.get_unique_resource("vcerare", fips=partition_settings.fips)) diff --git a/src/pudl/extract/xbrl.py b/src/pudl/extract/xbrl.py index 75caff5708..c6ffcad648 100644 --- a/src/pudl/extract/xbrl.py +++ b/src/pudl/extract/xbrl.py @@ -1,7 +1,11 @@ """Generic extractor for all FERC XBRL data.""" import io +import logging +import re +import sys from collections.abc import Callable +from contextlib import contextmanager from pathlib import Path from dagster import op @@ -16,6 +20,50 @@ logger = pudl.logging_helpers.get_logger(__name__) +class _FilteringStream: + """Pass-through text stream that drops matching noisy lines.""" + + def __init__(self, wrapped, drop_patterns: list[re.Pattern[str]]): + self._wrapped = wrapped + self._drop_patterns = drop_patterns + self._dropped_previous_line = False + + def write(self, text: str) -> int: + for line in text.splitlines(keepends=True): + stripped = line.rstrip("\r\n") + if stripped and any(p.search(stripped) for p in self._drop_patterns): + self._dropped_previous_line = True + continue + + # Arelle occasionally emits a blank line right after spam lines. 
+ if not stripped and self._dropped_previous_line: + continue + + self._wrapped.write(line) + self._dropped_previous_line = False + return len(text) + + def flush(self) -> None: + self._wrapped.flush() + + +@contextmanager +def _suppress_arelle_message_spam(): + """Filter known Arelle console spam without suppressing normal logs.""" + drop_patterns = [ + re.compile(r"^Message:\s+Try\s+#\d+"), + re.compile(r"^Message log error:\s+Formatting field not found in record:"), + ] + old_stdout, old_stderr = sys.stdout, sys.stderr + sys.stdout = _FilteringStream(old_stdout, drop_patterns) + sys.stderr = _FilteringStream(old_stderr, drop_patterns) + try: + yield + finally: + sys.stdout = old_stdout + sys.stderr = old_stderr + + class FercXbrlDatastore: """Simple datastore wrapper for accessing ferc1 xbrl resources.""" @@ -23,7 +71,7 @@ def __init__(self, datastore: Datastore): """Instantiate datastore wrapper for ferc1 resources.""" self.datastore = datastore - def get_taxonomy(self, form: XbrlFormNumber) -> tuple[io.BytesIO, str]: + def get_taxonomy(self, form: XbrlFormNumber) -> io.BytesIO: """Returns the path to the taxonomy entry point within the an archive.""" raw_archive = self.datastore.get_unique_resource( f"ferc{form.value}", @@ -47,7 +95,7 @@ def xbrl2sqlite_op_factory(form: XbrlFormNumber) -> Callable: @op( name=f"ferc{form.value}_xbrl", required_resource_keys={ - "ferc_to_sqlite_settings", + "etl_settings", "datastore", "runtime_settings", }, @@ -56,15 +104,13 @@ def xbrl2sqlite_op_factory(form: XbrlFormNumber) -> Callable: def inner_op(context) -> None: output_path = PudlPaths().output_dir rs: RuntimeSettings = context.resources.runtime_settings - settings = context.resources.ferc_to_sqlite_settings.get_xbrl_dataset_settings( - form - ) + settings = context.resources.etl_settings.get_xbrl_dataset_settings(form) datastore = FercXbrlDatastore(context.resources.datastore) logger.info(f"====== xbrl2sqlite runtime_settings: {rs}") - if settings is None or 
settings.disabled: + if settings is None or not settings.years: logger.info( - f"Skipping dataset ferc{form.value}_xbrl: no config or is disabled." + f"Skipping dataset ferc{form.value}_xbrl: no config or no years configured." ) return @@ -84,6 +130,7 @@ def inner_op(context) -> None: duckdb_path=duckdb_path, batch_size=rs.xbrl_batch_size, workers=rs.xbrl_num_workers, + loglevel=rs.xbrl_loglevel, ) return inner_op @@ -98,6 +145,7 @@ def convert_form( duckdb_path: Path, batch_size: int | None = None, workers: int | None = None, + loglevel: str = "INFO", ) -> None: """Clone a single FERC XBRL form to SQLite. @@ -113,8 +161,8 @@ def convert_form( Returns: None """ - datapackage_path = str(output_path / f"ferc{form.value}_xbrl_datapackage.json") - metadata_path = str(output_path / f"ferc{form.value}_xbrl_taxonomy_metadata.json") + datapackage_path = output_path / f"ferc{form.value}_xbrl_datapackage.json" + metadata_path = output_path / f"ferc{form.value}_xbrl_taxonomy_metadata.json" taxonomy_archive = datastore.get_taxonomy(form) # Process XBRL filings for each year requested @@ -123,16 +171,21 @@ def convert_form( ] # if we set clobber=True, clobbers on *every* call to run_main; # we already delete the existing base on `clobber=True` in `xbrl2sqlite` - run_main( - filings=filings_archives, - sqlite_path=sqlite_path, - duckdb_path=duckdb_path, - taxonomy=taxonomy_archive, - form_number=form.value, - metadata_path=metadata_path, - datapackage_path=datapackage_path, - workers=workers, - batch_size=batch_size, - loglevel="INFO", - logfile=None, - ) + # Arelle can emit very verbose internals; keep its logger at ERROR unless + # troubleshooting parser internals. 
+ logging.getLogger("arelle").setLevel(logging.ERROR) + + with _suppress_arelle_message_spam(): + run_main( + filings=filings_archives, + sqlite_path=sqlite_path, + duckdb_path=duckdb_path, + taxonomy=taxonomy_archive, + form_number=form.value, + metadata_path=metadata_path, + datapackage_path=datapackage_path, + workers=workers, + batch_size=batch_size, + loglevel=loglevel, + logfile=None, + ) diff --git a/src/pudl/ferc_sqlite_provenance.py b/src/pudl/ferc_sqlite_provenance.py new file mode 100644 index 0000000000..dcd9fe2bb3 --- /dev/null +++ b/src/pudl/ferc_sqlite_provenance.py @@ -0,0 +1,202 @@ +"""Helpers for recording FERC SQLite provenance and checking compatibility. + +Compatibility between a persisted FERC SQLite prerequisite and a downstream +PUDL run is determined by two independent criteria: do the Zenodo DOIs match, +and does the FERC SQLite DB contain all the years needed by the downstream run? +""" + +import json +import re +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +import dagster as dg + +from pudl.settings import EtlSettings +from pudl.workspace.datastore import ZenodoDoiSettings + +PROVENANCE_METADATA_DATASET = "pudl_ferc_sqlite_dataset" +PROVENANCE_METADATA_STATUS = "pudl_ferc_sqlite_status" +PROVENANCE_METADATA_ZENODO_DOI = "pudl_ferc_sqlite_zenodo_doi" +PROVENANCE_METADATA_SETTINGS = "pudl_ferc_sqlite_etl_settings" +PROVENANCE_METADATA_YEARS = "pudl_ferc_sqlite_years" +PROVENANCE_METADATA_SQLITE_PATH = "pudl_ferc_sqlite_path" + + +@dataclass(frozen=True) +class FercSQLiteProvenance: + """Current provenance expectations for a FERC SQLite prerequisite asset.""" + + asset_key: dg.AssetKey + dataset: str + data_format: str + zenodo_doi: str + years: list[int] + + +def _get_dataset_and_format(db_name: str) -> tuple[str, str]: + match: re.Match[str] | None = re.search(r"ferc\d+", db_name) + if match is None: + raise ValueError(f"Could not determine FERC dataset from db_name={db_name!r}.") + + dataset: str 
= match.group() + if db_name.endswith("_dbf"): + return dataset, "dbf" + if db_name.endswith("_xbrl"): + return dataset, "xbrl" + + raise ValueError( + f"Could not determine FERC sqlite format from db_name={db_name!r}." + ) + + +def get_ferc_sqlite_provenance( + *, + db_name: str, + etl_settings: EtlSettings, + zenodo_dois: ZenodoDoiSettings, +) -> "FercSQLiteProvenance": + """Build the expected provenance fingerprint for a FERC SQLite database.""" + dataset, data_format = _get_dataset_and_format(db_name) + settings_attr = f"{dataset}_{data_format}_to_sqlite_settings" + settings = getattr(etl_settings.ferc_to_sqlite, settings_attr) + if settings is None: + raise ValueError(f"Missing {settings_attr} in ETL settings.") + + return FercSQLiteProvenance( + asset_key=dg.AssetKey(f"raw_{db_name}__sqlite"), + dataset=dataset, + data_format=data_format, + zenodo_doi=str(getattr(zenodo_dois, dataset)), + years=sorted(settings.years), + ) + + +def build_ferc_sqlite_provenance_metadata( + *, + db_name: str, + etl_settings: EtlSettings, + zenodo_dois: ZenodoDoiSettings, + sqlite_path: Path | None, + status: str, +) -> dict[str, Any]: + """Build materialization metadata for a FERC SQLite prerequisite asset.""" + provenance: FercSQLiteProvenance = get_ferc_sqlite_provenance( + db_name=db_name, + etl_settings=etl_settings, + zenodo_dois=zenodo_dois, + ) + + # Serialize the full settings for debugging / audit; not used in compatibility checks. 
+ dataset, data_format = _get_dataset_and_format(db_name) + settings_attr = f"{dataset}_{data_format}_to_sqlite_settings" + settings_obj = getattr(etl_settings.ferc_to_sqlite, settings_attr) + settings_json = json.loads( + json.dumps(settings_obj.model_dump(mode="json"), sort_keys=True) + ) + + metadata: dict[str, Any] = { + PROVENANCE_METADATA_DATASET: dg.MetadataValue.text(provenance.dataset), + PROVENANCE_METADATA_STATUS: dg.MetadataValue.text(status), + PROVENANCE_METADATA_ZENODO_DOI: dg.MetadataValue.text(provenance.zenodo_doi), + PROVENANCE_METADATA_SETTINGS: dg.MetadataValue.json(settings_json), + PROVENANCE_METADATA_YEARS: dg.MetadataValue.json(provenance.years), + } + if sqlite_path is not None: + metadata[PROVENANCE_METADATA_SQLITE_PATH] = dg.MetadataValue.path( + str(sqlite_path) + ) + + return metadata + + +def _unwrap_metadata_value(value: Any) -> Any: + return value.value if hasattr(value, "value") else value + + +def assert_ferc_sqlite_compatible( + *, + instance: Any | None, + db_name: str, + etl_settings: EtlSettings, + zenodo_dois: ZenodoDoiSettings, +) -> None: + """Ensure a persisted FERC SQLite prerequisite is compatible with this run. + + Compatibility requires two conditions to hold: + + 1. The Zenodo DOI recorded when the FERC SQLite DB was built must match the + current :class:`~pudl.workspace.datastore.ZenodoDoiSettings`. A mismatch + means the raw archive has changed version and the DB must be rebuilt. + + 2. The years stored in the FERC SQLite DB must be a *superset* of the years + needed by the current downstream settings. This allows a "full" FERC SQLite + DB to serve a "fast" downstream run without an expensive rebuild. 
+ """ + if instance is None: + return + + provenance: FercSQLiteProvenance = get_ferc_sqlite_provenance( + db_name=db_name, + etl_settings=etl_settings, + zenodo_dois=zenodo_dois, + ) + event = instance.get_latest_materialization_event(provenance.asset_key) + materialization = None if event is None else event.asset_materialization + if materialization is None: + raise RuntimeError( + "No Dagster provenance metadata is available for " + f"{provenance.asset_key.to_user_string()}. Refresh the FERC SQLite assets." + ) + + metadata: dict[str, Any] = { + key: _unwrap_metadata_value(value) + for key, value in (materialization.metadata or {}).items() + } + required_keys: list[str] = [ + PROVENANCE_METADATA_STATUS, + PROVENANCE_METADATA_ZENODO_DOI, + PROVENANCE_METADATA_YEARS, + ] + missing_keys: list[str] = [key for key in required_keys if key not in metadata] + if missing_keys: + missing_keys_str: str = ", ".join(sorted(missing_keys)) + raise RuntimeError( + f"Stored provenance metadata for {provenance.asset_key.to_user_string()} is " + f"missing {missing_keys_str}. Refresh the FERC SQLite assets." + ) + + if metadata[PROVENANCE_METADATA_STATUS] != "complete": + raise RuntimeError( + f"Stored provenance metadata for {provenance.asset_key.to_user_string()} has " + f"status={metadata[PROVENANCE_METADATA_STATUS]!r}. " + "Refresh the FERC SQLite assets." 
+ ) + + mismatches: list[str] = [] + if metadata[PROVENANCE_METADATA_ZENODO_DOI] != provenance.zenodo_doi: + mismatches.append( + "Zenodo DOI mismatch: " + f"stored={metadata[PROVENANCE_METADATA_ZENODO_DOI]!r}, " + f"expected={provenance.zenodo_doi!r}" + ) + + stored_years: set[int] = set(metadata[PROVENANCE_METADATA_YEARS]) + required_years: set[int] = set(provenance.years) + missing_years: set[int] = required_years - stored_years + if missing_years: + mismatches.append( + "FERC SQLite DB is missing required years: " + f"missing={sorted(missing_years)}, " + f"stored={sorted(stored_years)}, " + f"required={sorted(required_years)}" + ) + + if mismatches: + mismatch_summary: str = "; ".join(mismatches) + raise RuntimeError( + f"Stored prerequisite asset {provenance.asset_key.to_user_string()} is not " + f"compatible with the current run configuration. {mismatch_summary}. " + "Refresh the FERC SQLite assets." + ) diff --git a/src/pudl/ferc_to_sqlite/__init__.py b/src/pudl/ferc_to_sqlite/__init__.py deleted file mode 100644 index 615c7c80e2..0000000000 --- a/src/pudl/ferc_to_sqlite/__init__.py +++ /dev/null @@ -1,71 +0,0 @@ -"""Dagster definitions for the FERC to SQLite process.""" - -import importlib.resources - -from dagster import Definitions, graph - -import pudl -from pudl.extract.ferc import ALL_DBF_EXTRACTORS -from pudl.extract.xbrl import xbrl2sqlite_op_factory -from pudl.resources import RuntimeSettings, datastore, ferc_to_sqlite_settings -from pudl.settings import EtlSettings, XbrlFormNumber - -logger = pudl.logging_helpers.get_logger(__name__) - - -@graph -def ferc_to_sqlite(): - """Clone the FERC FoxPro databases and XBRL filings into SQLite.""" - for extractor in ALL_DBF_EXTRACTORS: - extractor.get_dagster_op()() - for form in XbrlFormNumber: - xbrl2sqlite_op_factory(form)() - - -default_resources_defs = { - "ferc_to_sqlite_settings": ferc_to_sqlite_settings, - "runtime_settings": RuntimeSettings(), - "datastore": datastore, -} - 
-ferc_to_sqlite_full_settings = EtlSettings.from_yaml( - importlib.resources.files("pudl.package_data.settings") / "etl_full.yml" -).ferc_to_sqlite_settings - -ferc_to_sqlite_full = ferc_to_sqlite.to_job( - resource_defs=default_resources_defs, - name="ferc_to_sqlite_full", - config={ - "resources": { - "ferc_to_sqlite_settings": { - "config": ferc_to_sqlite_full_settings.model_dump(), - }, - "runtime_settings": { - "config": {}, - }, - }, - }, -) - -ferc_to_sqlite_fast_settings = EtlSettings.from_yaml( - importlib.resources.files("pudl.package_data.settings") / "etl_fast.yml" -).ferc_to_sqlite_settings - -ferc_to_sqlite_fast = ferc_to_sqlite.to_job( - resource_defs=default_resources_defs, - name="ferc_to_sqlite_fast", - config={ - "resources": { - "ferc_to_sqlite_settings": { - "config": ferc_to_sqlite_fast_settings.model_dump(), - }, - "runtime_settings": { - "config": {}, - }, - }, - }, -) - -defs: Definitions = Definitions(jobs=[ferc_to_sqlite_full, ferc_to_sqlite_fast]) -"""A collection of dagster assets, resources, IO managers, and jobs for the FERC to -SQLite ETL.""" diff --git a/src/pudl/ferc_to_sqlite/cli.py b/src/pudl/ferc_to_sqlite/cli.py deleted file mode 100644 index 6f2aed505c..0000000000 --- a/src/pudl/ferc_to_sqlite/cli.py +++ /dev/null @@ -1,205 +0,0 @@ -"""A script using Dagster to convert FERC data from DBF and XBRL to SQLite databases.""" - -import pathlib -import sys -import time -from collections.abc import Callable - -import click -from dagster import ( - DagsterInstance, - JobDefinition, - build_reconstructable_job, - execute_job, -) - -import pudl -from pudl import ferc_to_sqlite -from pudl.helpers import get_dagster_execution_config -from pudl.settings import EtlSettings - -# Create a logger to output any messages we might have... 
-logger = pudl.logging_helpers.get_logger(__name__) - - -def ferc_to_sqlite_job_factory( - logfile: str | None = None, - loglevel: str = "INFO", - dataset_only: str | None = None, -) -> Callable[[], JobDefinition]: - """Factory for parameterizing a reconstructable ferc_to_sqlite job. - - Args: - logfile: Path to a log file for the job's execution. - loglevel: The log level for the job's execution. - - Returns: - The job definition to be executed. - """ - - def get_ferc_to_sqlite_job(): - """Module level func for creating a job to be wrapped by reconstructable.""" - ferc_to_sqlite_graph = ferc_to_sqlite.ferc_to_sqlite - op_selection = None - if dataset_only is not None: - logger.warning(f"Running ferc_to_sqlite restricted to {dataset_only}") - op_selection = [dataset_only] - return ferc_to_sqlite_graph.to_job( - resource_defs=ferc_to_sqlite.default_resources_defs, - name="ferc_to_sqlite_job", - op_selection=op_selection, - ) - - return get_ferc_to_sqlite_job - - -@click.command( - name="ferc_to_sqlite", - context_settings={"help_option_names": ["-h", "--help"]}, -) -@click.argument( - "etl_settings_yml", - type=click.Path( - exists=True, - dir_okay=False, - resolve_path=True, - path_type=pathlib.Path, - ), -) -@click.option( - "-b", - "--batch-size", - type=int, - default=50, - help="Number of XBRL instances to be processed at a time.", -) -@click.option( - "-w", - "--workers", - type=int, - default=None, - help=( - "Number of worker processes to use when parsing XBRL filings. " - "Defaults to using the number of CPUs." - ), -) -@click.option( - "--dagster-workers", - type=int, - default=0, - help=( - "Set the max number of processes that dagster can launch. " - "If set to 1, in-process serial executor will be used. If set to 0, " - "dagster will saturate available CPUs (this is the default)." - ), -) -@click.option( - "--cloud-cache-path", - type=str, - default="s3://pudl.catalyst.coop/zenodo", - help=( - "Load cached inputs from cloud object storage (S3 or GCS) . 
This is typically " - "much faster and more reliable than downloading from Zenodo directly. By " - "default we read from the cache in PUDL's free, public AWS Open Data Registry " - "bucket." - ), -) -@click.option( - "--logfile", - type=click.Path( - exists=False, - resolve_path=True, - path_type=pathlib.Path, - ), - help="If specified, write logs to this file.", -) -@click.option( - "--loglevel", - type=click.Choice( - ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], case_sensitive=False - ), - default="INFO", -) -@click.option( - "--dataset-only", - type=str, - help=( - "If specified, restricts processing to only a given dataset. This is" - "expected to be in the form of ferc1_dbf, ferc1_xbrl. " - "This is intended for ci-integration purposes where we fan-out the " - "execution into several parallel small jobs that should finish faster. " - "Other operations are still going to be invoked, but they will terminate " - "early if this setting is in use." - ), -) -def main( - etl_settings_yml: pathlib.Path, - batch_size: int, - workers: int | None, - dagster_workers: int, - cloud_cache_path: str, - logfile: pathlib.Path, - loglevel: str, - dataset_only: str, -): - """Use Dagster to convert FERC data from DBF and XBRL to SQLite databases. - - Reads settings specifying which forms and years to convert from ETL_SETTINGS_YML. - - Also produces JSON versions of XBRL taxonomies and datapackage descriptors which - annotate the XBRL derived SQLite databases. 
- """ - # Display logged output from the PUDL package: - pudl.logging_helpers.configure_root_logger(logfile=logfile, loglevel=loglevel) - - etl_settings = EtlSettings.from_yaml(etl_settings_yml) - - ferc_to_sqlite_reconstructable_job = build_reconstructable_job( - "pudl.ferc_to_sqlite.cli", - "ferc_to_sqlite_job_factory", - reconstructable_kwargs={ - "loglevel": loglevel, - "logfile": logfile, - "dataset_only": dataset_only, - }, - ) - - run_config = { - "resources": { - "ferc_to_sqlite_settings": { - "config": etl_settings.ferc_to_sqlite_settings.model_dump() - }, - "datastore": { - "config": { - "cloud_cache_path": cloud_cache_path, - }, - }, - "runtime_settings": { - "config": { - "xbrl_num_workers": workers, - "xbrl_batch_size": batch_size, - }, - }, - }, - } - run_config.update(get_dagster_execution_config(dagster_workers)) - - start_time = time.time() - result = execute_job( - ferc_to_sqlite_reconstructable_job, - instance=DagsterInstance.get(), - run_config=run_config, - raise_on_error=True, - ) - end_time = time.time() - logger.info(f"FERC to SQLite job completed in {end_time - start_time} seconds.") - - # Workaround to reliably getting full stack trace - if not result.success: - for event in result.all_events: - if event.event_type_value == "STEP_FAILURE": - raise Exception(event.event_specific_data.error) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/src/pudl/glue/ferc1_eia.py b/src/pudl/glue/ferc1_eia.py index c86313293e..7aa4d96191 100644 --- a/src/pudl/glue/ferc1_eia.py +++ b/src/pudl/glue/ferc1_eia.py @@ -40,7 +40,7 @@ from pudl.io_managers import ferc1_dbf_sqlite_io_manager, ferc1_xbrl_sqlite_io_manager from pudl.metadata.classes import Package from pudl.metadata.fields import apply_pudl_dtypes -from pudl.resources import dataset_settings +from pudl.resources import etl_settings, zenodo_dois from pudl.transform.classes import StringNormalization, normalize_strings_multicol from pudl.transform.ferc1 import ( 
Ferc1AbstractTableTransformer, @@ -336,7 +336,8 @@ def plants_ferc1_raw(**transformed_plant_tables): resources={ "ferc1_dbf_sqlite_io_manager": ferc1_dbf_sqlite_io_manager, "ferc1_xbrl_sqlite_io_manager": ferc1_xbrl_sqlite_io_manager, - "dataset_settings": dataset_settings, + "etl_settings": etl_settings, + "zenodo_dois": zenodo_dois, }, jobs=[define_asset_job(name="get_plants_ferc1_raw")], ).get_job_def("get_plants_ferc1_raw") diff --git a/src/pudl/helpers.py b/src/pudl/helpers.py index 3b7cfdc2fa..8deda8c91d 100644 --- a/src/pudl/helpers.py +++ b/src/pudl/helpers.py @@ -2022,55 +2022,6 @@ def scale_by_ownership( return gens -def get_dagster_execution_config( - num_workers: int = 0, tag_concurrency_limits: list[dict] = [] -): - """Get the dagster execution config for a given number of workers. - - If num_workers is 0, then the dagster execution config will not include - any limits. With num_workers set to 1, we will use in-process serial - executor, otherwise multi-process executor with maximum of num_workers - will be used. - - Args: - num_workers: The number of workers to use for the dagster execution config. - If 0, then the dagster execution config will not include a multiprocess - executor. - tag_concurrency_limits: A set of limits that are applied to steps with - particular tags. This is helpful for applying concurrency limits to - highly concurrent and memory intensive portions of the ETL like CEMS. - - Dagster description: If a value is set, the limit is applied to - only that key-value pair. If no value is set, the limit is applied - across all values of that key. If the value is set to a dict with - ``applyLimitPerUniqueValue: true``, the limit will apply to the - number of unique values for that key. Note that these limits are - per run, not global. - - Returns: - A dagster execution config. 
- """ - if num_workers == 1: - return { - "execution": { - "config": { - "in_process": {}, - }, - }, - } - - return { - "execution": { - "config": { - "multiprocess": { - "max_concurrent": num_workers, - "tag_concurrency_limits": tag_concurrency_limits, - }, - }, - }, - } - - def assert_cols_areclose( df: pd.DataFrame, a_cols: list[str], diff --git a/src/pudl/io_managers.py b/src/pudl/io_managers.py index 8829e0af0e..512d76a04d 100644 --- a/src/pudl/io_managers.py +++ b/src/pudl/io_managers.py @@ -2,9 +2,11 @@ import json import re +from functools import cached_property from pathlib import Path from sqlite3 import sqlite_version +import dagster as dg import geopandas as gpd # noqa: ICN002 import pandas as pd import polars as pl @@ -14,7 +16,8 @@ from alembic.autogenerate.api import compare_metadata from alembic.migration import MigrationContext from dagster import ( - Field, + ConfigurableIOManager, + DagsterInvariantViolationError, InitResourceContext, InputContext, IOManager, @@ -22,10 +25,18 @@ io_manager, ) from packaging import version +from pydantic import model_validator import pudl +from pudl.ferc_sqlite_provenance import assert_ferc_sqlite_compatible from pudl.helpers import get_parquet_table, get_parquet_table_polars from pudl.metadata.classes import PUDL_PACKAGE, Package, Resource +from pudl.resources import ( + PudlEtlSettingsResource, + ZenodoDoiSettingsResource, + etl_settings, + zenodo_dois, +) from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -33,7 +44,22 @@ MINIMUM_SQLITE_VERSION = "3.32.0" -def get_table_name_from_context(context: OutputContext) -> str: +def _get_dagster_instance_if_available( + context: InputContext, +) -> dg.DagsterInstance | None: + """Return the Dagster instance from an input context if one was provided. + + Some notebook and integration-test helpers build ad hoc ``InputContext`` objects + without attaching a Dagster instance. 
Provenance checks should be skipped for those + direct reads rather than raising while trying to access ``context.instance``. + """ + try: + return context.instance + except DagsterInvariantViolationError: + return None + + +def get_table_name_from_context(context: InputContext | OutputContext) -> str: """Retrieves the table name from the context object.""" # TODO(rousik): Figure out which kind of identifier is used when. if context.has_asset_key: @@ -41,59 +67,60 @@ def get_table_name_from_context(context: OutputContext) -> str: return context.get_identifier() -class PudlMixedFormatIOManager(IOManager): +def get_ferc_form_name(db_name: str) -> str: + """Extract the FERC form name from a SQLite database name.""" + match: re.Match[str] | None = re.search(r"ferc\d+", db_name) + if match is None: + raise ValueError(f"Could not determine FERC form from db_name={db_name!r}") + return match.group() + + +class PudlMixedFormatIOManager(ConfigurableIOManager): """Format switching IOManager that supports sqlite and parquet. This IOManager provides for the use of parquet files along with the standard SQLite database produced by PUDL. """ - # Defaults should be provided here and should be potentially - # overridden by os env variables. This now resides in the - # @io_manager constructor of this, see pudl_mixed_format_io_manager". - write_to_parquet: bool + write_to_parquet: bool = True """If true, data will be written to parquet files.""" - read_from_parquet: bool + read_from_parquet: bool = True """If true, data will be read from parquet files instead of sqlite.""" - def __init__(self, write_to_parquet: bool = False, read_from_parquet: bool = False): - """Creates new instance of mixed format pudl IO manager. - - By default, data is written and read from sqlite, but experimental - support for writing and/or reading from parquet files can be enabled - by setting the corresponding flags to True. 
- - Args: - write_to_parquet: if True, all data will be written to parquet - files in addition to sqlite. - read_from_parquet: if True, all data reads will be using - parquet files as source of truth. Otherwise, data will be - read from the sqlite database. Reading from parquet provides - performance increases as well as better datatype handling, so - this option is encouraged. - """ - if read_from_parquet and not write_to_parquet: + @model_validator(mode="after") + def validate_parquet_settings(self) -> "PudlMixedFormatIOManager": + """Ensure the configured read/write mode is internally consistent.""" + if self.read_from_parquet and not self.write_to_parquet: raise RuntimeError( "read_from_parquet cannot be set when write_to_parquet is False." ) - self.write_to_parquet = write_to_parquet - self.read_from_parquet = read_from_parquet - self._sqlite_io_manager = PudlSQLiteIOManager( + return self + + @cached_property + def _sqlite_io_manager(self) -> "PudlSQLiteIOManager": + """Build the SQLite-backed runtime IO manager lazily.""" + return PudlSQLiteIOManager( base_dir=PudlPaths().output_dir, db_name="pudl", ) - self._parquet_io_manager = PudlParquetIOManager() + + @cached_property + def _parquet_io_manager(self) -> "PudlParquetIOManager": + """Build the Parquet-backed runtime IO manager lazily.""" + return PudlParquetIOManager() def handle_output( - self, context: OutputContext, obj: pd.DataFrame | str - ) -> pd.DataFrame: + self, context: OutputContext, obj: pd.DataFrame | pl.LazyFrame + ) -> None: """Passes the output to the appropriate IO manager instance.""" self._sqlite_io_manager.handle_output(context, obj) if self.write_to_parquet: self._parquet_io_manager.handle_output(context, obj) - def load_input(self, context: InputContext) -> pd.DataFrame: + def load_input( + self, context: InputContext + ) -> pd.DataFrame | gpd.GeoDataFrame | pl.LazyFrame: """Reads input from the appropriate IO manager instance.""" if self.read_from_parquet: return 
self._parquet_io_manager.load_input(context) @@ -191,7 +218,7 @@ def _get_sqlalchemy_table(self, table_name: str) -> sa.Table: ) return sa_table - def _handle_pandas_output(self, context: OutputContext, df: pd.DataFrame): + def _handle_pandas_output(self, context: OutputContext, df: pd.DataFrame) -> None: """Write dataframe to the database. SQLite does not support concurrent writes to the database. Instead, SQLite @@ -228,52 +255,22 @@ def _handle_pandas_output(self, context: OutputContext, df: pd.DataFrame): dtype={c.name: c.type for c in sa_table.columns}, ) - # TODO (bendnorman): Create a SQLQuery type so it's clearer what this method expects - def _handle_str_output(self, context: OutputContext, query: str): - """Execute a sql query on the database. - - This is used for creating output views in the database. - - Args: - context: dagster keyword that provides access output information like asset - name. - query: sql query to execute in the database. - """ - engine = self.engine - table_name = get_table_name_from_context(context) - - # Make sure the metadata has been created for the view - _ = self._get_sqlalchemy_table(table_name) - - with engine.begin() as con: - # Drop the existing view if it exists and create the new view. - # TODO (bendnorman): parameterize this safely. - con.execute(f"DROP VIEW IF EXISTS {table_name}") - con.execute(query) - - def handle_output(self, context: OutputContext, obj: pd.DataFrame | str): + def handle_output(self, context: OutputContext, obj: pd.DataFrame) -> None: """Handle an op or asset output. - If the output is a dataframe, write it to the database. If it is a string - execute it as a SQL query. - Args: context: dagster keyword that provides access output information like asset name. - obj: a sql query or dataframe to add to the database. + obj: a dataframe to add to the database. Raises: - Exception: if an asset or op returns an unsupported datatype. + TypeError: if an asset or op returns an unsupported datatype. 
""" - if isinstance(obj, pd.DataFrame): - self._handle_pandas_output(context, obj) - elif isinstance(obj, str): - self._handle_str_output(context, obj) - else: - raise Exception( - "SQLiteIOManager only supports pandas DataFrames and strings of SQL " - "queries." + if not isinstance(obj, pd.DataFrame): + raise TypeError( + f"SQLiteIOManager only supports pandas DataFrames, got {type(obj)}." ) + self._handle_pandas_output(context, obj) def load_input(self, context: InputContext) -> pd.DataFrame: """Load a dataframe from a sqlite database. @@ -326,7 +323,6 @@ def handle_output( schema=pa_schema, ) elif isinstance(obj, pl.LazyFrame): - logger.warning("PudlParquetIOManager does not do any schema enforcement.") obj.cast(res.to_polars_dtypes()).sink_parquet( parquet_path, engine="streaming", @@ -483,39 +479,7 @@ def __init__( "--autogenerate -m 'relevant message' && alembic upgrade head`." ) - def _handle_str_output(self, context: OutputContext, query: str): - """Execute a sql query on the database. - - This is used for creating output views in the database. - - Args: - context: dagster keyword that provides access output information like asset - name. - query: sql query to execute in the database. - """ - engine = self.engine - table_name = get_table_name_from_context(context) - - # Check if there is a Resource in self.package for table_name. - # We don't want folks creating views without adding package metadata. - try: - _ = self.package.get_resource(table_name) - except ValueError as err: - raise ValueError( - f"{table_name} does not appear in pudl.metadata.resources. " - "Check for typos, or add the table to the metadata and recreate the " - f"PUDL SQlite database. It's also possible that {table_name} is one of " - "the tables that does not get loaded into the PUDL SQLite DB because " - "it's a work in progress or is distributed in Apache Parquet format." - ) from err - - with engine.begin() as con: - # Drop the existing view if it exists and create the new view. 
- # TODO (bendnorman): parameterize this safely. - con.execute(f"DROP VIEW IF EXISTS {table_name}") - con.execute(query) - - def _handle_pandas_output(self, context: OutputContext, df: pd.DataFrame): + def _handle_pandas_output(self, context: OutputContext, df: pd.DataFrame) -> None: """Enforce PUDL DB schema and write dataframe to SQLite.""" table_name = get_table_name_from_context(context) # If table_name doesn't show up in the self.md object, this will raise an error @@ -581,29 +545,7 @@ def load_input(self, context: InputContext) -> pd.DataFrame: return df -@io_manager( - config_schema={ - "write_to_parquet": Field( - bool, - description="""If true, data will be written to parquet files, - in addition to the SQLite database.""", - default_value=True, - ), - "read_from_parquet": Field( - bool, - description="""If True, the canonical source of data for reads - will be parquet files. Otherwise, data will be read from the - SQLite database.""", - default_value=True, - ), - } -) -def pudl_mixed_format_io_manager(init_context: InitResourceContext) -> IOManager: - """Create a SQLiteManager dagster resource for the pudl database.""" - return PudlMixedFormatIOManager( - write_to_parquet=init_context.resource_config["write_to_parquet"], - read_from_parquet=init_context.resource_config["read_from_parquet"], - ) +pudl_mixed_format_io_manager = PudlMixedFormatIOManager() @io_manager @@ -627,11 +569,13 @@ class FercSQLiteIOManager(SQLiteIOManager): This IOManager expects the database to already exist. """ + _db_path: Path # set during _setup_database(); typed here for IDE and type-checker visibility + def __init__( self, - base_dir: str = None, - db_name: str = None, - md: sa.MetaData = None, + base_dir: str | None = None, + db_name: str | None = None, + md: sa.MetaData | None = None, timeout: float = 1_000.0, ): """Initialize FercSQLiteIOManager. 
@@ -649,12 +593,12 @@ def __init__( """ # TODO(rousik): Note that this is a bit of a partially implemented IO manager that # is not actually used for writing anything. Given that this is derived from base - # SqliteIOManager, we do not support handling of parquet formats. This is probably + # SQLiteIOManager, we do not support handling of parquet formats. This is probably # okay for now. super().__init__(base_dir, db_name, md, timeout) def _setup_database(self, timeout: float = 1_000.0) -> sa.Engine: - """Create database engine and read the metadata. + """Create database engine and read metadata if the DB already exists. Args: timeout: How many seconds the connection should wait before raising an @@ -665,26 +609,42 @@ def _setup_database(self, timeout: float = 1_000.0) -> sa.Engine: Returns: engine: SQL Alchemy engine that connects to a database in the base_dir. """ - # If the sqlite directory doesn't exist, create it. db_path = self.base_dir / f"{self.db_name}.sqlite" - if not db_path.exists(): - raise ValueError( - f"No DB found at {db_path}. Run the job that creates the " - f"{self.db_name} database." - ) + self._db_path = db_path engine = sa.create_engine( f"sqlite:///{db_path}", connect_args={"timeout": timeout} ) - # Connect to the local SQLite DB and read its structure. - ferc1_meta = sa.MetaData() - ferc1_meta.reflect(engine) - self.md = ferc1_meta + # For single-pass Dagster runs, this resource may initialize before upstream + # sqlite-producing assets have materialized the DB file. + if db_path.exists(): + self._reflect_metadata(engine) + else: + logger.info( + f"{db_path} not found during resource initialization; metadata reflection " + "will happen on first load." 
+ ) return engine - def handle_output(self, context: OutputContext, obj): + def _reflect_metadata(self, engine: sa.Engine | None = None) -> None: + """Reflect table metadata from the sqlite database into ``self.md``.""" + reflected = sa.MetaData() + reflected.reflect(engine if engine is not None else self.engine) + self.md: sa.MetaData = reflected + + def _ensure_database_ready(self) -> None: + """Ensure the sqlite DB exists and metadata has been reflected.""" + if not self._db_path.exists(): + raise ValueError( + f"No DB found at {self._db_path}. Run the job that creates the " + f"{self.db_name} database." + ) + if not self.md.tables: + self._reflect_metadata() + + def handle_output(self, context: OutputContext, obj: pd.DataFrame | str) -> None: """Handle an op or asset output.""" raise NotImplementedError( "FercSQLiteIOManager can't write outputs. Subclass FercSQLiteIOManager and " @@ -704,59 +664,133 @@ def load_input(self, context: InputContext) -> pd.DataFrame: ) -class FercDBFSQLiteIOManager(FercSQLiteIOManager): - """IO Manager for only reading tables from the FERC 1 database. +class FercDbfSQLiteIOManager(FercSQLiteIOManager): + """IO Manager for reading tables from FERC DBF SQLite databases. This IO Manager is for reading data only. It does not handle outputs because the raw FERC tables are not known prior to running the ETL and are not recorded in our metadata. + + The form name is inferred from ``self.db_name`` via :func:`get_ferc_form_name`, so + a single class serves all FERC DBF datasets (ferc1_dbf, ferc2_dbf, etc.) as long as + the corresponding settings object exposes a ``dbf_years`` attribute. 
""" - def handle_output(self, context: OutputContext, obj: pd.DataFrame | str): + def handle_output(self, context: OutputContext, obj: pd.DataFrame | str) -> None: """Handle an op or asset output.""" - raise NotImplementedError("FercDBFSQLiteIOManager can't write outputs yet.") + raise NotImplementedError("FercDbfSQLiteIOManager can't write outputs yet.") - def load_input(self, context: InputContext) -> pd.DataFrame: - """Load a dataframe from a sqlite database. + def _query(self, table_name: str, dbf_years: list[int]) -> pd.DataFrame: + """Execute the year-filtered read against the FERC DBF SQLite database. Args: - context: dagster keyword that provides access output information like asset - name. + table_name: Name of the table to query (without the ``raw___`` + prefix). + dbf_years: Years to include in the result set. """ - # TODO (daz): this is hard-coded to FERC1, though this is nominally for all FERC datasets. - ferc1_settings = context.resources.dataset_settings.ferc1 - - table_name = get_table_name_from_context(context) - # Remove preceding asset name metadata - table_name = table_name.replace("raw_ferc1_dbf__", "") - - # Check if the table_name exists in the self.md object _ = self._get_sqlalchemy_table(table_name) - - engine = self.engine - - with engine.begin() as con: + with self.engine.begin() as con: return pd.read_sql_query( f"SELECT * FROM {table_name} " # noqa: S608 "WHERE report_year BETWEEN :min_year AND :max_year;", con=con, params={ - "min_year": min(ferc1_settings.dbf_years), - "max_year": max(ferc1_settings.dbf_years), + "min_year": min(dbf_years), + "max_year": max(dbf_years), }, ).assign(sched_table_name=table_name) + def load_input(self, context: InputContext) -> pd.DataFrame: + """Load a dataframe from a FERC DBF sqlite database. 
-@io_manager(required_resource_keys={"dataset_settings"}) -def ferc1_dbf_sqlite_io_manager(init_context) -> FercDBFSQLiteIOManager: - """Create a SQLiteManager dagster resource for the ferc1 dbf database.""" - return FercDBFSQLiteIOManager( - base_dir=PudlPaths().output_dir, - db_name="ferc1_dbf", - ) + Args: + context: dagster keyword that provides access output information like asset + name. + """ + self._ensure_database_ready() + ferc_settings = getattr( + context.resources.etl_settings.dataset_settings, + get_ferc_form_name(self.db_name), + ) + table_name = get_table_name_from_context(context).replace( + f"raw_{self.db_name}__", "" + ) + return self._query(table_name, ferc_settings.dbf_years) + + +class _FercSQLiteConfigurableIOManagerBase(ConfigurableIOManager): + """Base class for Dagster-native FERC SQLite IO manager wrappers. + + Holds the shared resource dependencies (``etl_settings``, ``zenodo_dois``, + ``db_name``) and provides default delegation for ``engine`` and + ``handle_output``. Subclasses must implement the ``_manager`` cached property + and ``load_input``. + + Note: + This wrapper pattern is a temporary workaround for nested ``etl_settings`` + resource dependencies inside the FERC IO managers. Because Dagster wires + resource dependencies at instantiation time, overriding the top-level + ``etl_settings`` resource alone (e.g. in tests) is not enough — the IO + managers must be rebuilt against the new resource instance. ``build_defs`` + in ``pudl.etl`` handles that rebuilding explicitly. A follow-up PR will + remove the nested dependency, at which point this base class can be + simplified or eliminated. 
See issue #5118 + """ + + etl_settings: dg.ResourceDependency[PudlEtlSettingsResource] + zenodo_dois: dg.ResourceDependency[ZenodoDoiSettingsResource] + db_name: str + + @property + def engine(self) -> sa.Engine: + """Expose the underlying SQLAlchemy engine for tests and helpers.""" + return self._manager.engine + + def handle_output(self, context: OutputContext, obj: pd.DataFrame | str) -> None: + """Delegate writes to the underlying runtime IO manager.""" + return self._manager.handle_output(context, obj) + + def _prepare(self, context: InputContext) -> None: + """Ensure the database is ready and provenance is compatible with this run.""" + self._manager._ensure_database_ready() + assert_ferc_sqlite_compatible( + instance=_get_dagster_instance_if_available(context), + db_name=self.db_name, + etl_settings=self.etl_settings, + zenodo_dois=self.zenodo_dois, + ) + + +class FercDbfSQLiteConfigurableIOManager(_FercSQLiteConfigurableIOManagerBase): + """Configurable IO manager for reading tables from FERC DBF SQLite databases. + + The form name is inferred from ``self.db_name`` via :func:`get_ferc_form_name`, so + a single class serves all FERC DBF datasets. Instantiate with the appropriate + ``db_name`` (e.g. ``"ferc1_dbf"``, ``"ferc2_dbf"``) to target a specific form. 
+ """ + @cached_property + def _manager(self) -> FercDbfSQLiteIOManager: + """Build the underlying SQLite reader lazily.""" + return FercDbfSQLiteIOManager( + base_dir=PudlPaths().output_dir, + db_name=self.db_name, + ) -class FercXBRLSQLiteIOManager(FercSQLiteIOManager): + def load_input(self, context: InputContext) -> pd.DataFrame: + """Load a dataframe from a FERC DBF SQLite database.""" + self._prepare(context) + ferc_settings = getattr( + self.etl_settings.dataset_settings, + get_ferc_form_name(self.db_name), + ) + table_name = get_table_name_from_context(context).replace( + f"raw_{self.db_name}__", "" + ) + return self._manager._query(table_name, ferc_settings.dbf_years) + + +class FercXbrlSQLiteIOManager(FercSQLiteIOManager): """IO Manager for only reading tables from the XBRL database. This IO Manager is for reading data only. It does not handle outputs because the raw @@ -806,61 +840,89 @@ def get_year(df: pd.DataFrame, col: str) -> pd.Series: .reset_index(drop=True) ) - def handle_output(self, context: OutputContext, obj: pd.DataFrame | str): + def handle_output(self, context: OutputContext, obj: pd.DataFrame | str) -> None: """Handle an op or asset output.""" - raise NotImplementedError("FercXBRLSQLiteIOManager can't write outputs yet.") + raise NotImplementedError("FercXbrlSQLiteIOManager can't write outputs yet.") - def load_input(self, context: InputContext) -> pd.DataFrame: - """Load a dataframe from a sqlite database. + def _query(self, table_name: str, xbrl_years: list[int]) -> pd.DataFrame: + """Execute the full-table read against the FERC XBRL SQLite database. Args: - context: dagster keyword that provides access output information like asset - name. + table_name: Name of the table to query (without the ``raw___`` + prefix). + xbrl_years: Years to include in the result set (passed to + :meth:`refine_report_year`). 
""" - ferc_settings = getattr( - context.resources.dataset_settings, - re.search(r"ferc\d+", self.db_name).group(), - ) - - table_name = get_table_name_from_context(context) - # Remove preceding asset name metadata - table_name = table_name.replace(f"raw_{self.db_name}__", "") - - # TODO (bendnorman): Figure out a better to handle tables that - # don't have duration and instant - # Not every table contains both instant and duration - # Return empty dataframe if table doesn't exist + # TODO (bendnorman): Figure out a better way to handle tables that + # don't have duration and instant variants. + # Not every table contains both instant and duration; + # return an empty dataframe if the table doesn't exist. if table_name not in self.md.tables: return pd.DataFrame() - - engine = self.engine - sched_table_name = re.sub("_instant|_duration", "", table_name) - with engine.begin() as con: + with self.engine.begin() as con: df = pd.read_sql( f"SELECT {table_name}.* FROM {table_name}", # noqa: S608 - table names not supplied by user con=con, ).assign(sched_table_name=sched_table_name) - return df.pipe( - FercXBRLSQLiteIOManager.refine_report_year, - xbrl_years=ferc_settings.xbrl_years, + FercXbrlSQLiteIOManager.refine_report_year, xbrl_years=xbrl_years + ) + + def load_input(self, context: InputContext) -> pd.DataFrame: + """Load a dataframe from a sqlite database. + + Args: + context: dagster keyword that provides access output information like asset + name. 
+ """ + self._ensure_database_ready() + ferc_settings = getattr( + context.resources.etl_settings.dataset_settings, + get_ferc_form_name(self.db_name), + ) + table_name = get_table_name_from_context(context).replace( + f"raw_{self.db_name}__", "" ) + return self._query(table_name, ferc_settings.xbrl_years) + + +class FercXbrlSQLiteConfigurableIOManager(_FercSQLiteConfigurableIOManagerBase): + """Configurable IO manager for reading tables from a FERC XBRL SQLite database.""" + @cached_property + def _manager(self) -> FercXbrlSQLiteIOManager: + """Build the underlying SQLite reader lazily.""" + return FercXbrlSQLiteIOManager( + base_dir=PudlPaths().output_dir, + db_name=self.db_name, + ) -@io_manager(required_resource_keys={"dataset_settings"}) -def ferc1_xbrl_sqlite_io_manager(init_context) -> FercXBRLSQLiteIOManager: - """Create a SQLiteManager dagster resource for the ferc1 xbrl database.""" - return FercXBRLSQLiteIOManager( - base_dir=PudlPaths().output_dir, - db_name="ferc1_xbrl", - ) + def load_input(self, context: InputContext) -> pd.DataFrame: + """Load a dataframe from the configured FERC XBRL SQLite database.""" + self._prepare(context) + ferc_settings = getattr( + self.etl_settings.dataset_settings, + get_ferc_form_name(self.db_name), + ) + table_name = get_table_name_from_context(context).replace( + f"raw_{self.db_name}__", "" + ) + return self._manager._query(table_name, ferc_settings.xbrl_years) -@io_manager(required_resource_keys={"dataset_settings"}) -def ferc714_xbrl_sqlite_io_manager(init_context) -> FercXBRLSQLiteIOManager: - """Create a SQLiteManager dagster resource for the ferc714 xbrl database.""" - return FercXBRLSQLiteIOManager( - base_dir=PudlPaths().output_dir, - db_name="ferc714_xbrl", - ) +ferc1_dbf_sqlite_io_manager = FercDbfSQLiteConfigurableIOManager( + etl_settings=etl_settings, + zenodo_dois=zenodo_dois, + db_name="ferc1_dbf", +) +ferc1_xbrl_sqlite_io_manager = FercXbrlSQLiteConfigurableIOManager( + etl_settings=etl_settings, + 
zenodo_dois=zenodo_dois, + db_name="ferc1_xbrl", +) +ferc714_xbrl_sqlite_io_manager = FercXbrlSQLiteConfigurableIOManager( + etl_settings=etl_settings, + zenodo_dois=zenodo_dois, + db_name="ferc714_xbrl", +) diff --git a/src/pudl/logging_helpers.py b/src/pudl/logging_helpers.py index d505bc80ba..ec56bc2ce3 100644 --- a/src/pudl/logging_helpers.py +++ b/src/pudl/logging_helpers.py @@ -1,10 +1,25 @@ """Configure logging for the PUDL package.""" import logging +from typing import Literal import coloredlogs from dagster import get_dagster_logger +DEFAULT_DEPENDENCY_LOGLEVELS: dict[str, int] = { + "aiobotocore": logging.WARNING, + "alembic": logging.WARNING, + "arelle": logging.INFO, + "asyncio": logging.INFO, + "boto3": logging.WARNING, + "botocore": logging.WARNING, + "fsspec": logging.INFO, + "google": logging.INFO, + "matplotlib": logging.WARNING, + "numba": logging.WARNING, + "urllib3": logging.INFO, +} + def get_logger(name: str): """Helper function to append 'catalystcoop' to logger name and return logger.""" @@ -13,38 +28,57 @@ def get_logger(name: str): def configure_root_logger( logfile: str | None = None, - loglevel: str = "INFO", + loglevel: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO", dependency_loglevels: dict[str, int] | None = None, + color_logs: bool = True, propagate: bool = False, ) -> None: """Configure the root catalystcoop logger. Args: logfile: Path to logfile or None. - loglevel: Level of detail at which to log, by default INFO. + loglevel: Level of detail at which to log. Defaults to ``INFO``. dependency_loglevels: Dictionary mapping dependency name to desired loglevel. This allows us to filter excessive logs from dependencies. + color_logs: Whether to emit ANSI color codes. Defaults to ``True``. propagate: Whether to propagate logs to ancestor loggers. Useful for ensuring that pytest has access to PUDL logs during testing. 
""" if dependency_loglevels is None: - dependency_loglevels = {"numba": logging.WARNING} + dependency_loglevels = dict(DEFAULT_DEPENDENCY_LOGLEVELS) + # Explicitly set log-level for dependency loggers for dependency_name, dependency_loglevel in dependency_loglevels.items(): logging.getLogger(dependency_name).setLevel(dependency_loglevel) - logger = get_dagster_logger("catalystcoop") + # Normalize upstream ferc_xbrl_extractor logging to flow through our configured + # handlers and formatter without requiring changes in that package. + ferc_xbrl_logger = logging.getLogger("catalystcoop.ferc_xbrl_extractor") + if ferc_xbrl_logger.handlers: + ferc_xbrl_logger.handlers.clear() + ferc_xbrl_logger.propagate = True + log_format = "%(asctime)s [%(levelname)8s] %(name)s:%(lineno)s %(message)s" - coloredlogs.install(fmt=log_format, level=loglevel, logger=logger) + loggers_to_configure = [ + get_dagster_logger("catalystcoop"), + logging.getLogger("catalystcoop"), + ] + for logger in loggers_to_configure: + coloredlogs.install( + fmt=log_format, + level=loglevel, + logger=logger, + isatty=color_logs, + ) - logger.addHandler(logging.NullHandler()) + logger.addHandler(logging.NullHandler()) - if logfile is not None: - file_logger = logging.FileHandler(logfile) - file_logger.setFormatter(logging.Formatter(log_format)) - logger.addHandler(file_logger) + if logfile is not None: + file_logger = logging.FileHandler(logfile) + file_logger.setFormatter(logging.Formatter(log_format)) + logger.addHandler(file_logger) - logger.propagate = propagate + logger.propagate = propagate if propagate: logging.getLogger("dagster").propagate = True diff --git a/src/pudl/output/eia930.py b/src/pudl/output/eia930.py index 6799d5f3de..ad4631f4ad 100644 --- a/src/pudl/output/eia930.py +++ b/src/pudl/output/eia930.py @@ -28,7 +28,7 @@ def _add_timezone( @asset( compute_kind="Python", - required_resource_keys={"dataset_settings"}, + required_resource_keys={"etl_settings"}, ) def 
_out_eia930__hourly_operations( core_eia930__hourly_operations: pd.DataFrame, @@ -54,7 +54,7 @@ def _out_eia930__hourly_operations( @asset( compute_kind="Python", - required_resource_keys={"dataset_settings"}, + required_resource_keys={"etl_settings"}, ) def _out_eia930__hourly_subregion_demand( core_eia930__hourly_subregion_demand: pd.DataFrame, @@ -80,7 +80,7 @@ def _out_eia930__hourly_subregion_demand( def _years_from_context(context) -> list[int]: return [ int(half_year[:4]) - for half_year in context.resources.dataset_settings.eia.eia930.half_years + for half_year in context.resources.etl_settings.dataset_settings.eia.eia930.half_years ] diff --git a/src/pudl/output/ferc714.py b/src/pudl/output/ferc714.py index 0857dcf0bb..9af6a62329 100644 --- a/src/pudl/output/ferc714.py +++ b/src/pudl/output/ferc714.py @@ -347,7 +347,7 @@ def filled_service_territory_eia861( @asset( compute_kind="Python", - required_resource_keys={"dataset_settings"}, + required_resource_keys={"etl_settings"}, ) def _out_ferc714__annualized_respondents( context, @@ -366,7 +366,7 @@ def _out_ferc714__annualized_respondents( if "report_date" in core_ferc714__respondent_id.columns: raise AssertionError("report_date already present, can't be added again!") - ferc714_settings = context.resources.dataset_settings.ferc714 + ferc714_settings = context.resources.etl_settings.dataset_settings.ferc714 report_dates = pd.DataFrame( {"report_date": pd.to_datetime(sorted(ferc714_settings.years), format="%Y")} ) @@ -692,7 +692,9 @@ def out_ferc714__summarized_demand( imputed_hourly_planning_area_demand_assets = impute_timeseries_asset_factory( input_asset_name="core_ferc714__hourly_planning_area_demand", output_asset_name="out_ferc714__hourly_planning_area_demand", - years_from_context=lambda context: context.resources.dataset_settings.ferc714.years, + years_from_context=lambda context: ( + context.resources.etl_settings.dataset_settings.ferc714.years + ), value_col="demand_mwh", 
imputed_value_col="demand_imputed_pudl_mwh", id_col="respondent_id_ferc714", diff --git a/src/pudl/output/sql/__init__.py b/src/pudl/output/sql/__init__.py deleted file mode 100644 index 92c4de178d..0000000000 --- a/src/pudl/output/sql/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""A module of python helper functions and sql files for creating SQL views.""" diff --git a/src/pudl/output/sql/denorm_plants_utilities_ferc1.sql b/src/pudl/output/sql/denorm_plants_utilities_ferc1.sql deleted file mode 100644 index b47ca7e6c8..0000000000 --- a/src/pudl/output/sql/denorm_plants_utilities_ferc1.sql +++ /dev/null @@ -1,5 +0,0 @@ --- Build a view of useful FERC Plant & Utility information. -CREATE VIEW denorm_plants_utilities_ferc1 AS -SELECT * -FROM core_pudl__assn_ferc1_pudl_plants - INNER JOIN core_pudl__assn_ferc1_pudl_utilities USING(utility_id_ferc1); diff --git a/src/pudl/output/sql/helpers.py b/src/pudl/output/sql/helpers.py deleted file mode 100644 index c8fe6dadb3..0000000000 --- a/src/pudl/output/sql/helpers.py +++ /dev/null @@ -1,37 +0,0 @@ -"""Helper functions for creating output assets.""" - -import importlib.resources - -from dagster import AssetsDefinition, asset - - -def sql_asset_factory( - name: str, - deps: set[str] = {}, - io_manager_key: str = "pudl_io_manager", - compute_kind: str = "SQL", -) -> AssetsDefinition: - """Factory for creating assets that run SQL statements.""" - - @asset( - name=name, - deps=deps, - io_manager_key=io_manager_key, - compute_kind=compute_kind, - ) - def sql_view_asset() -> str: - """Asset that creates sql view in a database.""" - sql_path_traversable = ( - importlib.resources.files("pudl.output.sql") / f"{name}.sql" - ) - try: - with importlib.resources.as_file(sql_path_traversable) as sql_path: - return sql_path.read_text() - # Raise a helpful error here if a sql file doesn't exist - except FileNotFoundError as err: - raise FileNotFoundError( - f"Could not find {sql_path}. 
" - f"Create a sql file in pudl.output.sql subpackage for {name} asset." - ) from err - - return sql_view_asset diff --git a/src/pudl/package_data/settings/dg_fast.yml b/src/pudl/package_data/settings/dg_fast.yml new file mode 100644 index 0000000000..644450ce05 --- /dev/null +++ b/src/pudl/package_data/settings/dg_fast.yml @@ -0,0 +1,25 @@ +execution: + config: + multiprocess: + max_concurrent: 0 + tag_concurrency_limits: + - key: memory-use + value: high + limit: 4 +loggers: + console: + config: + log_level: INFO +resources: + datastore: + config: + cloud_cache_path: s3://pudl.catalyst.coop/zenodo + use_local_cache: true + etl_settings: + config: + etl_settings_path: src/pudl/package_data/settings/etl_fast.yml + runtime_settings: + config: + xbrl_num_workers: null + xbrl_batch_size: 50 + xbrl_loglevel: INFO diff --git a/src/pudl/package_data/settings/dg_full.yml b/src/pudl/package_data/settings/dg_full.yml new file mode 100644 index 0000000000..b4f1eba633 --- /dev/null +++ b/src/pudl/package_data/settings/dg_full.yml @@ -0,0 +1,25 @@ +execution: + config: + multiprocess: + max_concurrent: 0 + tag_concurrency_limits: + - key: memory-use + value: high + limit: 4 +loggers: + console: + config: + log_level: INFO +resources: + datastore: + config: + cloud_cache_path: s3://pudl.catalyst.coop/zenodo + use_local_cache: true + etl_settings: + config: + etl_settings_path: src/pudl/package_data/settings/etl_full.yml + runtime_settings: + config: + xbrl_num_workers: 4 + xbrl_batch_size: 50 + xbrl_loglevel: INFO diff --git a/src/pudl/package_data/settings/dg_nightly.yml b/src/pudl/package_data/settings/dg_nightly.yml new file mode 100644 index 0000000000..6cab1c6959 --- /dev/null +++ b/src/pudl/package_data/settings/dg_nightly.yml @@ -0,0 +1,25 @@ +execution: + config: + multiprocess: + max_concurrent: 0 + tag_concurrency_limits: + - key: memory-use + value: high + limit: 4 +loggers: + console: + config: + log_level: DEBUG +resources: + datastore: + config: + 
cloud_cache_path: s3://pudl.catalyst.coop/zenodo + use_local_cache: true + etl_settings: + config: + etl_settings_path: src/pudl/package_data/settings/etl_full.yml + runtime_settings: + config: + xbrl_num_workers: 8 + xbrl_batch_size: 50 + xbrl_loglevel: DEBUG diff --git a/src/pudl/package_data/settings/dg_pytest.yml b/src/pudl/package_data/settings/dg_pytest.yml new file mode 100644 index 0000000000..66fa997a11 --- /dev/null +++ b/src/pudl/package_data/settings/dg_pytest.yml @@ -0,0 +1,26 @@ +# Dagster launch config for pytest integration test prebuilds. +# +# Uses the in-process executor so that pytest's coverage collection can instrument +# all ETL code in the same process. Multi-process execution would require a separate +# coverage setup (e.g. coverage subprocess plugins). See GitHub issue #5119 for the +# follow-up work to enable coverage with the multiprocess executor. +execution: + config: + in_process: {} +loggers: + console: + config: + log_level: INFO +resources: + datastore: + config: + cloud_cache_path: s3://pudl.catalyst.coop/zenodo + use_local_cache: true + etl_settings: + config: + etl_settings_path: src/pudl/package_data/settings/etl_fast.yml + runtime_settings: + config: + xbrl_num_workers: null + xbrl_batch_size: 50 + xbrl_loglevel: INFO diff --git a/src/pudl/package_data/settings/etl_fast.yml b/src/pudl/package_data/settings/etl_fast.yml index 46ef7bf7c4..5dd3b1a130 100644 --- a/src/pudl/package_data/settings/etl_fast.yml +++ b/src/pudl/package_data/settings/etl_fast.yml @@ -1,6 +1,6 @@ --- ########################################################################### -# Settings for ferc_to_sqlite script +# Settings for FERC-to-SQLite extraction ########################################################################### ferc_to_sqlite_settings: ferc1_dbf_to_sqlite_settings: @@ -9,22 +9,22 @@ ferc_to_sqlite_settings: ferc1_xbrl_to_sqlite_settings: years: [2021, 2024] ferc2_dbf_to_sqlite_settings: - years: [2019, 2020] + years: [] 
ferc2_xbrl_to_sqlite_settings: - years: [2021, 2024] + years: [] ferc6_dbf_to_sqlite_settings: - years: [2019, 2020] + years: [] ferc6_xbrl_to_sqlite_settings: - years: [2021, 2024] + years: [] ferc60_dbf_to_sqlite_settings: - years: [2019, 2020] + years: [] ferc60_xbrl_to_sqlite_settings: - years: [2021, 2024] + years: [] ferc714_xbrl_to_sqlite_settings: years: [2021, 2024] ########################################################################### -# Settings for pudl_etl script +# Settings for the main PUDL ETL ########################################################################### name: pudl-fast title: PUDL Fast ETL diff --git a/src/pudl/package_data/settings/etl_full.yml b/src/pudl/package_data/settings/etl_full.yml index 9af4360790..5a95086cd4 100644 --- a/src/pudl/package_data/settings/etl_full.yml +++ b/src/pudl/package_data/settings/etl_full.yml @@ -1,6 +1,6 @@ --- ########################################################################### -# Settings for ferc_to_sqlite script +# Settings for FERC-to-SQLite extraction ########################################################################### ferc_to_sqlite_settings: ferc1_dbf_to_sqlite_settings: @@ -120,7 +120,7 @@ ferc_to_sqlite_settings: years: [2021, 2022, 2023, 2024] ########################################################################### -# Settings for pudl_etl script +# Settings for the main PUDL ETL ########################################################################### name: pudl-full title: PUDL Full ETL diff --git a/src/pudl/resources.py b/src/pudl/resources.py index 56b65862df..aa7d834398 100644 --- a/src/pudl/resources.py +++ b/src/pudl/resources.py @@ -1,9 +1,13 @@ """Collection of Dagster resources for PUDL.""" -from dagster import ConfigurableResource, Field, resource +import dagster as dg +from dagster import ConfigurableResource -from pudl.settings import DatasetsSettings, FercToSqliteSettings, create_dagster_config -from pudl.workspace.datastore import Datastore +from 
pudl.settings import ( + EtlSettings, + load_etl_settings, +) +from pudl.workspace.datastore import Datastore, ZenodoDoiSettings from pudl.workspace.setup import PudlPaths @@ -12,49 +16,53 @@ class RuntimeSettings(ConfigurableResource): xbrl_num_workers: None | int = None xbrl_batch_size: int = 50 + xbrl_loglevel: str = "INFO" -@resource(config_schema=create_dagster_config(DatasetsSettings())) -def dataset_settings(init_context) -> DatasetsSettings: - """Dagster resource for parameterizing PUDL ETL assets. +class PudlEtlSettingsResource(ConfigurableResource): + """Load validated PUDL ETL settings from a shared ETL YAML file.""" - This resource allows us to specify the years we want to process for each datasource - in the Dagit UI. - """ - return DatasetsSettings(**init_context.resource_config) + etl_settings_path: str + def create_resource(self, context) -> EtlSettings: + """Create runtime ETL settings from the configured ETL settings file.""" + del context # Required by Dagster's hook signature; intentionally unused here. + return load_etl_settings(self.etl_settings_path) -@resource(config_schema=create_dagster_config(FercToSqliteSettings())) -def ferc_to_sqlite_settings(init_context) -> FercToSqliteSettings: - """Dagster resource for parameterizing the ``ferc_to_sqlite`` graph. - This resource allows us to specify the years we want to process for each datasource - in the Dagit UI. 
- """ - return FercToSqliteSettings(**init_context.resource_config) +class ZenodoDoiSettingsResource(ConfigurableResource): + """Load the canonical Zenodo DOI settings for Dagster-managed runs.""" + zenodo_dois_path: str | None = None -@resource( - config_schema={ - "cloud_cache_path": Field( - str, - description="Load datastore resources from this GCS or S3 path.", - default_value="s3://pudl.catalyst.coop/zenodo", - ), - "use_local_cache": Field( - bool, - description="If enabled, the local file cache for datastore will be used.", - default_value=True, - ), - }, -) -def datastore(init_context) -> Datastore: + def create_resource(self, context) -> ZenodoDoiSettings: + """Create runtime DOI settings, optionally from an override YAML file.""" + del context # Required by Dagster's hook signature; intentionally unused here. + if self.zenodo_dois_path is None: + return ZenodoDoiSettings() + return ZenodoDoiSettings.from_yaml(self.zenodo_dois_path) + + +class DatastoreResource(ConfigurableResource): """Dagster resource to interact with Zenodo archives.""" - ds_kwargs = {} - ds_kwargs["cloud_cache_path"] = init_context.resource_config["cloud_cache_path"] - - if init_context.resource_config["use_local_cache"]: - # TODO(rousik): we could also just use PudlPaths().input_dir here, because - # it should be initialized to the right values. - ds_kwargs["local_cache_path"] = PudlPaths().input_dir - return Datastore(**ds_kwargs) + + zenodo_dois: dg.ResourceDependency[ZenodoDoiSettingsResource] + cloud_cache_path: str = "s3://pudl.catalyst.coop/zenodo" + use_local_cache: bool = True + + def create_resource(self, context) -> Datastore: + """Create a configured datastore runtime object.""" + del context # Required by Dagster's hook signature; intentionally unused here. 
+ ds_kwargs = { + "cloud_cache_path": self.cloud_cache_path, + "zenodo_dois": self.zenodo_dois, + } + + if self.use_local_cache: + ds_kwargs["local_cache_path"] = PudlPaths().input_dir # type: ignore[call-arg] + return Datastore(**ds_kwargs) + + +etl_settings = PudlEtlSettingsResource.configure_at_launch() +zenodo_dois = ZenodoDoiSettingsResource() +datastore = DatastoreResource(zenodo_dois=zenodo_dois) diff --git a/src/pudl/scripts/update_zenodo_dois.py b/src/pudl/scripts/update_zenodo_dois.py index 989ad2831f..5f89f458eb 100644 --- a/src/pudl/scripts/update_zenodo_dois.py +++ b/src/pudl/scripts/update_zenodo_dois.py @@ -5,7 +5,6 @@ require hand mapping to extract in PUDL. """ -import importlib import re import sys from pathlib import Path @@ -15,6 +14,7 @@ import yaml from pudl.logging_helpers import get_logger +from pudl.workspace.datastore import get_zenodo_dois_path logger = get_logger(__name__) @@ -47,7 +47,11 @@ def update_yaml_dois(yaml_file: Path, datasets: tuple[str, ...]) -> dict[str, di for dataset_name, current_doi in data.items(): if dataset_name in datasets: # Extract record ID from DOI (e.g, grab 123456 from 10.5281/zenodo.123456) - record_id = re.search(r"^10\.5281/zenodo\.(\d+)$", current_doi).group(1) + if (match := re.search(r"^10\.5281/zenodo\.(\d+)$", current_doi)) is None: + raise ValueError( + f"Unexpected Zenodo DOI format for {dataset_name}: {current_doi}" + ) + record_id = match.group(1) latest_id, latest_doi = get_latest_record_id(record_id) @@ -94,9 +98,7 @@ def main(datasets: tuple[str, ...]): # pragma: no cover logger.warn("No datasets provided, nothing will be updated.") sys.exit(0) - yaml_file = importlib.resources.files("pudl.package_data.settings").joinpath( - "zenodo_dois.yml" - ) + yaml_file = get_zenodo_dois_path() if not yaml_file.exists(): logger.warn(f"❌ File not found: {yaml_file}") sys.exit(1) diff --git a/src/pudl/settings.py b/src/pudl/settings.py index a370c2bfe4..58764d3a5f 100644 --- a/src/pudl/settings.py +++ 
b/src/pudl/settings.py @@ -1,13 +1,14 @@ """Module for validating pudl etl settings.""" +import importlib.resources import json from enum import Enum, StrEnum, auto, unique +from pathlib import Path from typing import Any, ClassVar, Self import fsspec import pandas as pd import yaml -from dagster import Field as DagsterField from dagster import StaticPartitionsDefinition from pydantic import ( AnyHttpUrl, @@ -54,9 +55,6 @@ class GenericDatasetSettings(FrozenBaseModel): of partitions. """ - disabled: bool = False - """If true, skip processing this dataset.""" - data_source: ClassVar[DataSource] """The DataSource metadata object for this dataset.""" @@ -77,13 +75,15 @@ def validate_partitions(self: Self): partition = getattr(self, name) except KeyError as err: raise ValueError( - f"{self.__name__} is missing required '{name}' field." + f"{self.__class__.__name__} is missing required '{name}' field." ) from err # Partition should never be None -- should get a default value set in # the child classes based on the working partitions. if partition is None: - raise ValueError(f"'In {self.__name__} partition {name} is None.") + raise ValueError( + f"'In {self.__class__.__name__} partition {name} is None." + ) if nonworking_partitions := list(set(partition) - set(working_partitions)): raise ValueError(f"'{nonworking_partitions}' {name} are not available.") @@ -687,7 +687,22 @@ def make_datasources_table(self: Self, ds: Datastore) -> pd.DataFrame: return df -class Ferc1DbfToSqliteSettings(GenericDatasetSettings): +class FercDbfToSqliteSettings(GenericDatasetSettings): + """Base class for all FERC DBF-to-SQLite settings models. + + Declares the ``years`` and ``refyear`` attributes shared by every FERC DBF + form so that :class:`~pudl.extract.dbf.FercDbfExtractor` can be typed + against this base rather than the looser :class:`GenericDatasetSettings`. 
+ """ + + years: list[int] = [] + """Years of DBF data to extract.""" + + refyear: ClassVar[int] + """Reference year used to build the destination schema; provided by each subclass.""" + + +class Ferc1DbfToSqliteSettings(FercDbfToSqliteSettings): """An immutable Pydantic model to validate FERC 1 to SQLite settings.""" data_source: ClassVar[DataSource] = DataSource.from_id("ferc1") @@ -705,7 +720,6 @@ class FercGenericXbrlToSqliteSettings(BaseSettings): years: list[int] """The list of years to validate.""" - disabled: bool = False class Ferc1XbrlToSqliteSettings(FercGenericXbrlToSqliteSettings): @@ -728,7 +742,7 @@ class Ferc2XbrlToSqliteSettings(FercGenericXbrlToSqliteSettings): """The list of years to validate.""" -class Ferc2DbfToSqliteSettings(GenericDatasetSettings): +class Ferc2DbfToSqliteSettings(FercDbfToSqliteSettings): """An immutable Pydantic model to validate FERC 2 to SQLite settings.""" data_source: ClassVar[DataSource] = DataSource.from_id("ferc2") @@ -741,7 +755,7 @@ class Ferc2DbfToSqliteSettings(GenericDatasetSettings): """The reference year for the dataset.""" -class Ferc6DbfToSqliteSettings(GenericDatasetSettings): +class Ferc6DbfToSqliteSettings(FercDbfToSqliteSettings): """An immutable Pydantic model to validate FERC 6 to SQLite settings.""" data_source: ClassVar[DataSource] = DataSource.from_id("ferc6") @@ -750,8 +764,6 @@ class Ferc6DbfToSqliteSettings(GenericDatasetSettings): ] """The list of years to validate.""" - disabled: bool = False - refyear: ClassVar[int] = max(years) """The reference year for the dataset.""" @@ -766,13 +778,8 @@ class Ferc6XbrlToSqliteSettings(FercGenericXbrlToSqliteSettings): """The list of years to validate.""" -class Ferc60DbfToSqliteSettings(GenericDatasetSettings): - """An immutable Pydantic model to validate FERC 60 to SQLite settings. - - Args: - years: List of years to validate. - disabled: if True, skip processing this dataset. 
- """ +class Ferc60DbfToSqliteSettings(FercDbfToSqliteSettings): + """An immutable Pydantic model to validate FERC 60 to SQLite settings.""" data_source: ClassVar[DataSource] = DataSource.from_id("ferc60") years: list[int] = [ @@ -780,8 +787,6 @@ class Ferc60DbfToSqliteSettings(GenericDatasetSettings): ] """The list of years to validate.""" - disabled: bool = False - refyear: ClassVar[int] = max(years) """The reference year for the dataset.""" @@ -838,7 +843,7 @@ def default_load_all(cls, data: dict[str, Any]) -> dict[str, Any]: def get_xbrl_dataset_settings( self, form_number: XbrlFormNumber - ) -> FercGenericXbrlToSqliteSettings: + ) -> FercGenericXbrlToSqliteSettings | None: """Return a list with all requested FERC XBRL to SQLite datasets. Args: @@ -871,9 +876,6 @@ class EtlSettings(BaseSettings): description: str | None = None version: str | None = None - publish_destinations: list[str] = [] - """This is list of fsspec compatible paths to publish the output datasets to.""" - @classmethod def from_yaml(cls, path: str) -> "EtlSettings": """Create an EtlSettings instance from a yaml_file path. @@ -888,6 +890,16 @@ def from_yaml(cls, path: str) -> "EtlSettings": yaml_file = yaml.safe_load(f) return cls.model_validate(yaml_file) + @property + def ferc_to_sqlite(self) -> "FercToSqliteSettings": + """Return validated FERC-to-SQLite settings, or raise if unavailable.""" + if self.ferc_to_sqlite_settings is None: + raise ValueError( + "ferc_to_sqlite_settings is not set in ETL settings. " + "Ensure ferc_to_sqlite_settings is configured before accessing this property." + ) + return self.ferc_to_sqlite_settings + @model_validator(mode="after") def validate_xbrl_years(self): """Ensure the XBRL years in DatasetsSettings align with FercToSqliteSettings. @@ -896,6 +908,9 @@ def validate_xbrl_years(self): that the years we are trying to process in the PUDL ETL are included in the XBRL to SQLite settings. 
""" + if self.datasets is None or self.ferc_to_sqlite_settings is None: + return self + for which_ferc in ["ferc1", "ferc714"]: if ( self.datasets is not None @@ -916,38 +931,32 @@ def validate_xbrl_years(self): ) return self + @property + def dataset_settings(self) -> DatasetsSettings: + """Return validated dataset settings or raise if they are unavailable.""" + if self.datasets is None: + raise ValueError("Missing datasets settings in ETL settings.") + return self.datasets -def _convert_settings_to_dagster_config(settings_dict: dict[str, Any]) -> None: - """Recursively convert a dictionary of dataset settings to dagster config in place. - - For each partition parameter in a :class:`GenericDatasetSettings` subclass, create a - corresponding :class:`DagsterField`. By default the :class:`GenericDatasetSettings` - subclasses will default to include all working partitions if the partition value is - None. Get the value type so dagster can do some basic type checking in the UI. - - Args: - settings_dict: dictionary of datasources and their parameters. - """ - for key, value in settings_dict.items(): - if isinstance(value, dict): - _convert_settings_to_dagster_config(value) - else: - settings_dict[key] = DagsterField(type(value), default_value=value) + def get_xbrl_dataset_settings( + self, form_number: XbrlFormNumber + ) -> FercGenericXbrlToSqliteSettings | None: + """Proxy FERC XBRL settings lookup through the canonical ETL settings.""" + return self.ferc_to_sqlite.get_xbrl_dataset_settings(form_number) -def create_dagster_config(settings: GenericDatasetSettings) -> dict[str, DagsterField]: - """Create a dictionary of dagster config out of a :class:`GenericDatasetsSettings`. 
+def load_etl_settings(path: str | Path) -> EtlSettings: + """Load ETL settings from a path, supporting relative paths from cwd.""" + return EtlSettings.from_yaml(str(Path(path).expanduser().resolve())) - Args: - settings: A dataset settings object, subclassed from - :class:`GenericDatasetSettings`. - Returns: - A dictionary of :class:`DagsterField` objects. - """ - settings_dict = settings.model_dump() - _convert_settings_to_dagster_config(settings_dict) - return settings_dict +def load_packaged_etl_settings(setting_filename: str) -> EtlSettings: + """Load ETL settings from a profile in ``pudl.package_data.settings``.""" + settings_path = ( + importlib.resources.files("pudl.package_data.settings") + / f"{setting_filename}.yml" + ) + return EtlSettings.from_yaml(str(settings_path)) def _zenodo_doi_to_url(doi: ZenodoDoi) -> AnyHttpUrl: diff --git a/src/pudl/transform/eia.py b/src/pudl/transform/eia.py index e240195554..36f637c5fe 100644 --- a/src/pudl/transform/eia.py +++ b/src/pudl/transform/eia.py @@ -691,7 +691,7 @@ def harvest_entity_tables( # noqa: C901 ), ), }, - required_resource_keys={"dataset_settings"}, + required_resource_keys={"etl_settings"}, io_manager_key="pudl_io_manager", ) def core_eia860__assn_boiler_generator(context, **clean_dfs) -> pd.DataFrame: @@ -729,7 +729,7 @@ def core_eia860__assn_boiler_generator(context, **clean_dfs) -> pd.DataFrame: AssertionError: If all generators do not end up with the same unit_id each year. 
""" debug = context.op_config["debug"] - eia_settings = context.resources.dataset_settings.eia + eia_settings = context.resources.etl_settings.dataset_settings.eia # Do some final data formatting and assign appropriate types: clean_dfs = { @@ -1245,7 +1245,7 @@ def harvested_entity_asset_factory( ), ), }, - required_resource_keys={"dataset_settings"}, + required_resource_keys={"etl_settings"}, name=f"harvested_{entity.value}_eia", ) def harvested_entity(context, **clean_dfs): @@ -1276,7 +1276,7 @@ def harvested_entity(context, **clean_dfs): # the longitude column is very different in the ytd 860M data (it appears # to have an additional decimal point) bc it shows up in the generator # table but it is a plant level data point, it mucks up the consistency - eia_settings = context.resources.dataset_settings.eia + eia_settings = context.resources.etl_settings.dataset_settings.eia special_case_strictness = { "plant_name_eia": 0, "utility_name_eia": 0, diff --git a/src/pudl/transform/epacems.py b/src/pudl/transform/epacems.py index fe6fc98494..c1e708736b 100644 --- a/src/pudl/transform/epacems.py +++ b/src/pudl/transform/epacems.py @@ -242,7 +242,7 @@ def _partitioned_path() -> Path: @dg.asset( - required_resource_keys={"datastore", "dataset_settings"}, + required_resource_keys={"datastore", "etl_settings"}, io_manager_key="parquet_io_manager", op_tags={"memory-use": "high"}, ) @@ -262,7 +262,9 @@ def core_epacems__hourly_emissions( # internally and this will save us significant dagster process startup overhead and # avoid CPU resource contention. 
output_paths = [] - for year_quarter in context.resources.dataset_settings.epacems.year_quarters: + for ( + year_quarter + ) in context.resources.etl_settings.dataset_settings.epacems.year_quarters: output_path = partitioned_path / f"{year_quarter}.parquet" logger.info(f"Processing EPA CEMS {year_quarter}") diff --git a/src/pudl/workspace/datastore.py b/src/pudl/workspace/datastore.py index 91bdc07be9..9fcf45c3ba 100644 --- a/src/pudl/workspace/datastore.py +++ b/src/pudl/workspace/datastore.py @@ -13,7 +13,7 @@ from importlib.metadata import version from pathlib import Path from tempfile import TemporaryDirectory -from typing import Annotated, Any, Self +from typing import Annotated, Any, Self, cast import click import frictionless @@ -41,6 +41,16 @@ ] +def get_zenodo_dois_path() -> Path: + """Return the canonical packaged Zenodo DOI settings path.""" + return cast( + Path, + importlib.resources.files("pudl.package_data.settings").joinpath( + "zenodo_dois.yml" + ), + ) + + class ChecksumMismatchError(ValueError): """Resource checksum (md5) does not match.""" @@ -221,9 +231,7 @@ def __init__(self, **data: Any): is provided, it will be merged with defaults from the YAML file. """ # Load defaults from YAML file - default_path = ( - importlib.resources.files("pudl.package_data.settings") / "zenodo_dois.yml" - ) + default_path = get_zenodo_dois_path() with default_path.open() as f: yaml_data = yaml.safe_load(f) @@ -359,6 +367,7 @@ def __init__( local_cache_path: str | Path | UPath | None = None, cloud_cache_path: str | UPath | None = "s3://pudl.catalyst.coop/zenodo", timeout: float = 15.0, + zenodo_dois: ZenodoDoiSettings | None = None, ): """Datastore manages input data retrieval for PUDL datasets. @@ -376,6 +385,8 @@ def __init__( {gs,s3}://bucket[/path_prefix] timeout: connection timeouts (in seconds) to use when connecting to Zenodo servers. + zenodo_dois: canonical DOI settings to use when resolving dataset + versions. 
If not provided, defaults are loaded from packaged settings. Raises: ValueError: if neither local_cache_path nor cloud_cache_path is provided. @@ -431,7 +442,19 @@ def __init__( f"Falling back to Zenodo if necessary. Error was: {e}" ) - self._zenodo_fetcher = ZenodoFetcher(timeout=timeout) + self._zenodo_fetcher = ZenodoFetcher( + zenodo_dois=zenodo_dois, + timeout=timeout, + ) + + @property + def zenodo_dois(self) -> ZenodoDoiSettings: + """Expose the DOI settings used by this datastore instance.""" + return self._zenodo_fetcher.zenodo_dois + + def get_doi(self, dataset: str) -> ZenodoDoi: + """Return the configured DOI for a dataset.""" + return self._zenodo_fetcher.get_doi(dataset) def get_known_datasets(self) -> list[str]: """Returns list of supported datasets.""" diff --git a/test/conftest.py b/test/conftest.py index fd79e8a434..c5511b7e19 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,30 +1,30 @@ -"""PyTest configuration module. - -Defines useful fixtures, command line args. 
-""" +"""Shared pytest fixtures and CLI options for integration test setup.""" import logging +import os +import shutil +import subprocess +import sys +from collections.abc import Generator from pathlib import Path -from typing import Any import duckdb import pydantic import pytest import sqlalchemy as sa +import yaml from dagster import ( AssetValueLoader, + DagsterInstance, build_init_resource_context, - graph, materialize_to_memory, ) import pudl from pudl import resources -from pudl.etl import defs -from pudl.etl.cli import pudl_etl_job_factory -from pudl.extract.ferc1 import Ferc1DbfExtractor, raw_ferc1_xbrl__metadata_json +from pudl.etl import build_defs +from pudl.extract.ferc1 import raw_ferc1_xbrl__metadata_json from pudl.extract.ferc714 import raw_ferc714_xbrl__metadata_json -from pudl.extract.xbrl import xbrl2sqlite_op_factory from pudl.io_managers import ( PudlMixedFormatIOManager, ferc1_dbf_sqlite_io_manager, @@ -37,17 +37,20 @@ DatasetsSettings, EtlSettings, FercToSqliteSettings, - XbrlFormNumber, ) from pudl.workspace.datastore import Datastore from pudl.workspace.setup import PudlPaths logger = logging.getLogger(__name__) -AS_MS_ONLY_FREQ_TABLES = [ - "gen_eia923", - "gen_fuel_by_generator_eia923", -] +DG_CONFIG_PATH_DEFAULT = "src/pudl/package_data/settings/dg_pytest.yml" + +# Preamble: before pytest starts handling this module's CLI options and fixtures, we +# do a small amount of one-time environment setup and controller-side validation. The +# DuckDB httpfs extension must be available early so collection-time imports and any +# tests that touch remote resources can work in restricted environments. We also +# inspect the requested test targets and reject incompatible combinations up front, +# before xdist workers start or fixture setup can poison shared environment variables. # In general we run tests and subprocesses with multiple workers, and some tests touch # remote HTTPS / S3 resources. 
We try to LOAD first so collection works in @@ -60,25 +63,116 @@ duckdb.execute("LOAD httpfs") +def _requested_test_targets(config: pytest.Config) -> list[Path]: + """Return normalized path targets requested on the pytest command line.""" + raw_targets = config.args or ["test"] + targets: list[Path] = [] + + for raw_target in raw_targets: + path_text = raw_target.split("::", maxsplit=1)[0] + if not path_text: + continue + + target = Path(path_text) + if not target.is_absolute(): + target = (Path(config.rootpath) / target).resolve() + else: + target = target.resolve() + targets.append(target) + + return targets + + +def _target_includes_suite(config: pytest.Config, suite_root: str) -> bool: + """Return whether any requested target could include the named test suite.""" + suite_path = (Path(config.rootpath) / suite_root).resolve() + + return any( + suite_path.is_relative_to(target) or target.is_relative_to(suite_path) + for target in _requested_test_targets(config) + ) + + +def _raise_if_live_output_mixes_unit_and_integration(config: pytest.Config) -> None: + """Reject invocations that mix live-output unit and integration suites.""" + if not config.getoption("--live-pudl-output", default=False): + return + + has_unit = _target_includes_suite(config, "test/unit") + has_integration = _target_includes_suite(config, "test/integration") + if has_unit and has_integration: + raise pytest.UsageError( + "Cannot combine unit and integration tests in one session with " + "--live-pudl-output: the unit fixture overrides PUDL_OUTPUT to a " + "temp directory, which would corrupt the integration test environment. " + "Run them in separate pytest invocations." 
+ ) + + +def pytest_configure(config: pytest.Config) -> None: + """Run controller-only validation of incompatible pytest CLI combinations.""" + if hasattr(config, "workerinput"): + return + + _raise_if_live_output_mixes_unit_and_integration(config) + + +def pytest_collection_finish(session) -> None: + """Abort if unit and integration tests are collected together with --live-pudl-output. + + When both suites run in a single pytest process with ``--live-pudl-output``, the + unit-scoped ``pudl_test_paths`` override in ``test/unit/conftest.py`` would + overwrite ``os.environ["PUDL_OUTPUT"]`` to a temporary directory *after* the + top-level fixture has set it to the live path. Integration tests that construct + ``PudlPaths()`` directly (rather than via the fixture) would then silently resolve + to the wrong directory. Run unit and integration tests in separate invocations. + """ + if hasattr(session.config, "workerinput"): + return + + if not session.config.getoption("--live-pudl-output", default=False): + return + + has_unit = any(item.nodeid.startswith("test/unit/") for item in session.items) + has_integration = any( + item.nodeid.startswith("test/integration/") for item in session.items + ) + if has_unit and has_integration: + pytest.exit( + "Cannot combine unit and integration tests in one session with " + "--live-pudl-output: the unit fixture overrides PUDL_OUTPUT to a " + "temp directory, which would corrupt the integration test environment. " + "Run them in separate pytest invocations.", + returncode=4, + ) + + +################################################################################ +# Main test configuration, helper functions, and fixture definitions start here. 
+################################################################################ + + def pytest_addoption(parser): - """Add a command line option Requiring fresh data download.""" parser.addoption( - "--live-dbs", + "--live-pudl-output", action="store_true", default=False, help="Use existing PUDL/FERC1 DBs instead of creating temporary ones.", ) parser.addoption( - "--tmp-data", + "--temp-pudl-input", action="store_true", default=False, help="Download fresh input data for use with this test run only.", ) parser.addoption( - "--etl-settings", + "--dg-config", action="store", - default=False, - help="Path to a non-standard ETL settings file to use.", + default=DG_CONFIG_PATH_DEFAULT, + help=( + "Path to a Dagster dg launch config YAML file for integration tests. " + f"Defaults to {DG_CONFIG_PATH_DEFAULT}." + ), ) parser.addoption( "--bypass-local-cache", @@ -92,205 +186,308 @@ def pytest_addoption(parser): default=False, help="Write the unmapped IDs to disk.", ) - parser.addoption( - "--ignore-foreign-key-constraints", - action="store_true", - default=False, - help="If enabled, do not check the foreign keys.", + + +def _pudl_etl( + dg_config_path: Path, + pudl_test_paths: PudlPaths, + dagster_home: Path, +) -> None: + """Run a dg launch job for pudl_with_ferc_to_sqlite including coverage collection. + + Uses the dg executable path directly since ``dg`` is a console script and not a + Python module importable via ``python -m dg``. + """ + dg_path = shutil.which("dg") + if dg_path is None: + pytest.exit("Could not find `dg` executable in PATH.") + + cmd = [ + sys.executable, + "-m", + "coverage", + "run", + "--append", + dg_path, + "launch", + "--job", + "pudl_with_ferc_to_sqlite", + "--config", + str(dg_config_path), + "--verbose", + ] + # Command args are fully constructed in-process and do not include user input. + env = os.environ.copy() + # Force dg launch to read/write within pytest-managed paths. 
+ env["PUDL_INPUT"] = str(pudl_test_paths.input_dir) + env["PUDL_OUTPUT"] = str(pudl_test_paths.output_dir) + env["DAGSTER_HOME"] = str(dagster_home) + env["PYTHONUNBUFFERED"] = "1" + logger.info("Starting PUDL pytest ETL using dg launch.") + logger.info(f"Command: {' '.join(cmd)}") + logger.info( + "Running dg launch with " + f"{env['PUDL_INPUT']=} {env['PUDL_OUTPUT']=} {env['DAGSTER_HOME']=}" ) + # Stream subprocess output into pytest's live logging so progress is visible. + # Popen is used instead of run to allow streaming output. We also set text=True and + # line-buffered output to ensure logs are emitted in real time. + with subprocess.Popen( # noqa: S603 + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + env=env, + ) as proc: + assert proc.stdout is not None + for line in proc.stdout: + logger.info(line.rstrip()) + + returncode = proc.wait() + if returncode != 0: + raise subprocess.CalledProcessError(returncode, cmd) + + logger.info("Completed PUDL pytest ETL using dg launch.") + + +def _assert_prebuilt_ferc_sqlite_dbs(pudl_test_paths: PudlPaths) -> None: + """Validate that required FERC SQLite databases exist after prebuild.""" + required = [ + pudl_test_paths.output_dir / "ferc1_dbf.sqlite", + pudl_test_paths.output_dir / "ferc1_xbrl.sqlite", + pudl_test_paths.output_dir / "ferc714_xbrl.sqlite", + ] + missing = [str(path) for path in required if not path.exists()] + if missing: + raise FileNotFoundError( + "Missing expected FERC SQLite outputs after prebuild: " + ", ".join(missing) + ) -@pytest.fixture(scope="session", name="test_dir") -def test_directory(): + +def _engine_from_io_manager( + io_manager_factory, + dataset_settings_config: DatasetsSettings | None = None, +) -> sa.Engine: + """Return the SQLAlchemy engine exposed by a Dagster IO manager resource.""" + io_manager = io_manager_factory + if dataset_settings_config is not None: + io_manager = io_manager_factory.model_copy( + update={"etl_settings": 
EtlSettings(datasets=dataset_settings_config)} + ) + if isinstance(io_manager, PudlMixedFormatIOManager): + return io_manager._sqlite_io_manager.engine + return io_manager.engine + + +@pytest.fixture(scope="session") +def test_dir(): """Return the path to the top-level directory containing the tests.""" return Path(__file__).parent -@pytest.fixture(scope="session", name="live_dbs") -def live_databases(request) -> bool: - """Fixture that tells whether to use existing live FERC1/PUDL DBs).""" - return request.config.getoption("--live-dbs") +@pytest.fixture(scope="session") +def dg_config_path(request, test_dir: Path) -> Path: + """Resolve Dagster launch config path used by integration-test prebuild.""" + config_path = Path(request.config.getoption("--dg-config")) + + if not config_path.is_absolute(): + config_path = (test_dir.parent / config_path).resolve() + + if not config_path.exists(): + raise FileNotFoundError(f"Missing dg config file: {config_path}") + + return config_path + + +@pytest.fixture(scope="session") +def dagster_home(tmp_path_factory, request) -> Path: + """Resolve the Dagster home shared by this test session. + + Live-output integration runs need to reuse the existing Dagster instance from the + ETL build so FERC SQLite provenance metadata written during `dg launch` remains + visible to later in-process reads. Fixture-managed prebuilds still use an isolated + temporary Dagster home. 
+ """ + if request.config.getoption("--live-pudl-output") and os.environ.get( + "DAGSTER_HOME" + ): + return Path(os.environ["DAGSTER_HOME"]).resolve() + + dagster_home = tmp_path_factory.mktemp("dagster_home") + (dagster_home / "dagster.yaml").touch() + os.environ["DAGSTER_HOME"] = str(dagster_home) + return dagster_home.resolve() + + +@pytest.fixture(scope="session") +def dagster_instance(dagster_home: Path) -> DagsterInstance: + """Return the Dagster instance shared by the pytest session and ETL subprocess.""" + return DagsterInstance.get() @pytest.fixture(scope="session") -def asset_value_loader() -> AssetValueLoader: +def asset_value_loader( + prebuilt_outputs, + etl_settings_path: Path, + dagster_instance: DagsterInstance, +) -> Generator[AssetValueLoader]: """Fixture that initializes an asset value loader. - Use this as ``asset_value_loader.load_asset_value`` instead - of ``defs.load_asset_value`` to not reinitialize the asset - value loader over and over again. + Use this as ``asset_value_loader.load_asset_value`` instead of + ``defs.load_asset_value`` to not reinitialize the asset value loader over and over + again. 
""" - return defs.get_asset_value_loader() + configured_defs = build_defs( + resource_overrides={ + "etl_settings": resources.PudlEtlSettingsResource( + etl_settings_path=str(etl_settings_path) + ) + } + ) + with configured_defs.get_asset_value_loader(instance=dagster_instance) as loader: + yield loader -@pytest.fixture(scope="session", name="save_unmapped_ids") +@pytest.fixture(scope="session") def save_unmapped_ids(request) -> bool: - """Fixture that tells whether to use existing live FERC1/PUDL DBs).""" + """Fixture that indicates whether to save unmapped IDs to disk.""" return request.config.getoption("--save-unmapped-ids") -@pytest.fixture -def check_foreign_keys_flag(request) -> bool: - """Fixture that tells whether to use existing live FERC1/PUDL DBs).""" - return not request.config.getoption("--ignore-foreign-key-constraints") +@pytest.fixture(scope="session") +def etl_settings(etl_settings_path: Path) -> EtlSettings: + """Read ETL settings referenced by Dagster integration config.""" + return EtlSettings.from_yaml(str(etl_settings_path)) -@pytest.fixture(scope="session", name="etl_settings") -def etl_parameters(request, test_dir) -> EtlSettings: - """Read the ETL parameters from the test settings or proffered file.""" - if request.config.getoption("--etl-settings"): - etl_settings_yml = Path(request.config.getoption("--etl-settings")) - else: - etl_settings_yml = Path( - test_dir.parent / "src/pudl/package_data/settings/etl_fast.yml" - ) - etl_settings = EtlSettings.from_yaml(etl_settings_yml) - return etl_settings +@pytest.fixture(scope="session") +def etl_settings_path(dg_config_path: Path, test_dir: Path) -> Path: + """Resolve the ETL settings file referenced by Dagster integration config.""" + with dg_config_path.open() as f: + dg_config = yaml.safe_load(f) + try: + etl_settings_ref = dg_config["resources"]["etl_settings"]["config"][ + "etl_settings_path" + ] + except KeyError as err: + raise ValueError( + "Dagster config must define 
resources.etl_settings.config.etl_settings_path" + ) from err -@pytest.fixture(scope="session", name="ferc_to_sqlite_settings") -def ferc_to_sqlite_parameters(etl_settings: EtlSettings) -> FercToSqliteSettings: - """Read ferc_to_sqlite parameters out of test settings dictionary.""" - return etl_settings.ferc_to_sqlite_settings + etl_settings_yml = Path(etl_settings_ref) + if not etl_settings_yml.is_absolute(): + etl_settings_yml = (test_dir.parent / etl_settings_yml).resolve() + if not etl_settings_yml.exists(): + raise FileNotFoundError(f"Missing ETL settings file: {etl_settings_yml}") -@pytest.fixture(scope="session", name="pudl_etl_settings") -def pudl_etl_parameters(etl_settings: EtlSettings) -> DatasetsSettings: - """Read PUDL ETL parameters out of test settings dictionary.""" - return etl_settings.datasets + return etl_settings_yml @pytest.fixture(scope="session") -def ferc1_dbf_extract( - live_dbs: bool, - pudl_datastore_config, - etl_settings: EtlSettings, -): - """Creates raw FERC 1 SQlite DBs, based only on DBF sources.""" - - @graph - def local_dbf_ferc1_graph(): - Ferc1DbfExtractor.get_dagster_op()() - - if not live_dbs: - execute_result = local_dbf_ferc1_graph.to_job( - name="ferc_to_sqlite_dbf_ferc1", - resource_defs=pudl.ferc_to_sqlite.default_resources_defs, - ).execute_in_process( - run_config={ - "resources": { - "ferc_to_sqlite_settings": { - "config": etl_settings.ferc_to_sqlite_settings.model_dump() - }, - "datastore": { - "config": pudl_datastore_config, - }, - "runtime_settings": {"config": {"xbrl_num_workers": 2}}, - }, - }, - ) - assert execute_result.success, "ferc_to_sqlite_dbf_ferc1 failed!" 
+def dbt_target(etl_settings_path: Path) -> str: + """Infer the dbt target name from the ETL settings used for the test run.""" + if etl_settings_path.name == "etl_full.yml": + return "etl-full" + if etl_settings_path.name == "etl_fast.yml": + return "etl-fast" + + raise ValueError(f"Unexpected ETL settings file: {etl_settings_path}") @pytest.fixture(scope="session") -def ferc1_xbrl_extract( - live_dbs: bool, pudl_datastore_config, etl_settings: EtlSettings -): - """Runs ferc_to_sqlite dagster job for FERC Form 1 XBRL data.""" - - @graph - def local_xbrl_ferc1_graph(): - xbrl2sqlite_op_factory(XbrlFormNumber.FORM1)() - - if not live_dbs: - execute_result = local_xbrl_ferc1_graph.to_job( - name="ferc_to_sqlite_xbrl_ferc1", - resource_defs=pudl.ferc_to_sqlite.default_resources_defs, - ).execute_in_process( - run_config={ - "resources": { - "ferc_to_sqlite_settings": { - "config": etl_settings.ferc_to_sqlite_settings.model_dump(), - }, - "datastore": { - "config": pudl_datastore_config, - }, - "runtime_settings": {"config": {"xbrl_num_workers": 2}}, - }, - } - ) - assert execute_result.success, "ferc_to_sqlite_xbrl_ferc1 failed!" 
+def ferc_to_sqlite_settings(etl_settings: EtlSettings) -> FercToSqliteSettings: + """Read ferc_to_sqlite parameters out of test settings dictionary.""" + return etl_settings.ferc_to_sqlite -@pytest.fixture(scope="session", name="ferc1_engine_dbf") -def ferc1_dbf_sql_engine(ferc1_dbf_extract, dataset_settings_config) -> sa.Engine: - """Grab a connection to the FERC Form 1 DB clone.""" - context = build_init_resource_context( - resources={"dataset_settings": dataset_settings_config} +@pytest.fixture(scope="session") +def pudl_etl_settings(etl_settings: EtlSettings) -> DatasetsSettings: + """Read PUDL ETL parameters out of test settings dictionary.""" + return etl_settings.dataset_settings + + +@pytest.fixture(scope="session") +def ferc1_engine_dbf(prebuilt_outputs, dataset_settings_config) -> sa.Engine: + """Return the SQLAlchemy engine for the prebuilt FERC Form 1 DBF database.""" + return _engine_from_io_manager( + ferc1_dbf_sqlite_io_manager, + dataset_settings_config, ) - return ferc1_dbf_sqlite_io_manager(context).engine @pytest.fixture(scope="session") -def ferc714_xbrl_extract( - live_dbs: bool, pudl_datastore_config, etl_settings: EtlSettings +def prebuilt_outputs( + request, + dg_config_path: Path, + pudl_test_paths: PudlPaths, + dagster_home: Path, ): - """Runs ferc_to_sqlite dagster job for FERC Form 714 XBRL data.""" - - @graph - def local_xbrl_ferc714_graph(): - xbrl2sqlite_op_factory(XbrlFormNumber.FORM714)() - - if not live_dbs: - execute_result = local_xbrl_ferc714_graph.to_job( - name="ferc_to_sqlite_xbrl_ferc1", - resource_defs=pudl.ferc_to_sqlite.default_resources_defs, - ).execute_in_process( - run_config={ - "resources": { - "ferc_to_sqlite_settings": { - "config": etl_settings.ferc_to_sqlite_settings.model_dump(), - }, - "datastore": { - "config": pudl_datastore_config, - }, - "runtime_settings": {"config": {"xbrl_num_workers": 2}}, - }, - } - ) - assert execute_result.success, "ferc_to_sqlite_xbrl_ferc714 failed!" 
+ """Prebuild fast integration databases in pytest-managed output directories. + + When ``--live-pudl-output`` is not set, ``pudl_test_paths`` should have already + set ``PUDL_OUTPUT`` to point at a temporary pytest session directory. + """ + if request.config.getoption("--live-pudl-output"): + logger.info("Using live DBs; skipping fixture-managed prebuild.") + return + + logger.info( + f"Prebuilding PUDL outputs in temporary directory: {pudl_test_paths.output_dir}" + ) + logger.info( + f"Initializing empty pudl.sqlite with current schema at {pudl_test_paths.pudl_db}." + ) + md = PUDL_PACKAGE.to_sql() + pudl_engine = sa.create_engine(pudl_test_paths.pudl_db) + md.create_all(pudl_engine) + + _pudl_etl(dg_config_path, pudl_test_paths, dagster_home) + _assert_prebuilt_ferc_sqlite_dbs(pudl_test_paths) -@pytest.fixture(scope="session", name="ferc1_engine_xbrl") -def ferc1_xbrl_sql_engine(ferc1_xbrl_extract, dataset_settings_config) -> sa.Engine: - """Grab a connection to the FERC Form 1 DB clone.""" - context = build_init_resource_context( - resources={"dataset_settings": dataset_settings_config} +@pytest.fixture(scope="session") +def ferc1_engine_xbrl(prebuilt_outputs, dataset_settings_config) -> sa.Engine: + """Return the SQLAlchemy engine for the prebuilt FERC Form 1 XBRL database.""" + return _engine_from_io_manager( + ferc1_xbrl_sqlite_io_manager, + dataset_settings_config, ) - return ferc1_xbrl_sqlite_io_manager(context).engine -@pytest.fixture(scope="session", name="ferc1_xbrl_taxonomy_metadata") +@pytest.fixture(scope="session") def ferc1_xbrl_taxonomy_metadata(ferc1_engine_xbrl: sa.Engine): - """Read the FERC 1 XBRL taxonomy metadata from JSON.""" + """Read the FERC 1 XBRL taxonomy metadata from JSON. + + ``ferc1_engine_xbrl`` is an ordering-only dependency that ensures the FERC 1 XBRL + database is prebuilt before this fixture runs. Its return value is not used here. 
+ """ result = materialize_to_memory([raw_ferc1_xbrl__metadata_json]) assert result.success return result.output_for_node("raw_ferc1_xbrl__metadata_json") -@pytest.fixture(scope="session", name="ferc714_engine_xbrl") -def ferc714_xbrl_sql_engine(ferc714_xbrl_extract, dataset_settings_config) -> sa.Engine: - """Grab a connection to the FERC Form 714 DB clone.""" - context = build_init_resource_context( - resources={"dataset_settings": dataset_settings_config} +@pytest.fixture(scope="session") +def ferc714_engine_xbrl(prebuilt_outputs, dataset_settings_config) -> sa.Engine: + """Return the SQLAlchemy engine for the prebuilt FERC Form 714 XBRL database.""" + return _engine_from_io_manager( + ferc714_xbrl_sqlite_io_manager, + dataset_settings_config, ) - return ferc714_xbrl_sqlite_io_manager(context).engine -@pytest.fixture(scope="session", name="ferc714_xbrl_taxonomy_metadata") +@pytest.fixture(scope="session") def ferc714_xbrl_taxonomy_metadata(ferc714_engine_xbrl: sa.Engine): - """Read the FERC 714 XBRL taxonomy metadata from JSON.""" + """Read the FERC 714 XBRL taxonomy metadata from JSON. + + ``ferc714_engine_xbrl`` is an ordering-only dependency that ensures the FERC 714 + XBRL database is prebuilt before this fixture runs. Its return value is not used + here. + """ result = materialize_to_memory([raw_ferc714_xbrl__metadata_json]) assert result.success @@ -298,103 +495,86 @@ def ferc714_xbrl_taxonomy_metadata(ferc714_engine_xbrl: sa.Engine): @pytest.fixture(scope="session") -def pudl_io_manager( - ferc1_engine_dbf: sa.Engine, # Implicit dependency - ferc1_engine_xbrl: sa.Engine, # Implicit dependency - ferc714_engine_xbrl: sa.Engine, - live_dbs: bool, - pudl_datastore_config, - dataset_settings_config, - request, -) -> PudlMixedFormatIOManager: - """Grab a connection to the PUDL IO manager. - - If we are using the test database, we initialize the PUDL DB from scratch. If we're - using the live database, then we just make a connection to it. 
- """ - logger.info("setting up the pudl_engine fixture") - if not live_dbs: - # Create the database and schemas - engine = sa.create_engine(PudlPaths().pudl_db) - md = PUDL_PACKAGE.to_sql() - md.create_all(engine) - # Run the ETL and generate a new PUDL SQLite DB for testing: - execute_result = pudl_etl_job_factory(base_job="etl_fast")().execute_in_process( - run_config={ - "resources": { - "dataset_settings": { - "config": dataset_settings_config, - }, - "datastore": { - "config": pudl_datastore_config, - }, - }, - }, - ) - assert execute_result.success, "pudl_etl failed!" - # Grab a connection to the freshly populated PUDL DB, and hand it off. - # All the hard work here is being done by the datapkg and - # datapkg_to_sqlite fixtures, above. - context = build_init_resource_context() - return pudl_mixed_format_io_manager(context) - - -@pytest.fixture(scope="session") -def pudl_engine(pudl_io_manager: PudlMixedFormatIOManager) -> sa.Engine: - """Get PUDL SQL engine from io manager.""" - return pudl_io_manager._sqlite_io_manager.engine +def pudl_engine(prebuilt_outputs) -> sa.Engine: + """Return the SQLAlchemy engine for the prepared PUDL integration database.""" + return _engine_from_io_manager(pudl_mixed_format_io_manager) @pytest.fixture(scope="session", autouse=True) -def configure_paths_for_tests(tmp_path_factory, request): +def pudl_test_paths(tmp_path_factory, request, dagster_home: Path): """Configures PudlPaths for tests. Default behavior: PUDL_INPUT is read from the environment. - PUDL_OUTPUT is set to a tmp path, to avoid clobbering existing databases. + PUDL_OUTPUT is set to a temporary path, to avoid clobbering existing outputs. - Set ``--tmp-data`` to force PUDL_INPUT to a temporary directory, causing + Set ``--temp-pudl-input`` to force PUDL_INPUT to a temporary directory, causing re-downloads of all raw inputs. 
- Set ``--live-dbs`` to force PUDL_OUTPUT to *NOT* be a temporary directory + Set ``--live-pudl-output`` to force PUDL_OUTPUT to *NOT* be a temporary directory and instead inherit from environment. - ``--live-dbs`` flag is ignored in unit tests, see pudl/test/unit/conftest.py. + Note: ``test/unit/conftest.py`` defines ``unit_pudl_test_paths`` which overrides + this fixture for the unit test subtree. It ignores ``--live-pudl-output`` and always + forces a temporary ``PUDL_OUTPUT`` so unit tests can never write to the live output + directory. + + Warning: running unit and integration tests *together* with ``--live-pudl-output`` + in the same pytest session is not supported. The unit fixture would overwrite + ``os.environ["PUDL_OUTPUT"]`` after this fixture has set it to the live path, + silently misdirecting any integration-test code that constructs ``PudlPaths()`` + directly. A ``pytest_collection_finish`` hook in this file prevents that combination. """ # Just in case we need this later... pudl_tmpdir = tmp_path_factory.mktemp("pudl") + + input_dir = Path(os.environ["PUDL_INPUT"]).resolve() + output_dir = Path(os.environ["PUDL_OUTPUT"]).resolve() + # We only use a temporary input directory when explicitly requested. # This will force a re-download of raw inputs from Zenodo or the S3 cache. - if request.config.getoption("--tmp-data"): + if request.config.getoption("--temp-pudl-input"): in_tmp = pudl_tmpdir / "input" in_tmp.mkdir() - PudlPaths.set_path_overrides( - input_dir=str(Path(in_tmp).resolve()), - ) + input_dir = in_tmp.resolve() logger.info(f"Using temporary PUDL_INPUT: {in_tmp}") # Temporary output path is used when not using live DBs. 
- if not request.config.getoption("--live-dbs"): + if not request.config.getoption("--live-pudl-output"): out_tmp = pudl_tmpdir / "output" out_tmp.mkdir() - PudlPaths.set_path_overrides( - output_dir=str(Path(out_tmp).resolve()), - ) + output_dir = out_tmp.resolve() logger.info(f"Using temporary PUDL_OUTPUT: {out_tmp}") + os.environ["DAGSTER_HOME"] = str(dagster_home) + logger.info(f"Using temporary DAGSTER_HOME: {dagster_home}") + + PudlPaths.set_path_overrides( + input_dir=str(input_dir), + output_dir=str(output_dir), + ) + # Keep process env in sync so subprocesses inherit the same locations. + os.environ["PUDL_INPUT"] = str(input_dir) + os.environ["PUDL_OUTPUT"] = str(output_dir) + try: - return PudlPaths() + return PudlPaths( + pudl_input=input_dir, + pudl_output=output_dir, + ) except pydantic.ValidationError as err: pytest.exit( - f"Set PUDL_INPUT, PUDL_OUTPUT env variables, or use --tmp-path, --live-dbs flags. Error: {err}." + f"Set PUDL_INPUT, PUDL_OUTPUT env variables, or use --temp-pudl-input, --live-pudl-output flags. Error: {err}." 
) @pytest.fixture(scope="session") def dataset_settings_config(request, etl_settings: EtlSettings): - """Create dagster dataset_settings resource.""" - return etl_settings.datasets.model_dump() + """Create dataset settings for test helpers and IO managers.""" + if etl_settings.datasets is None: + raise ValueError("Missing datasets settings in ETL settings.") + return etl_settings.datasets @pytest.fixture(scope="session", autouse=True) @@ -419,23 +599,18 @@ def logger_config(): @pytest.fixture(scope="session") -def pudl_datastore_config(request) -> dict[str, Any]: - """Produce a :class:pudl.workspace.datastore.Datastore.""" - return { - "use_local_cache": not request.config.getoption("--bypass-local-cache"), - } - - -@pytest.fixture(scope="session") -def pudl_datastore_fixture(pudl_datastore_config: dict[str, Any]) -> Datastore: +def pudl_datastore_fixture(request) -> Generator[Datastore]: """Create pudl Datastore resource.""" - init_context = build_init_resource_context(config=pudl_datastore_config) - return resources.datastore(init_context) - - -def skip_table_if_null_freq_table(table_name: str, freq: str | None): - """Check.""" - if table_name in AS_MS_ONLY_FREQ_TABLES and freq is None: - pytest.skip( - f"Data validation for {table_name} does not work with a null frequency." 
+ with resources.ZenodoDoiSettingsResource.from_resource_context_cm( + build_init_resource_context() + ) as zenodo_dois: + init_context = build_init_resource_context( + config={ + "use_local_cache": not request.config.getoption("--bypass-local-cache"), + }, + resources={"zenodo_dois": zenodo_dois}, ) + with resources.DatastoreResource.from_resource_context_cm( + init_context + ) as datastore: + yield datastore diff --git a/test/integration/console_scripts_test.py b/test/integration/console_scripts_test.py index d0c2674d6f..4018b03cce 100644 --- a/test/integration/console_scripts_test.py +++ b/test/integration/console_scripts_test.py @@ -5,7 +5,6 @@ import geopandas as gpd # noqa: ICN002 import pytest -import sqlalchemy as sa @pytest.mark.parametrize( @@ -78,19 +77,15 @@ def test_pudl_datastore(script_runner, command: str): ], ) @pytest.mark.script_launch_mode("inprocess") +@pytest.mark.usefixtures("prebuilt_outputs") def test_pudl_service_territories( script_runner, command: str, tmp_path: Path, filename: str, expected_cols: set[str], - pudl_engine: sa.Engine, ): - """CLI tests specific to the pudl_service_territories script. - - Depends on the ``pudl_engine`` fixture to ensure that the censusdp1tract.sqlite - database has been generated, since that data is required for the script to run. 
- """ + """CLI tests specific to the pudl_service_territories script.""" out_path = tmp_path / filename assert not out_path.exists() command += str(tmp_path) @@ -115,6 +110,7 @@ def test_pudl_service_territories( ], ) @pytest.mark.script_launch_mode("inprocess") +@pytest.mark.order(1) def test_resource_description(script_runner, resource_id: str): """CLI tests specific to the resource_description script.""" ret = script_runner.run( diff --git a/test/integration/data_validation_test.py b/test/integration/data_validation_test.py new file mode 100644 index 0000000000..490a1bd379 --- /dev/null +++ b/test/integration/data_validation_test.py @@ -0,0 +1,39 @@ +"""Integration tests for data validation checks that run on prebuilt outputs.""" + +import pytest +import sqlalchemy as sa + +from pudl.dbt_wrapper import build_with_context +from pudl.etl.check_foreign_keys import check_foreign_keys + + +@pytest.mark.order(3) +def test_pudl_foreign_keys(pudl_engine: sa.Engine): + """Validate foreign key constraints on the prebuilt PUDL SQLite database.""" + check_foreign_keys(pudl_engine) + + +@pytest.mark.order(4) +@pytest.mark.usefixtures("prebuilt_outputs", "test_dir") +def test_dbt(dbt_target: str): + """Run the dbt data validations programmatically. + + Because dbt reads data from our Parquet outputs, and the location of the Parquet + outputs is determined by the PUDL_OUTPUT environment variable, and that environment + variable is set during the test setup, we shouldn't need to do any special setup + here to point dbt at the correct outputs. + + This test relies on the prebuilt outputs so the Parquet files are available. + + Note that the row count checks will automatically be disabled unless dbt_target is + 'etl-full'. See the ``check_row_counts_per_partition.sql`` generic test. 
+ """ + test_result = build_with_context( + node_selection="*", + dbt_target=dbt_target, + ) + + if not test_result.success: + raise AssertionError( + f"failure contexts:\n{test_result.format_failure_contexts()}" + ) diff --git a/test/integration/dbt_test.py b/test/integration/dbt_test.py index c56c5685c3..108b3faf31 100644 --- a/test/integration/dbt_test.py +++ b/test/integration/dbt_test.py @@ -1,81 +1,28 @@ +"""Integration tests for dbt validations and helper commands on prebuilt outputs.""" + import contextlib import json -import logging import re -from pathlib import Path import pytest from click.testing import CliRunner -from pudl.dbt_wrapper import build_with_context -from pudl.io_managers import PudlMixedFormatIOManager +from pudl.dbt_wrapper import install_dbt_deps from pudl.scripts.dbt_helper import dbt_helper -logger = logging.getLogger(__name__) - @pytest.fixture(scope="module") -def dbt_target(test_dir: Path, request) -> str: - """Fixture defining the dbt target based on the full/fast ETL spec.""" - # Identify whether we're running the full or fast ETL, and set the dbt target - # appropriately (since we have different test expectations in the two cases) - if request.config.getoption("--etl-settings"): - etl_settings_yml = Path(request.config.getoption("--etl-settings")) - else: - etl_settings_yml = Path( - test_dir.parent / "src/pudl/package_data/settings/etl_fast.yml" - ) - if etl_settings_yml.name == "etl_full.yml": - dbt_target = "etl-full" - elif etl_settings_yml.name == "etl_fast.yml": - dbt_target = "etl-fast" - else: - raise ValueError(f"Unexpected ETL settings file: {etl_settings_yml}") - return dbt_target - - -@pytest.mark.order(4) -def test_dbt( - pudl_io_manager: PudlMixedFormatIOManager, - test_dir: Path, - dbt_target: str, -): - """Run the dbt data validations programmatically. 
- - Because dbt reads data from our Parquet outputs, and the location of the Parquet - outputs is determined by the PUDL_OUTPUT environment variable, and that environment - variable is set during the test setup, we shouldn't need to do any special setup - here to point dbt at the correct outputs. - - The dependency on pudl_io_manager is necessary because it ensures that the dbt - tests don't run until after the ETL has completed and the Parquet files are - available. - - Note that the row count checks will automatically be disabled unless dbt_target is - 'etl-full'. See the ``check_row_counts_per_partition.sql` generic test. - """ - test_result = build_with_context( - node_selection="*", - dbt_target=dbt_target, - ) - - if not test_result.success: - raise AssertionError( - f"failure contexts:\n{test_result.format_failure_contexts()}" - ) +def dbt_dependencies() -> None: + """Install dbt package dependencies for tests that exercise dbt commands.""" + install_dbt_deps() @pytest.mark.script_launch_mode("inprocess") -def test_update_tables( - dbt_target: str, - pudl_io_manager: PudlMixedFormatIOManager, - script_runner, -): +@pytest.mark.usefixtures("prebuilt_outputs") +def test_update_tables(dbt_target: str, script_runner): """Run update-tables. Should detect everything already exists, and do nothing. - The dependency on pudl_io_manager is necessary because it ensures that the dbt - tests don't run until after the ETL has completed and the Parquet files are - available. + This test relies on the prebuilt outputs so the Parquet files are available. """ args = [ "dbt_helper", @@ -92,14 +39,13 @@ def test_update_tables( assert ret.success -# Has to run after test_dbt above otherwise dbt dependencies aren't installed -@pytest.mark.order(5) -@pytest.mark.xfail(reason="Logs swallowed by pytest. 
Revisit when click >=8.3.2") -def test_validate_asset_selection(caplog): - caplog.set_level(logging.INFO) +def test_validate_asset_selection(mocker, dbt_dependencies): + """Verify that dbt_helper expands asset selections in dry-run mode.""" runner = CliRunner() - # Workaround for https://github.com/pallets/click/issues/3110 - # Use isolation() directly instead of invoke() to avoid "ValueError: I/O operation on closed file" + logger_mock = mocker.patch("pudl.scripts.dbt_helper.logger.info") + # Click 8.3.1 still raises "I/O operation on closed file" in invoke() here, + # so keep using isolation() until the bundled version actually behaves. + # See https://github.com/pallets/click/issues/3110 with runner.isolation(), contextlib.suppress(SystemExit): dbt_helper.main( args=[ @@ -112,10 +58,16 @@ def test_validate_asset_selection(caplog): standalone_mode=False, ) - output = caplog.text + if logger_mock.call_args is None: + raise AssertionError("Expected dbt_helper dry-run to log build parameters.") + + output = logger_mock.call_args.args[0] if "node_selection" not in output: raise AssertionError(f"Unexpected output: {output}") - out_params = json.loads(re.search(r"({.+})", output).group(0)) + params_match = re.search(r"({.+})", output) + if params_match is None: + raise AssertionError(f"Could not parse JSON parameters from output: {output}") + out_params = json.loads(params_match.group(0)) obs_node_selection = out_params["node_selection"].split(" ") # just need to know that the key got expanded at all - specifics of expansion tested in dbt_wrapper_test assert len(obs_node_selection) > 1 diff --git a/test/integration/etl_test.py b/test/integration/etl_test.py index be08a225fd..3d182395ad 100644 --- a/test/integration/etl_test.py +++ b/test/integration/etl_test.py @@ -1,41 +1,47 @@ """PyTest based testing of the FERC Database & PUDL data package initializations. -This module also contains fixtures for returning connections to the databases. 
These -connections can be either to the live databases for post-ETL testing or to new temporary -databases, which are created from scratch and dropped after the tests have completed. +Database connections are provided by session-scoped fixtures in ``conftest.py``. The +``prebuilt_outputs`` fixture builds all integration databases via ``dg launch`` +before these tests run. """ import logging +from typing import Literal +import pandas as pd import pytest import sqlalchemy as sa -from dagster import build_init_resource_context +from sqlalchemy.engine.reflection import Inspector import pudl -from pudl.etl.check_foreign_keys import check_foreign_keys -from pudl.resources import dataset_settings +from pudl.settings import DatasetsSettings, Ferc1Settings logger = logging.getLogger(__name__) -@pytest.mark.order(3) -def test_pudl_engine( - pudl_engine: sa.Engine, - check_foreign_keys_flag: bool, -): - """Get pudl_engine and do basic inspection. +@pytest.mark.order(2) +def test_pudl_engine(pudl_engine: sa.Engine): + """Verify that key PUDL tables exist and are populated. - By default the foreign key checks are not enabled in pudl.sqlite. This test will - check if there are any foreign key errors if check_foreign_keys is True. + Foreign key validation lives in a separate data-validation test so the nightly + build can report it independently from the rest of the integration suite. 
""" assert isinstance(pudl_engine, sa.Engine) - insp = sa.inspect(pudl_engine) - assert "core_pudl__entity_plants_pudl" in insp.get_table_names() - assert "core_pudl__entity_utilities_pudl" in insp.get_table_names() + insp: Inspector = sa.inspect(pudl_engine) + required_tables = ( + "core_pudl__entity_plants_pudl", + "core_pudl__entity_utilities_pudl", + ) - if check_foreign_keys_flag: - # Raises ForeignKeyErrors if there are any - check_foreign_keys(pudl_engine) + for table_name in required_tables: + assert table_name in insp.get_table_names() + + with pudl_engine.connect() as connection: + for table_name in required_tables: + first_row: int | None = connection.execute( + sa.select(sa.literal(1)).select_from(sa.table(table_name)).limit(1) + ).scalar() + assert first_row is not None, f"Expected {table_name} to contain data." class TestCsvExtractor: @@ -157,35 +163,25 @@ def test_extract_eia923(self, pudl_datastore_fixture): class TestFerc1ExtractDebugFunctions: """Verify the ferc1 extraction debug functions are working properly.""" - def test_extract_dbf(self, ferc1_engine_dbf: sa.Engine): + @pytest.mark.usefixtures("ferc1_engine_dbf") + def test_extract_dbf(self): """Test extract_dbf.""" - years = [2020, 2021] # add desired years here - configured_dataset_settings = {"ferc1": {"years": years}} - - dataset_init_context = build_init_resource_context( - config=configured_dataset_settings + ferc1_dbf_raw_dfs: dict[str, pd.DataFrame] = pudl.extract.ferc1.extract_dbf( + dataset_settings=DatasetsSettings(ferc1=Ferc1Settings(years=[2020, 2021])) ) - configured_dataset_settings = dataset_settings(dataset_init_context) - - ferc1_dbf_raw_dfs = pudl.extract.ferc1.extract_dbf(configured_dataset_settings) for table_name, df in ferc1_dbf_raw_dfs.items(): assert (df.report_year >= 2020).all() and (df.report_year < 2022).all(), ( f"Unexpected years found in table: {table_name}" ) - def test_extract_xbrl(self, ferc1_engine_xbrl: sa.Engine): + 
@pytest.mark.usefixtures("ferc1_engine_xbrl") + def test_extract_xbrl(self): """Test extract_xbrl.""" - years = [2021] # add desired years here - configured_dataset_settings = {"ferc1": {"years": years}} - - dataset_init_context = build_init_resource_context( - config=configured_dataset_settings - ) - configured_dataset_settings = dataset_settings(dataset_init_context) - - ferc1_xbrl_raw_dfs = pudl.extract.ferc1.extract_xbrl( - configured_dataset_settings + ferc1_xbrl_raw_dfs: dict[ + str, dict[Literal["duration", "instant"], pd.DataFrame] + ] = pudl.extract.ferc1.extract_xbrl( + dataset_settings=DatasetsSettings(ferc1=Ferc1Settings(years=[2021])) ) for table_name, xbrl_tables in ferc1_xbrl_raw_dfs.items(): diff --git a/test/integration/ferc1_eia_train_test.py b/test/integration/ferc1_eia_train_test.py index 19009ff16a..6e8b1f3175 100644 --- a/test/integration/ferc1_eia_train_test.py +++ b/test/integration/ferc1_eia_train_test.py @@ -19,7 +19,6 @@ import pandas as pd import pytest -import sqlalchemy as sa from pudl.analysis.record_linkage.eia_ferc1_inputs import ( restrict_train_connections_on_date_range, @@ -116,6 +115,7 @@ def eia_ferc1_training_data() -> pd.DataFrame: ), ], ) +@pytest.mark.usefixtures("prebuilt_outputs") def test_validate_override_fixes( eia_ferc1_training_data: pd.DataFrame, verified: list[str], @@ -124,9 +124,8 @@ def test_validate_override_fixes( record_id_ferc1: list[str], utility_id_pudl_ferc1: list[int], expectation, - pudl_engine: sa.Engine, # Required to ensure that the data is available. 
) -> None: - """Test the validate override fixes function.""" + """Validate override fixes against the prebuilt integration outputs.""" # Get data tables with only the columns needed by validate_override_fixes # to reduce memory usage during testing plant_parts_eia = get_parquet_table( @@ -164,10 +163,9 @@ def test_validate_override_fixes( ) -def test_generate_all_override_spreadsheets( - pudl_engine: sa.Engine, # Required to ensure that the data is available. -): - """Test the genation of the override spreadsheet for mapping FERC-EIA records.""" +@pytest.mark.usefixtures("prebuilt_outputs") +def test_generate_all_override_spreadsheets(): + """Generate override spreadsheets from the prebuilt integration outputs.""" # Get data tables directly plant_parts_eia = get_parquet_table("out_eia__yearly_plant_parts") eia_ferc1 = get_parquet_table("out_pudl__yearly_assn_eia_ferc1_plant_parts") diff --git a/test/integration/ferc_dbf_extract_test.py b/test/integration/ferc_dbf_extract_test.py index d6d3e97bfa..f1f5189a79 100644 --- a/test/integration/ferc_dbf_extract_test.py +++ b/test/integration/ferc_dbf_extract_test.py @@ -15,16 +15,8 @@ logger = get_logger(__name__) -@pytest.mark.order(2) def test_ferc1_dbf2sqlite(ferc1_engine_dbf): - """Attempt to access the DBF based FERC 1 SQLite DB fixture. - - This test is marked with order(2) to ensure that it is explicitly run before the - main PUDL ETL test, and is the first attempt to make use of the conceptually related - FERC Form 1 DBF DB engine & taxonomy fixtures. This means that if they fail, the - failure will be more clearly associated with the fixture, and not some random - downstream test that just happened to run first. 
- """ + """Attempt to access the DBF based FERC 1 SQLite DB fixture.""" assert isinstance(ferc1_engine_dbf, sa.Engine) assert "f1_respondent_id" in sa.inspect(ferc1_engine_dbf).get_table_names() @@ -38,6 +30,7 @@ def test_ferc1_dbf2sqlite(ferc1_engine_dbf): pytest.param(Ferc60DbfExtractor, id="ferc60"), ], ) +@pytest.mark.order(1) def test_ferc_schema(ferc_to_sqlite_settings, pudl_datastore_fixture, extractor_class): """Check to make sure we aren't missing any old FERC Form N tables or fields. diff --git a/test/integration/ferc_xbrl_extract_test.py b/test/integration/ferc_xbrl_extract_test.py index e0c08112d2..f2affbf835 100644 --- a/test/integration/ferc_xbrl_extract_test.py +++ b/test/integration/ferc_xbrl_extract_test.py @@ -9,7 +9,6 @@ import pytest import sqlalchemy as sa -from pudl.etl import defs from pudl.extract.ferc1 import TABLE_NAME_MAP_FERC1 from pudl.settings import FercToSqliteSettings from pudl.transform.ferc import filter_for_freshest_data_xbrl, get_primary_key_raw_xbrl @@ -36,15 +35,17 @@ def _find_empty_tables(db_conn, tables: set[str]) -> set[str]: return set(empty_tables) -@pytest.mark.order(2) -def test_sqlite_duckdb_equivalence(ferc_to_sqlite_settings: FercToSqliteSettings): +def test_sqlite_duckdb_equivalence( + prebuilt_outputs, + ferc_to_sqlite_settings: FercToSqliteSettings, +): """Ensure that the XBRL-derived FERC SQLite and DuckDB databases are equivalent.""" for form in FERC_FORMS: - if ferc_to_sqlite_settings.__getattribute__( + if not ferc_to_sqlite_settings.__getattribute__( f"ferc{form}_xbrl_to_sqlite_settings" - ).disabled: + ).years: logger.info( - f"Skipping FERC Form {form} sqlite vs duckdb equivalence test..." + f"Skipping FERC Form {form} sqlite vs duckdb equivalence test: no years configured." ) continue logger.info(f"Comparing FERC Form {form} SQLite vs. 
DuckDB outputs...") @@ -89,19 +90,11 @@ def test_sqlite_duckdb_equivalence(ferc_to_sqlite_settings: FercToSqliteSettings logger.info(f" - All {n_tables} tables are identical.") -@pytest.mark.order(2) def test_ferc1_xbrl2sqlite(ferc1_engine_xbrl: sa.Engine, ferc1_xbrl_taxonomy_metadata): """Attempt to access the XBRL based FERC 1 SQLite DB & XBRL taxonomy metadata. We're testing both the SQLite & JSON taxonomy here because they are generated - together by the FERC 1 XBRL ETL. - - This test is marked with order(2) to ensure that it is explicitly run before the - main PUDL ETL test, and is the first attempt to make use of the conceptually related - FERC Form 1 XBRL DB engine & taxonomy fixtures. This means that if they fail, the - failure will be more clearly associated with the fixture, and not some random - downstream test that just happened to run first. - """ + together by the FERC 1 XBRL ETL.""" # Does the database exist, and contain a table we expect it to contain? assert isinstance(ferc1_engine_xbrl, sa.Engine) assert ( @@ -135,18 +128,10 @@ def test_ferc1_xbrl2sqlite(ferc1_engine_xbrl: sa.Engine, ferc1_xbrl_taxonomy_met ) -@pytest.mark.order(2) def test_ferc714_xbrl2sqlite( ferc714_engine_xbrl: sa.Engine, ferc714_xbrl_taxonomy_metadata: dict[str, Any] ): - """Attempt to access the XBRL based FERC 714 SQLite DB & XBRL taxonomy metadata. - - This test is marked with order(2) to ensure that it is explicitly run before the - main PUDL ETL test, and is the first attempt to make use of the conceptually related - FERC-714 XBRL DB engine & taxonomy fixtures. This means that if they fail, the - failure will be more clearly associated with the fixture, and not some random - downstream test that just happened to run first. 
- """ + """Attempt to access the XBRL based FERC 714 SQLite DB & XBRL taxonomy metadata.""" assert isinstance(ferc714_engine_xbrl, sa.Engine) assert ( "identification_and_certification_01_1_duration" @@ -174,7 +159,11 @@ def test_ferc714_xbrl2sqlite( "core_ferc1__yearly_income_statements_sched114", ], ) -def test_filter_for_freshest_data(ferc1_engine_xbrl: sa.Engine, table_name: str): +def test_filter_for_freshest_data( + ferc1_engine_xbrl: sa.Engine, + table_name: str, + asset_value_loader, +): """Test if we are unexpectedly replacing records during filter_for_freshest_data.""" raw_table_names = TABLE_NAME_MAP_FERC1[table_name]["xbrl"] @@ -188,7 +177,7 @@ def test_filter_for_freshest_data(ferc1_engine_xbrl: sa.Engine, table_name: str) ) for raw_table_name in xbrls_with_periods: logger.info(f"Checking if our filtering methodology works for {raw_table_name}") - xbrl_table: pd.DataFrame = defs.load_asset_value(raw_table_name) + xbrl_table: pd.DataFrame = asset_value_loader.load_asset_value(raw_table_name) if not xbrl_table.empty: primary_keys = get_primary_key_raw_xbrl( raw_table_name.removeprefix("raw_ferc1_xbrl__"), "ferc1" diff --git a/test/integration/glue_test.py b/test/integration/glue_test.py index 713caa0f26..d47eccd725 100644 --- a/test/integration/glue_test.py +++ b/test/integration/glue_test.py @@ -6,6 +6,7 @@ import pandas as pd import pytest import sqlalchemy as sa +from dagster import DagsterInstance from pudl.glue.ferc1_eia import ( get_missing_ids, @@ -24,11 +25,13 @@ logger = logging.getLogger(__name__) -def plants_ferc1_raw(dataset_settings_config) -> pd.DataFrame: +def plants_ferc1_raw( + etl_settings_path: Path, dagster_instance: DagsterInstance +) -> pd.DataFrame: """Execute the partial ETL of FERC plant tables. Args: - dataset_settings_config: dataset settings for the given pytest run. + etl_settings_path: ETL settings file used for the given pytest run. Returns: plants_ferc1_raw: all plants in the FERC Form 1 DBF and XBRL DB for given years. 
@@ -36,28 +39,31 @@ def plants_ferc1_raw(dataset_settings_config) -> pd.DataFrame: result = get_plants_ferc1_raw_job().execute_in_process( run_config={ "resources": { - "dataset_settings": { - "config": dataset_settings_config, + "etl_settings": { + "config": { + "etl_settings_path": str(etl_settings_path), + }, }, } - } + }, + instance=dagster_instance, ) return result.output_for_node("plants_ferc1_raw") @pytest.fixture(scope="module") def glue_test_dfs( - pudl_engine: sa.Engine, # Necessary to ensure data is already available. + prebuilt_outputs, ferc1_engine_xbrl: sa.Engine, ferc1_engine_dbf: sa.Engine, - etl_settings, - dataset_settings_config, + etl_settings_path: Path, + dagster_instance: DagsterInstance, ) -> dict[str, pd.DataFrame]: - """Make a dictionary of the dataframes required for this test module.""" + """Build the dataframes required for glue integration tests.""" glue_test_dfs = { "util_ids_ferc1_raw_xbrl": get_util_ids_ferc1_raw_xbrl(ferc1_engine_xbrl), "util_ids_ferc1_raw_dbf": get_util_ids_ferc1_raw_dbf(ferc1_engine_dbf), - "plants_ferc1_raw": plants_ferc1_raw(dataset_settings_config), + "plants_ferc1_raw": plants_ferc1_raw(etl_settings_path, dagster_instance), "plants_eia_pudl_db": get_parquet_table("out_eia__yearly_plants"), "plants_eia_labeled": label_plants_eia( get_parquet_table("out_eia__yearly_plants"), @@ -87,7 +93,7 @@ def glue_test_dfs( return glue_test_dfs -def save_to_devtools_glue(missing_df: pd.DataFrame, test_dir, file_name: str): +def save_to_devtools_glue(missing_df: pd.DataFrame, test_dir: Path, file_name: str): """Save a dataframe as a CSV to the glue directory in devtools.""" file_path = Path(test_dir.parent, "devtools", "ferc1-eia-glue", file_name) missing_df.to_csv(file_path) diff --git a/test/integration/plant_parts_eia_test.py b/test/integration/plant_parts_eia_test.py index ce00588373..ebdcb3d364 100644 --- a/test/integration/plant_parts_eia_test.py +++ b/test/integration/plant_parts_eia_test.py @@ -3,7 +3,7 @@ import 
logging import pandas as pd -import sqlalchemy as sa +import pytest import pudl from pudl.analysis.plant_parts_eia import ( @@ -44,12 +44,11 @@ def prep_test_merge( return test_merge -def test_run_aggregations( - pudl_engine: sa.Engine, # Implicit dependency to ensure data is available. -) -> None: - """Run a test of the aggregated columns. +@pytest.mark.usefixtures("prebuilt_outputs") +def test_run_aggregations() -> None: + """Validate plant-part aggregations against the prebuilt integration outputs. - This test will used the plant_parts_eia, re-run groubys and check similarity. + This test re-runs the groupbys and compares them against the stored outputs. """ logger.info("Testing ownership fractions for owned records.") diff --git a/test/integration/record_linkage_test.py b/test/integration/record_linkage_test.py index 0980a71e4d..292901ad4e 100644 --- a/test/integration/record_linkage_test.py +++ b/test/integration/record_linkage_test.py @@ -232,6 +232,7 @@ def _score_model( return ratio_correct +@pytest.mark.order(1) def test_classify_plants_ferc1(mock_ferc1_plants_df): """Test the FERC inter-year plant linking model.""" steam_plants = mock_ferc1_plants_df[ diff --git a/test/integration/resource_cache_test.py b/test/integration/resource_cache_test.py index f055e40a9d..f85f7dbb61 100644 --- a/test/integration/resource_cache_test.py +++ b/test/integration/resource_cache_test.py @@ -49,6 +49,7 @@ def sample_resource(): class TestUPathCacheIntegration: """Integration tests for UPathCache with real storage backends.""" + @pytest.mark.order(1) def test_local_filesystem_via_upath(self, tmp_path, sample_resource): """Test UPathCache with local filesystem.""" cache = UPathCache(UPath(f"file://{tmp_path}")) @@ -185,6 +186,7 @@ def test_three_layer_cache_with_s3(self, tmp_path, gcs_test_cache_path): class TestCacheInteroperability: """Test that different cache implementations can interoperate.""" + @pytest.mark.order(1) def 
test_multiple_upath_caches_in_layered_cache(self, tmp_path, sample_resource): """Test using multiple UPathCache instances in the same LayeredCache.""" # Create different UPath caches pointing to different directories diff --git a/test/integration/timeseries_plot_test.py b/test/integration/timeseries_plot_test.py index 13b25ef284..0fdaf96635 100644 --- a/test/integration/timeseries_plot_test.py +++ b/test/integration/timeseries_plot_test.py @@ -1,6 +1,7 @@ """Test timeseries plotting functions.""" import pandas as pd +import pytest from pudl.analysis.timeseries_evaluation import ( plot_correlation, @@ -8,8 +9,9 @@ ) -def test_plot_imputation(pudl_io_manager, mocker, asset_value_loader): - """Test that plot function doesn't error.""" +@pytest.mark.usefixtures("prebuilt_outputs") +def test_plot_imputation(mocker, asset_value_loader): + """Smoke test plot_imputation against the prebuilt integration outputs.""" mocker.patch("pudl.analysis.timeseries_evaluation.plt.show") eia930_sub = asset_value_loader.load_asset_value( "out_eia930__hourly_subregion_demand" @@ -33,8 +35,9 @@ def test_plot_imputation(pudl_io_manager, mocker, asset_value_loader): ) -def test_plot_correlation(pudl_io_manager, mocker, asset_value_loader): - """Test that plot function doesn't error.""" +@pytest.mark.usefixtures("prebuilt_outputs") +def test_plot_correlation(mocker, asset_value_loader): + """Smoke test plot_correlation against the prebuilt integration outputs.""" mocker.patch("pudl.analysis.timeseries_evaluation.plt.show") eia930_sub = asset_value_loader.load_asset_value( "out_eia930__hourly_subregion_demand" diff --git a/test/unit/conftest.py b/test/unit/conftest.py index 15a2905c07..1ba508efc9 100644 --- a/test/unit/conftest.py +++ b/test/unit/conftest.py @@ -1,7 +1,6 @@ import logging -from pathlib import Path +import os -import pydantic import pytest from pudl.workspace.setup import PudlPaths @@ -9,45 +8,32 @@ logger = logging.getLogger(__name__) -@pytest.fixture(scope="session", 
autouse=True) -def configure_paths_for_tests(tmp_path_factory, request): - """Configures PudlPaths for tests. +@pytest.fixture(name="pudl_test_paths", scope="session", autouse=True) +def unit_pudl_test_paths( + tmp_path_factory, + request, + pudl_test_paths: PudlPaths, +) -> PudlPaths: + """Apply unit-test-specific path safety on top of ``pudl_test_paths``. - Default behavior: - - PUDL_INPUT is read from the environment. - PUDL_OUTPUT is set to a tmp path, to avoid clobbering existing databases. - - Set ``--tmp-data`` to force PUDL_INPUT to a temporary directory, causing - re-downloads of all raw inputs. - - Ignores the ``--live-dbs`` flag; always forces PUDL_OUTPUT to a temp dir so - unit test can never mess with the outputs. - - See pudl/test/conftest.py for the non-unit test counterpart. + ``pudl_test_paths`` in ``test/conftest.py`` is the canonical path setup fixture. + This unit-test fixture only enforces one additional rule: always use a temporary + ``PUDL_OUTPUT`` even when ``--live-pudl-output`` is passed. """ - pudl_tmpdir = tmp_path_factory.mktemp("pudl") + if not request.config.getoption("--live-pudl-output"): + return pudl_test_paths - # We only use a temporary input directory when explicitly requested. - # This will force a re-download of raw inputs from Zenodo or the S3 cache. 
- if request.config.getoption("--tmp-data"): - in_tmp = pudl_tmpdir / "input" - in_tmp.mkdir() - PudlPaths.set_path_overrides( - input_dir=str(Path(in_tmp).resolve()), - ) - logger.info(f"Using temporary PUDL_INPUT: {in_tmp}") - - out_tmp = pudl_tmpdir / "output" + pudl_tmpdir = tmp_path_factory.mktemp("pudl") + out_tmp = (pudl_tmpdir / "output").resolve() out_tmp.mkdir() - PudlPaths.set_path_overrides( - output_dir=str(Path(out_tmp).resolve()), + + PudlPaths.set_path_overrides(output_dir=str(out_tmp)) + os.environ["PUDL_OUTPUT"] = str(out_tmp) + logger.info( + f"Unit tests ignore --live-pudl-output and use temporary PUDL_OUTPUT: {out_tmp}" + ) + + return PudlPaths( + pudl_input=pudl_test_paths.input_dir, + pudl_output=out_tmp, ) - logger.info(f"Using temporary PUDL_OUTPUT: {out_tmp}") - - try: - return PudlPaths() - except pydantic.ValidationError as err: - pytest.exit( - f"Set PUDL_INPUT, PUDL_OUTPUT env variables, or use --tmp-path, --live-dbs flags. Error: {err}." - ) diff --git a/test/unit/extract/extractor_test.py b/test/unit/extract/extractor_test.py index 839c3e08e3..3ec98749be 100644 --- a/test/unit/extract/extractor_test.py +++ b/test/unit/extract/extractor_test.py @@ -3,7 +3,7 @@ from dagster import build_op_context from pudl.extract.extractor import concat_pages, partitions_from_settings_factory -from pudl.settings import DatasetsSettings +from pudl.settings import DatasetsSettings, EtlSettings @pytest.mark.parametrize( @@ -18,7 +18,7 @@ def test_years_from_settings(dataset, expected_years): partitions_from_settings = partitions_from_settings_factory(dataset) with build_op_context( - resources={"dataset_settings": DatasetsSettings()} + resources={"etl_settings": EtlSettings(datasets=DatasetsSettings())} ) as context: # Assert actual years are a superset of expected. 
Instead of doing # an equality check, this avoids having to update expected years diff --git a/test/unit/extract/xbrl_test.py b/test/unit/extract/xbrl_test.py index 426524fce5..1d6076cf73 100644 --- a/test/unit/extract/xbrl_test.py +++ b/test/unit/extract/xbrl_test.py @@ -1,12 +1,21 @@ """Tests for xbrl extraction module.""" +from pathlib import Path + +import dagster as dg import pytest from dagster import ResourceDefinition +from dagster._core.definitions.assets.definition.assets_definition import ( + AssetsDefinition, +) +from dagster._core.execution.execute_in_process_result import ExecuteInProcessResult +from pudl.etl import ferc_to_sqlite_assets +from pudl.extract.ferc1 import Ferc1DbfExtractor from pudl.extract.xbrl import FercXbrlDatastore, convert_form -from pudl.ferc_to_sqlite import ferc_to_sqlite from pudl.resources import RuntimeSettings from pudl.settings import ( + EtlSettings, Ferc1DbfToSqliteSettings, Ferc1XbrlToSqliteSettings, Ferc2XbrlToSqliteSettings, @@ -17,6 +26,7 @@ FercToSqliteSettings, XbrlFormNumber, ) +from pudl.workspace.datastore import ZenodoDoiSettings from pudl.workspace.setup import PudlPaths @@ -85,35 +95,51 @@ def test_ferc_xbrl_datastore_get_filings(mocker): ), [], ), + ( + FercToSqliteSettings( + ferc1_xbrl_to_sqlite_settings=Ferc1XbrlToSqliteSettings(years=[]), + ferc2_xbrl_to_sqlite_settings=Ferc2XbrlToSqliteSettings(years=[]), + ferc6_xbrl_to_sqlite_settings=Ferc6XbrlToSqliteSettings(years=[]), + ferc60_xbrl_to_sqlite_settings=Ferc60XbrlToSqliteSettings(years=[]), + ferc714_xbrl_to_sqlite_settings=Ferc714XbrlToSqliteSettings(years=[]), + ), + [], + ), ], ) def test_xbrl2sqlite(settings, forms, mocker, tmp_path): convert_form_mock = mocker.MagicMock() - mocker.patch("pudl.extract.xbrl.convert_form", new=convert_form_mock) + mocker.patch("pudl.etl.ferc_to_sqlite_assets.convert_form", new=convert_form_mock) # Mock datastore object to allow comparison mock_datastore = mocker.MagicMock() - 
mocker.patch("pudl.extract.xbrl.FercXbrlDatastore", return_value=mock_datastore) + mocker.patch( + "pudl.etl.ferc_to_sqlite_assets.FercXbrlDatastore", return_value=mock_datastore + ) - # Only select operations that are tagged with data_format=xbrl. - op_selection = [ - op.name - for op in ferc_to_sqlite.node_defs - if op.tags.get("data_format") == "xbrl" + xbrl_assets: list[AssetsDefinition] = [ + ferc_to_sqlite_assets.raw_ferc1_xbrl__sqlite, + ferc_to_sqlite_assets.raw_ferc2_xbrl__sqlite, + ferc_to_sqlite_assets.raw_ferc6_xbrl__sqlite, + ferc_to_sqlite_assets.raw_ferc60_xbrl__sqlite, + ferc_to_sqlite_assets.raw_ferc714_xbrl__sqlite, ] - ferc_to_sqlite.execute_in_process( - op_selection=op_selection, + result: ExecuteInProcessResult = dg.materialize( + assets=xbrl_assets, resources={ - "ferc_to_sqlite_settings": settings, + "etl_settings": EtlSettings(ferc_to_sqlite_settings=settings), "datastore": ResourceDefinition.mock_resource(), "runtime_settings": RuntimeSettings( xbrl_batch_size=20, xbrl_num_workers=10, ), + "zenodo_dois": ZenodoDoiSettings(), }, ) + assert result.success + assert convert_form_mock.call_count == len(forms) for form in forms: @@ -126,6 +152,7 @@ def test_xbrl2sqlite(settings, forms, mocker, tmp_path): duckdb_path=PudlPaths().output_dir / f"ferc{form.value}_xbrl.duckdb", batch_size=20, workers=10, + loglevel="INFO", ) @@ -146,7 +173,7 @@ def get_filings(self, year, form: XbrlFormNumber): years=[2020, 2021], ) - output_path = PudlPaths().pudl_output + output_path: Path = PudlPaths().pudl_output # Test convert_form for every form number for form in XbrlFormNumber: @@ -162,22 +189,41 @@ def get_filings(self, year, form: XbrlFormNumber): ) # Verify extractor is called correctly - filings = [f"filings_{year}_{form.value}" for year in settings.years] + filings: list[str] = [f"filings_{year}_{form.value}" for year in settings.years] extractor_mock.assert_called_with( filings=filings, sqlite_path=output_path / f"ferc{form.value}_xbrl.sqlite", 
duckdb_path=output_path / f"ferc{form.value}_xbrl.duckdb", taxonomy=f"raw_archive_{form.value}", form_number=form.value, - metadata_path=str( - output_path / f"ferc{form.value}_xbrl_taxonomy_metadata.json" - ), - datapackage_path=str( - output_path / f"ferc{form.value}_xbrl_datapackage.json" - ), + metadata_path=output_path / f"ferc{form.value}_xbrl_taxonomy_metadata.json", + datapackage_path=output_path / f"ferc{form.value}_xbrl_datapackage.json", workers=5, batch_size=10, loglevel="INFO", logfile=None, ) extractor_mock.reset_mock() + + +def test_ferc_dbf_extractor_skips_with_empty_years(mocker, tmp_path): + """FercDbfExtractor.execute() should return early when years=[].""" + mocker.patch.object( + Ferc1DbfExtractor, "get_dbf_reader", return_value=mocker.MagicMock() + ) + mocker.patch("pudl.extract.dbf.sa.create_engine", return_value=mocker.MagicMock()) + mocker.patch("pudl.extract.dbf.sa.MetaData", return_value=mocker.MagicMock()) + + settings = FercToSqliteSettings( + ferc1_dbf_to_sqlite_settings=Ferc1DbfToSqliteSettings(years=[]), + ) + extractor = Ferc1DbfExtractor( + datastore=mocker.MagicMock(), + settings=settings, + output_path=tmp_path, + ) + + delete_schema_mock = mocker.patch.object(extractor, "delete_schema") + extractor.execute() + + delete_schema_mock.assert_not_called() diff --git a/test/unit/ferc_sqlite_provenance_test.py b/test/unit/ferc_sqlite_provenance_test.py new file mode 100644 index 0000000000..6338f14aa3 --- /dev/null +++ b/test/unit/ferc_sqlite_provenance_test.py @@ -0,0 +1,312 @@ +"""Unit tests for the FERC SQLite provenance helpers.""" + +from pathlib import Path + +import dagster as dg +import pytest + +from pudl.ferc_sqlite_provenance import ( + FercSQLiteProvenance, + assert_ferc_sqlite_compatible, + build_ferc_sqlite_provenance_metadata, + get_ferc_sqlite_provenance, +) +from pudl.settings import EtlSettings, FercToSqliteSettings +from pudl.workspace.datastore import ZenodoDoiSettings + + +@pytest.fixture() +def etl_settings() -> 
EtlSettings: + """Minimal ETL settings with FERC-to-SQLite config for provenance tests.""" + return EtlSettings(ferc_to_sqlite_settings=FercToSqliteSettings()) + + +@pytest.fixture() +def zenodo_dois() -> ZenodoDoiSettings: + """Default Zenodo DOI settings.""" + return ZenodoDoiSettings() + + +@pytest.mark.parametrize( + ("db_name", "expected_dataset", "expected_format"), + [ + ("ferc1_dbf", "ferc1", "dbf"), + ("ferc1_xbrl", "ferc1", "xbrl"), + ("ferc714_xbrl", "ferc714", "xbrl"), + ("ferc2_dbf", "ferc2", "dbf"), + ], +) +def test_get_ferc_sqlite_provenance_dataset_and_format( + db_name: str, + expected_dataset: str, + expected_format: str, + etl_settings: EtlSettings, + zenodo_dois: ZenodoDoiSettings, +) -> None: + """Provenance fingerprint extracts dataset and format from the db_name.""" + provenance: FercSQLiteProvenance = get_ferc_sqlite_provenance( + db_name=db_name, + etl_settings=etl_settings, + zenodo_dois=zenodo_dois, + ) + assert isinstance(provenance, FercSQLiteProvenance) + assert provenance.dataset == expected_dataset + assert provenance.data_format == expected_format + assert provenance.asset_key == dg.AssetKey(f"raw_{db_name}__sqlite") + + +def test_get_ferc_sqlite_provenance_years_are_non_empty( + etl_settings: EtlSettings, + zenodo_dois: ZenodoDoiSettings, +) -> None: + """The provenance fingerprint must include a non-empty, sorted list of years.""" + provenance = get_ferc_sqlite_provenance( + db_name="ferc1_dbf", + etl_settings=etl_settings, + zenodo_dois=zenodo_dois, + ) + assert isinstance(provenance.years, list) + assert len(provenance.years) > 0 + assert provenance.years == sorted(provenance.years) + + +def test_build_ferc_sqlite_provenance_metadata_keys( + etl_settings: EtlSettings, + zenodo_dois: ZenodoDoiSettings, +) -> None: + """Metadata dict should contain all required provenance keys.""" + metadata = build_ferc_sqlite_provenance_metadata( + db_name="ferc1_dbf", + etl_settings=etl_settings, + zenodo_dois=zenodo_dois, + 
sqlite_path=Path("test-data/ferc1_dbf.sqlite"), + status="complete", + ) + required_keys: set[str] = { + "pudl_ferc_sqlite_dataset", + "pudl_ferc_sqlite_status", + "pudl_ferc_sqlite_zenodo_doi", + "pudl_ferc_sqlite_etl_settings", + "pudl_ferc_sqlite_years", + "pudl_ferc_sqlite_path", + } + assert required_keys <= set(metadata.keys()) + + +def test_assert_ferc_sqlite_compatible_skips_without_instance( + etl_settings: EtlSettings, + zenodo_dois: ZenodoDoiSettings, +) -> None: + """Provenance check should be a no-op when no Dagster instance is available.""" + # Should not raise even though no instance is available. + assert_ferc_sqlite_compatible( + instance=None, + db_name="ferc1_dbf", + etl_settings=etl_settings, + zenodo_dois=zenodo_dois, + ) + + +def test_assert_ferc_sqlite_compatible_passes_matching_provenance( + etl_settings: EtlSettings, + zenodo_dois: ZenodoDoiSettings, + mocker, +) -> None: + """Compatible provenance should not raise.""" + metadata = build_ferc_sqlite_provenance_metadata( + db_name="ferc1_dbf", + etl_settings=etl_settings, + zenodo_dois=zenodo_dois, + sqlite_path=None, + status="complete", + ) + instance: dg.DagsterInstance = mocker.MagicMock() + instance.get_latest_materialization_event.return_value = mocker.MagicMock( + asset_materialization=mocker.MagicMock(metadata=metadata) + ) + # Should not raise. + assert_ferc_sqlite_compatible( + instance=instance, + db_name="ferc1_dbf", + etl_settings=etl_settings, + zenodo_dois=zenodo_dois, + ) + + +def test_assert_ferc_sqlite_compatible_passes_superset_years( + etl_settings: EtlSettings, + zenodo_dois: ZenodoDoiSettings, + mocker, +) -> None: + """A stored DB covering more years than required should be compatible.""" + # Build metadata as if the DB was built with the full settings (all years). 
+ stored_metadata = build_ferc_sqlite_provenance_metadata( + db_name="ferc1_dbf", + etl_settings=etl_settings, + zenodo_dois=zenodo_dois, + sqlite_path=None, + status="complete", + ) + instance: dg.DagsterInstance = mocker.MagicMock() + instance.get_latest_materialization_event.return_value = mocker.MagicMock( + asset_materialization=mocker.MagicMock(metadata=stored_metadata) + ) + + # Downstream run requests only a single year — a strict subset of what is stored. + stored_years: list[int] = get_ferc_sqlite_provenance( + db_name="ferc1_dbf", + etl_settings=etl_settings, + zenodo_dois=zenodo_dois, + ).years + one_year: int = stored_years[len(stored_years) // 2] # pick a year from the middle + + from pudl.settings import Ferc1DbfToSqliteSettings + + fast_settings = EtlSettings( + ferc_to_sqlite_settings=etl_settings.ferc_to_sqlite.model_copy( + update={ + "ferc1_dbf_to_sqlite_settings": Ferc1DbfToSqliteSettings( + years=[one_year] + ) + } + ) + ) + # Should not raise: stored years ⊇ required years. 
+ assert_ferc_sqlite_compatible( + instance=instance, + db_name="ferc1_dbf", + etl_settings=fast_settings, + zenodo_dois=zenodo_dois, + ) + + +def test_assert_ferc_sqlite_compatible_rejects_doi_mismatch( + etl_settings: EtlSettings, + zenodo_dois: ZenodoDoiSettings, + mocker, +) -> None: + """A Zenodo DOI mismatch should raise a descriptive RuntimeError.""" + stale_dois = ZenodoDoiSettings(ferc1="10.5281/zenodo.9999999") + metadata = build_ferc_sqlite_provenance_metadata( + db_name="ferc1_dbf", + etl_settings=etl_settings, + zenodo_dois=stale_dois, + sqlite_path=None, + status="complete", + ) + instance: dg.DagsterInstance = mocker.MagicMock() + instance.get_latest_materialization_event.return_value = mocker.MagicMock( + asset_materialization=mocker.MagicMock(metadata=metadata) + ) + with pytest.raises(RuntimeError, match="Zenodo DOI mismatch"): + assert_ferc_sqlite_compatible( + instance=instance, + db_name="ferc1_dbf", + etl_settings=etl_settings, + zenodo_dois=zenodo_dois, + ) + + +def test_assert_ferc_sqlite_compatible_rejects_missing_years( + etl_settings: EtlSettings, + zenodo_dois: ZenodoDoiSettings, + mocker, +) -> None: + """Stored DB that lacks required years should raise a descriptive RuntimeError.""" + from pudl.settings import Ferc1DbfToSqliteSettings + + # DB was built with only year 2021. + narrow_settings = EtlSettings( + ferc_to_sqlite_settings=etl_settings.ferc_to_sqlite.model_copy( + update={ + "ferc1_dbf_to_sqlite_settings": Ferc1DbfToSqliteSettings(years=[2021]) + } + ) + ) + stored_metadata = build_ferc_sqlite_provenance_metadata( + db_name="ferc1_dbf", + etl_settings=narrow_settings, + zenodo_dois=zenodo_dois, + sqlite_path=None, + status="complete", + ) + instance: dg.DagsterInstance = mocker.MagicMock() + instance.get_latest_materialization_event.return_value = mocker.MagicMock( + asset_materialization=mocker.MagicMock(metadata=stored_metadata) + ) + # Downstream run requests all years — a strict superset of what is stored. 
+ with pytest.raises(RuntimeError, match="missing required years"): + assert_ferc_sqlite_compatible( + instance=instance, + db_name="ferc1_dbf", + etl_settings=etl_settings, # full years + zenodo_dois=zenodo_dois, + ) + + +def test_assert_ferc_sqlite_compatible_rejects_missing_materialization( + etl_settings: EtlSettings, + zenodo_dois: ZenodoDoiSettings, + mocker, +) -> None: + """Missing materialization event should raise a descriptive RuntimeError.""" + instance: dg.DagsterInstance = mocker.MagicMock() + instance.get_latest_materialization_event.return_value = None + with pytest.raises(RuntimeError, match="No Dagster provenance metadata"): + assert_ferc_sqlite_compatible( + instance=instance, + db_name="ferc1_dbf", + etl_settings=etl_settings, + zenodo_dois=zenodo_dois, + ) + + +def test_assert_ferc_sqlite_compatible_rejects_incomplete_status( + etl_settings: EtlSettings, + zenodo_dois: ZenodoDoiSettings, + mocker, +) -> None: + """A DB built with status='skipped' (e.g. years=[]) should raise RuntimeError. + + A skipped extraction means the SQLite file was never populated, so downstream + IO managers must refuse to read from it. 
+ """ + metadata = build_ferc_sqlite_provenance_metadata( + db_name="ferc1_dbf", + etl_settings=etl_settings, + zenodo_dois=zenodo_dois, + sqlite_path=None, + status="skipped", + ) + instance: dg.DagsterInstance = mocker.MagicMock() + instance.get_latest_materialization_event.return_value = mocker.MagicMock( + asset_materialization=mocker.MagicMock(metadata=metadata) + ) + with pytest.raises(RuntimeError, match="status="): + assert_ferc_sqlite_compatible( + instance=instance, + db_name="ferc1_dbf", + etl_settings=etl_settings, + zenodo_dois=zenodo_dois, + ) + + +@pytest.mark.parametrize( + "db_name", + [ + "not_a_ferc_db", + "ferc1", # missing _dbf or _xbrl suffix + ], +) +def test_get_ferc_sqlite_provenance_rejects_bad_db_name( + db_name: str, + etl_settings: EtlSettings, + zenodo_dois: ZenodoDoiSettings, +) -> None: + """Malformed db_names should raise ValueError.""" + with pytest.raises(ValueError): + get_ferc_sqlite_provenance( + db_name=db_name, + etl_settings=etl_settings, + zenodo_dois=zenodo_dois, + ) diff --git a/test/unit/helpers_test.py b/test/unit/helpers_test.py index 14e7ff4225..70feb079e1 100644 --- a/test/unit/helpers_test.py +++ b/test/unit/helpers_test.py @@ -29,7 +29,6 @@ standardize_phone_column, zero_pad_numeric_string, ) -from pudl.output.sql.helpers import sql_asset_factory MONTHLY_GEN_FUEL = pd.DataFrame( { @@ -630,13 +629,6 @@ def test_flatten_mix_types(): assert list(flatten_list(list1a)) == ["1", 22, "333", 4, "5", 666] -def test_sql_asset_factory_missing_file(): - """Test sql_asset_factory throws a file not found error if file doesn't exist for an - asset name.""" - with pytest.raises(FileNotFoundError): - sql_asset_factory(name="fake_view")() - - @pytest.mark.parametrize( "df", [ diff --git a/test/unit/io_managers_test.py b/test/unit/io_managers_test.py index f55e67dd52..c1f138925a 100644 --- a/test/unit/io_managers_test.py +++ b/test/unit/io_managers_test.py @@ -6,7 +6,9 @@ import pandas as pd import pytest import sqlalchemy as sa 
-from dagster import AssetKey, build_input_context, build_output_context +from dagster import AssetKey, DagsterInstance, build_input_context, build_output_context +from dagster._core.execution.context.input import InputContext +from dagster._core.execution.context.output import OutputContext from sqlalchemy.exc import IntegrityError, OperationalError from pudl.etl.check_foreign_keys import ( @@ -14,13 +16,28 @@ ForeignKeyErrors, check_foreign_keys, ) +from pudl.ferc_sqlite_provenance import build_ferc_sqlite_provenance_metadata from pudl.io_managers import ( - FercXBRLSQLiteIOManager, + FercDbfSQLiteConfigurableIOManager, + FercDbfSQLiteIOManager, + FercXbrlSQLiteConfigurableIOManager, + FercXbrlSQLiteIOManager, + PudlMixedFormatIOManager, + PudlParquetIOManager, PudlSQLiteIOManager, SQLiteIOManager, + ferc1_dbf_sqlite_io_manager, + ferc1_xbrl_sqlite_io_manager, ) from pudl.metadata import PUDL_PACKAGE from pudl.metadata.classes import Package, Resource +from pudl.settings import ( + DatasetsSettings, + EtlSettings, + Ferc1Settings, + FercToSqliteSettings, +) +from pudl.workspace.datastore import ZenodoDoiSettings @pytest.fixture @@ -79,34 +96,34 @@ def test_pkg() -> Package: @pytest.fixture -def sqlite_io_manager_fixture(tmp_path, test_pkg): +def sqlite_io_manager_fixture(tmp_path, test_pkg) -> SQLiteIOManager: """Create a SQLiteIOManager fixture with a simple database schema.""" - md = test_pkg.to_sql() + md: sa.MetaData = test_pkg.to_sql() return SQLiteIOManager(base_dir=tmp_path, db_name="pudl", md=md) def test_sqlite_io_manager_delete_stmt(sqlite_io_manager_fixture): """Test we are replacing the data without dropping the table schema.""" - manager = sqlite_io_manager_fixture + manager: SQLiteIOManager = sqlite_io_manager_fixture asset_key = "artist" artist = pd.DataFrame({"artistid": [1], "artistname": ["Co-op Mop"]}) - output_context = build_output_context(asset_key=AssetKey(asset_key)) + output_context: OutputContext = 
build_output_context(asset_key=AssetKey(asset_key)) manager.handle_output(output_context, artist) # Read the table back into pandas - input_context = build_input_context(asset_key=AssetKey(asset_key)) + input_context: InputContext = build_input_context(asset_key=AssetKey(asset_key)) returned_df = manager.load_input(input_context) assert len(returned_df) == 1 # Rerun the asset # Load the dataframe to a sqlite table - output_context = build_output_context(asset_key=AssetKey(asset_key)) + output_context: OutputContext = build_output_context(asset_key=AssetKey(asset_key)) manager.handle_output(output_context, artist) # Read the table back into pandas - input_context = build_input_context(asset_key=AssetKey(asset_key)) - returned_df = manager.load_input(input_context) + input_context: InputContext = build_input_context(asset_key=AssetKey(asset_key)) + returned_df: pd.DataFrame = manager.load_input(input_context) assert len(returned_df) == 1 @@ -116,14 +133,14 @@ def test_foreign_key_failure(sqlite_io_manager_fixture): asset_key = "artist" artist = pd.DataFrame({"artistid": [1], "artistname": ["Co-op Mop"]}) - output_context = build_output_context(asset_key=AssetKey(asset_key)) + output_context: OutputContext = build_output_context(asset_key=AssetKey(asset_key)) manager.handle_output(output_context, artist) asset_key = "track" track = pd.DataFrame( {"trackid": [1], "trackname": ["FERC Ya!"], "trackartist": [2]} ) - output_context = build_output_context(asset_key=AssetKey(asset_key)) + output_context: OutputContext = build_output_context(asset_key=AssetKey(asset_key)) manager.handle_output(output_context, track) with pytest.raises(ForeignKeyErrors) as excinfo: @@ -139,20 +156,20 @@ def test_foreign_key_failure(sqlite_io_manager_fixture): def test_extra_column_error(sqlite_io_manager_fixture): """Ensure an error is thrown when there is an extra column in the dataframe.""" - manager = sqlite_io_manager_fixture + manager: SQLiteIOManager = sqlite_io_manager_fixture asset_key 
= "artist" artist = pd.DataFrame( {"artistid": [1], "artistname": ["Co-op Mop"], "artistmanager": [1]} ) - output_context = build_output_context(asset_key=AssetKey(asset_key)) + output_context: OutputContext = build_output_context(asset_key=AssetKey(asset_key)) with pytest.raises(OperationalError): manager.handle_output(output_context, artist) def test_missing_column_error(sqlite_io_manager_fixture): """Ensure an error is thrown when a dataframe is missing a column in the schema.""" - manager = sqlite_io_manager_fixture + manager: SQLiteIOManager = sqlite_io_manager_fixture asset_key = "artist" artist = pd.DataFrame( @@ -160,18 +177,18 @@ def test_missing_column_error(sqlite_io_manager_fixture): "artistid": [1], } ) - output_context = build_output_context(asset_key=AssetKey(asset_key)) + output_context: OutputContext = build_output_context(asset_key=AssetKey(asset_key)) with pytest.raises(ValueError): manager.handle_output(output_context, artist) def test_nullable_column_error(sqlite_io_manager_fixture): """Ensure an error is thrown when a non nullable column is missing data.""" - manager = sqlite_io_manager_fixture + manager: SQLiteIOManager = sqlite_io_manager_fixture asset_key = "artist" artist = pd.DataFrame({"artistid": [1, 2], "artistname": ["Co-op Mop", pd.NA]}) - output_context = build_output_context(asset_key=AssetKey(asset_key)) + output_context: OutputContext = build_output_context(asset_key=AssetKey(asset_key)) with pytest.raises(IntegrityError): manager.handle_output(output_context, artist) @@ -180,84 +197,86 @@ def test_nullable_column_error(sqlite_io_manager_fixture): @pytest.mark.xfail(reason="SQLite autoincrement behvior is breaking this test.") def test_null_primary_key_column_error(sqlite_io_manager_fixture): """Ensure an error is thrown when a primary key contains a nullable value.""" - manager = sqlite_io_manager_fixture + manager: SQLiteIOManager = sqlite_io_manager_fixture asset_key = "artist" artist = pd.DataFrame( {"artistid": [1, pd.NA], 
"artistname": ["Co-op Mop", "Cxtxlyst"]} ) - output_context = build_output_context(asset_key=AssetKey(asset_key)) + output_context: OutputContext = build_output_context(asset_key=AssetKey(asset_key)) with pytest.raises(IntegrityError): manager.handle_output(output_context, artist) def test_primary_key_column_error(sqlite_io_manager_fixture): """Ensure an error is thrown when a primary key is violated.""" - manager = sqlite_io_manager_fixture + manager: SQLiteIOManager = sqlite_io_manager_fixture asset_key = "artist" artist = pd.DataFrame({"artistid": [1, 1], "artistname": ["Co-op Mop", "Cxtxlyst"]}) - output_context = build_output_context(asset_key=AssetKey(asset_key)) + output_context: OutputContext = build_output_context(asset_key=AssetKey(asset_key)) with pytest.raises(IntegrityError): manager.handle_output(output_context, artist) def test_incorrect_type_error(sqlite_io_manager_fixture): """Ensure an error is thrown when dataframe type doesn't match the table schema.""" - manager = sqlite_io_manager_fixture + manager: SQLiteIOManager = sqlite_io_manager_fixture asset_key = "artist" artist = pd.DataFrame({"artistid": ["abc"], "artistname": ["Co-op Mop"]}) - output_context = build_output_context(asset_key=AssetKey(asset_key)) + output_context: OutputContext = build_output_context(asset_key=AssetKey(asset_key)) with pytest.raises(IntegrityError): manager.handle_output(output_context, artist) def test_missing_schema_error(sqlite_io_manager_fixture): """Test a ValueError is raised when a table without a schema is loaded.""" - manager = sqlite_io_manager_fixture + manager: SQLiteIOManager = sqlite_io_manager_fixture asset_key = "venues" venue = pd.DataFrame({"venueid": [1], "venuename": "Vans Dive Bar"}) - output_context = build_output_context(asset_key=AssetKey(asset_key)) + output_context: OutputContext = build_output_context(asset_key=AssetKey(asset_key)) with pytest.raises(ValueError): manager.handle_output(output_context, venue) @pytest.fixture -def 
fake_pudl_sqlite_io_manager_fixture(tmp_path, test_pkg, monkeypatch): +def fake_pudl_sqlite_io_manager_fixture( + tmp_path, test_pkg, monkeypatch +) -> PudlSQLiteIOManager: """Create a SQLiteIOManager fixture with a fake database schema.""" - db_path = tmp_path / "fake.sqlite" + db_path: Path = tmp_path / "fake.sqlite" # Create the database and schemas - engine = sa.create_engine(f"sqlite:///{db_path}") - md = test_pkg.to_sql() + engine: sa.Engine = sa.create_engine(f"sqlite:///{db_path}") + md: sa.MetaData = test_pkg.to_sql() md.create_all(engine) return PudlSQLiteIOManager(base_dir=tmp_path, db_name="fake", package=test_pkg) def test_pudl_sqlite_io_manager_delete_stmt(fake_pudl_sqlite_io_manager_fixture): """Test we are replacing the data without dropping the table schema.""" - manager = fake_pudl_sqlite_io_manager_fixture + manager: PudlSQLiteIOManager = fake_pudl_sqlite_io_manager_fixture asset_key = "artist" artist = pd.DataFrame({"artistid": [1], "artistname": ["Co-op Mop"]}) - output_context = build_output_context(asset_key=AssetKey(asset_key)) + output_context: OutputContext = build_output_context(asset_key=AssetKey(asset_key)) manager.handle_output(output_context, artist) # Read the table back into pandas - input_context = build_input_context(asset_key=AssetKey(asset_key)) - returned_df = manager.load_input(input_context) + input_context: InputContext = build_input_context(asset_key=AssetKey(asset_key)) + returned_df: pd.DataFrame = manager.load_input(input_context) assert len(returned_df) == 1 # Rerun the asset # Load the dataframe to a sqlite table - output_context = build_output_context(asset_key=AssetKey(asset_key)) + output_context: OutputContext = build_output_context(asset_key=AssetKey(asset_key)) manager.handle_output(output_context, artist) # Read the table back into pandas - input_context = build_input_context(asset_key=AssetKey(asset_key)) - returned_df = manager.load_input(input_context) + input_context: InputContext = 
build_input_context(asset_key=AssetKey(asset_key)) + returned_df: pd.DataFrame = manager.load_input(input_context) assert len(returned_df) == 1 @@ -286,31 +305,217 @@ def test_migrations_match_metadata(tmp_path, monkeypatch): assert True -def test_error_when_handling_view_without_metadata(fake_pudl_sqlite_io_manager_fixture): - """Make sure an error is thrown when a user creates a view without metadata.""" - asset_key = "track_view" - sql_stmt = "CREATE VIEW track_view AS SELECT * FROM track;" - output_context = build_output_context(asset_key=AssetKey(asset_key)) - with pytest.raises(ValueError): - fake_pudl_sqlite_io_manager_fixture.handle_output(output_context, sql_stmt) - - def test_empty_read_fails(fake_pudl_sqlite_io_manager_fixture): """Reading empty table fails.""" with pytest.raises(AssertionError): - context = build_input_context(asset_key=AssetKey("artist")) + context: InputContext = build_input_context(asset_key=AssetKey("artist")) fake_pudl_sqlite_io_manager_fixture.load_input(context) +def test_mixed_format_io_manager_invalid_config(): + """The mixed-format manager should reject parquet-read without parquet-write.""" + with pytest.raises(RuntimeError): + PudlMixedFormatIOManager( + write_to_parquet=False, + read_from_parquet=True, + ) + + +def test_mixed_format_io_manager_initializes_backends(mocker): + """The migrated mixed-format IO manager should lazily expose both backends.""" + sqlite_manager: PudlSQLiteIOManager = mocker.MagicMock(spec=PudlSQLiteIOManager) + parquet_manager: PudlParquetIOManager = mocker.MagicMock() + mocker.patch("pudl.io_managers.PudlSQLiteIOManager", return_value=sqlite_manager) + mocker.patch("pudl.io_managers.PudlParquetIOManager", return_value=parquet_manager) + + manager = PudlMixedFormatIOManager() + + assert manager._sqlite_io_manager is sqlite_manager + assert manager._parquet_io_manager is parquet_manager + + +def test_ferc_dbf_io_manager_uses_injected_dataset_settings(mocker): + """The migrated FERC DBF IO manager 
should read years from injected settings.""" + dataset_settings = DatasetsSettings(ferc1=Ferc1Settings(years=[2020, 2021])) + etl_settings: EtlSettings = EtlSettings( + datasets=dataset_settings, + ferc_to_sqlite_settings=FercToSqliteSettings(), + ) + zenodo_dois: ZenodoDoiSettings = ZenodoDoiSettings() + fake_manager: FercDbfSQLiteIOManager = mocker.MagicMock() + fake_manager._query.return_value = pd.DataFrame( + {"sched_table_name": ["f1_respondent_id"]} + ) + mocker.patch("pudl.io_managers.FercDbfSQLiteIOManager", return_value=fake_manager) + + manager: FercDbfSQLiteConfigurableIOManager = ( + ferc1_dbf_sqlite_io_manager.model_copy( + update={"etl_settings": etl_settings, "zenodo_dois": zenodo_dois} + ) + ) + instance: DagsterInstance = mocker.MagicMock() + instance.get_latest_materialization_event.return_value = mocker.MagicMock( + asset_materialization=mocker.MagicMock( + metadata=build_ferc_sqlite_provenance_metadata( + db_name="ferc1_dbf", + etl_settings=etl_settings, + zenodo_dois=zenodo_dois, + sqlite_path=Path("test-data/ferc1_dbf.sqlite"), + status="complete", + ) + ) + ) + context: InputContext = build_input_context( + asset_key=AssetKey("raw_ferc1_dbf__f1_respondent_id"), + instance=instance, + ) + + observed: pd.DataFrame = manager.load_input(context) + + assert observed["sched_table_name"].eq("f1_respondent_id").all() + fake_manager._query.assert_called_once_with( + "f1_respondent_id", + dataset_settings.ferc1.dbf_years, + ) + + +def test_ferc_xbrl_io_manager_uses_injected_dataset_settings(mocker): + """The migrated FERC XBRL IO manager should pass years from injected settings.""" + dataset_settings = DatasetsSettings(ferc1=Ferc1Settings(years=[2021])) + etl_settings = EtlSettings( + datasets=dataset_settings, + ferc_to_sqlite_settings=FercToSqliteSettings(), + ) + zenodo_dois = ZenodoDoiSettings() + fake_manager = mocker.MagicMock() + fake_manager._query.return_value = pd.DataFrame( + {"report_year": [2021], "sched_table_name": ["plant_in_service"]} 
+ ) + mocker.patch("pudl.io_managers.FercXbrlSQLiteIOManager", return_value=fake_manager) + + manager: FercXbrlSQLiteConfigurableIOManager = ( + ferc1_xbrl_sqlite_io_manager.model_copy( + update={"etl_settings": etl_settings, "zenodo_dois": zenodo_dois} + ) + ) + instance: DagsterInstance = mocker.MagicMock() + instance.get_latest_materialization_event.return_value = mocker.MagicMock( + asset_materialization=mocker.MagicMock( + metadata=build_ferc_sqlite_provenance_metadata( + db_name="ferc1_xbrl", + etl_settings=etl_settings, + zenodo_dois=zenodo_dois, + sqlite_path=Path("test-data/ferc1_xbrl.sqlite"), + status="complete", + ) + ) + ) + context: InputContext = build_input_context( + asset_key=AssetKey("raw_ferc1_xbrl__plant_in_service_duration"), + instance=instance, + ) + + observed: pd.DataFrame = manager.load_input(context) + + assert observed["report_year"].eq(2021).all() + assert observed["sched_table_name"].eq("plant_in_service").all() + fake_manager._query.assert_called_once_with( + "plant_in_service_duration", + dataset_settings.ferc1.xbrl_years, + ) + + +def test_ferc_dbf_io_manager_rejects_incompatible_provenance(mocker): + """The migrated FERC DBF IO manager should fail fast on stale prerequisites.""" + dataset_settings = DatasetsSettings(ferc1=Ferc1Settings(years=[2020, 2021])) + etl_settings = EtlSettings( + datasets=dataset_settings, + ferc_to_sqlite_settings=FercToSqliteSettings(), + ) + zenodo_dois = ZenodoDoiSettings() + stale_zenodo_dois = ZenodoDoiSettings(ferc1="10.5281/zenodo.9999999") + + fake_engine = mocker.MagicMock() + fake_engine.begin.return_value.__enter__.return_value = mocker.MagicMock() + fake_manager = mocker.MagicMock() + fake_manager.engine: sa.Engine = fake_engine + mocker.patch("pudl.io_managers.FercDbfSQLiteIOManager", return_value=fake_manager) + read_sql_query = mocker.patch("pudl.io_managers.pd.read_sql_query") + + manager: FercDbfSQLiteConfigurableIOManager = ( + ferc1_dbf_sqlite_io_manager.model_copy( + 
update={"etl_settings": etl_settings, "zenodo_dois": zenodo_dois} + ) + ) + stale_metadata = build_ferc_sqlite_provenance_metadata( + db_name="ferc1_dbf", + etl_settings=etl_settings, + zenodo_dois=stale_zenodo_dois, + sqlite_path=Path("test-data/ferc1_dbf.sqlite"), + status="complete", + ) + instance: DagsterInstance = mocker.MagicMock() + instance.get_latest_materialization_event.return_value = mocker.MagicMock( + asset_materialization=mocker.MagicMock(metadata=stale_metadata) + ) + context: InputContext = build_input_context( + asset_key=AssetKey("raw_ferc1_dbf__f1_respondent_id"), + instance=instance, + ) + + with pytest.raises(RuntimeError, match="Zenodo DOI mismatch"): + manager.load_input(context) + + read_sql_query.assert_not_called() + + +def test_ferc_dbf_io_manager_requires_provenance_metadata(mocker): + """The migrated FERC DBF IO manager should fail fast when no provenance exists.""" + dataset_settings = DatasetsSettings.model_validate( + {"ferc1": {"years": [2020, 2021]}} + ) + etl_settings = EtlSettings( + datasets=dataset_settings, + ferc_to_sqlite_settings=FercToSqliteSettings(), + ) + zenodo_dois = ZenodoDoiSettings() + + fake_engine: sa.Engine = mocker.MagicMock() + fake_engine.begin.return_value.__enter__.return_value = mocker.MagicMock() + fake_manager = mocker.MagicMock() + fake_manager.engine = fake_engine + mocker.patch("pudl.io_managers.FercDbfSQLiteIOManager", return_value=fake_manager) + read_sql_query = mocker.patch("pudl.io_managers.pd.read_sql_query") + + manager: FercDbfSQLiteConfigurableIOManager = ( + ferc1_dbf_sqlite_io_manager.model_copy( + update={"etl_settings": etl_settings, "zenodo_dois": zenodo_dois} + ) + ) + instance: DagsterInstance = mocker.MagicMock() + instance.get_latest_materialization_event.return_value = None + context: InputContext = build_input_context( + asset_key=AssetKey("raw_ferc1_dbf__f1_respondent_id"), + instance=instance, + ) + + with pytest.raises(RuntimeError, match="No Dagster provenance metadata"): + 
manager.load_input(context) + + read_sql_query.assert_not_called() + + def test_replace_on_insert(fake_pudl_sqlite_io_manager_fixture): """Tests that two runs of the same asset overwrite existing contents.""" artist_df = pd.DataFrame({"artistid": [1], "artistname": ["Co-op Mop"]}) - output_context = build_output_context(asset_key=AssetKey("artist")) - input_context = build_input_context(asset_key=AssetKey("artist")) + output_context: OutputContext = build_output_context(asset_key=AssetKey("artist")) + input_context: InputContext = build_input_context(asset_key=AssetKey("artist")) # Write then read. fake_pudl_sqlite_io_manager_fixture.handle_output(output_context, artist_df) - read_df = fake_pudl_sqlite_io_manager_fixture.load_input(input_context) + read_df: pd.DataFrame = fake_pudl_sqlite_io_manager_fixture.load_input( + input_context + ) pd.testing.assert_frame_equal(artist_df, read_df, check_dtype=False) # check_dtype=False, because int64 != Int64. /o\ @@ -318,45 +523,14 @@ def test_replace_on_insert(fake_pudl_sqlite_io_manager_fixture): # one artist in the database. new_artist_df = pd.DataFrame({"artistid": [2], "artistname": ["Cxtxlyst"]}) fake_pudl_sqlite_io_manager_fixture.handle_output(output_context, new_artist_df) - read_df = fake_pudl_sqlite_io_manager_fixture.load_input(input_context) + read_df: pd.DataFrame = fake_pudl_sqlite_io_manager_fixture.load_input( + input_context + ) pd.testing.assert_frame_equal(new_artist_df, read_df, check_dtype=False) -@pytest.mark.skip(reason="SQLAlchemy is not finding the view. 
Debug or remove.") -def test_handling_view_with_metadata(fake_pudl_sqlite_io_manager_fixture): - """Make sure an users can create and load views when it has metadata.""" - # Create some sample data - asset_key = "artist" - artist = pd.DataFrame({"artistid": [1], "artistname": ["Co-op Mop"]}) - output_context = build_output_context(asset_key=AssetKey(asset_key)) - fake_pudl_sqlite_io_manager_fixture.handle_output(output_context, artist) - - # create the view - asset_key = "artist_view" - sql_stmt = "CREATE VIEW artist_view AS SELECT * FROM artist;" - output_context = build_output_context(asset_key=AssetKey(asset_key)) - fake_pudl_sqlite_io_manager_fixture.handle_output(output_context, sql_stmt) - - # read the view data as a dataframe - input_context = build_input_context(asset_key=AssetKey(asset_key)) - # print(input_context) - # This is failing, not sure why - # sqlalchemy.exc.InvalidRequestError: Could not reflect: requested table(s) not available in - # Engine(sqlite:////private/var/folders/pg/zrqnq8l113q57bndc5__h2640000gn/ - # # T/pytest-of-nelsonauner/pytest-38/test_handling_view_with_metada0/pudl.sqlite): (artist_view) - fake_pudl_sqlite_io_manager_fixture.load_input(input_context) - - -def test_error_when_reading_view_without_metadata(fake_pudl_sqlite_io_manager_fixture): - """Make sure and error is thrown when a user loads a view without metadata.""" - asset_key = "track_view" - input_context = build_input_context(asset_key=AssetKey(asset_key)) - with pytest.raises(ValueError): - fake_pudl_sqlite_io_manager_fixture.load_input(input_context) - - def test_report_year_fixing_instant(): - instant_df = pd.DataFrame.from_records( + instant_df: pd.DataFrame = pd.DataFrame.from_records( [ { "entity_id": "123", @@ -367,7 +541,7 @@ def test_report_year_fixing_instant(): ] ) - observed = FercXBRLSQLiteIOManager.refine_report_year( + observed: pd.Series = FercXbrlSQLiteIOManager.refine_report_year( instant_df, xbrl_years=[2021, 2022] ).report_year expected = 
pd.Series([2020]) @@ -375,7 +549,7 @@ def test_report_year_fixing_instant(): def test_report_year_fixing_duration(): - duration_df = pd.DataFrame.from_records( + duration_df: pd.DataFrame = pd.DataFrame.from_records( [ { "entity_id": "123", @@ -394,10 +568,10 @@ def test_report_year_fixing_duration(): ] ) - observed = FercXBRLSQLiteIOManager.refine_report_year( + observed: pd.Series = FercXbrlSQLiteIOManager.refine_report_year( duration_df, xbrl_years=[2021, 2022] ).report_year - expected = pd.Series([2021]) + expected: pd.Series = pd.Series([2021]) assert (observed == expected).all() @@ -453,4 +627,4 @@ def test_report_year_fixing_duration(): ) def test_report_year_fixing_bad_values(df, match): with pytest.raises(ValueError, match=match): - FercXBRLSQLiteIOManager.refine_report_year(df, xbrl_years=[2021, 2022]) + FercXbrlSQLiteIOManager.refine_report_year(df, xbrl_years=[2021, 2022]) diff --git a/test/unit/settings_test.py b/test/unit/settings_test.py index 53f8848198..0851a6ee31 100644 --- a/test/unit/settings_test.py +++ b/test/unit/settings_test.py @@ -1,17 +1,23 @@ """Tests for settings validation.""" +import importlib.resources import inspect from typing import Self import pandas as pd import pytest -from dagster import DagsterInvalidConfigError, Field, build_init_resource_context +from dagster import build_init_resource_context +from dagster._core.execution.context.init import UnboundInitResourceContext from pandas import json_normalize from pydantic import BaseModel, ValidationError import pudl.settings as _settings_module from pudl.metadata.classes import DataSource -from pudl.resources import dataset_settings +from pudl.resources import ( + DatastoreResource, + PudlEtlSettingsResource, + ZenodoDoiSettingsResource, +) from pudl.settings import ( DatasetsSettings, Eia860mSettings, @@ -20,14 +26,12 @@ EiaSettings, EpaCemsSettings, EtlSettings, - Ferc1DbfToSqliteSettings, Ferc1Settings, Ferc1XbrlToSqliteSettings, FercToSqliteSettings, GenericDatasetSettings, 
GridPathRAToolkitSettings, - _convert_settings_to_dagster_config, - create_dagster_config, + load_etl_settings, ) from pudl.workspace.datastore import Datastore from pudl.workspace.setup import PudlPaths @@ -46,23 +50,14 @@ def test_missing_field_error(self: Self): working_tables = ["table"] class Test(GenericDatasetSettings): - data_source: DataSource = DataSource( + data_source: DataSource = DataSource( # type: ignore # noqa: PGH003 working_partitions=working_partitions, - working_tables=working_tables, + working_tables=working_tables, # type: ignore # noqa: PGH003 ) Test() -class TestFerc1DbfToSqliteSettings: - """Test Ferc1DbfToSqliteSettings.""" - - def test_ref_year(self: Self): - """Test reference year is within working years.""" - with pytest.raises(ValidationError): - Ferc1DbfToSqliteSettings(ferc1_to_sqlite_refyear=1990) - - class TestFerc1Settings: """Test Ferc1 settings validation. @@ -80,21 +75,27 @@ def test_duplicate_sort_years(self: Self): _ = Ferc1Settings(years=[2001, 2001, 2000]) def test_none_years_raise(self: Self): - """Test years are sorted and deduplicated.""" + """Test that null years raise a validation error.""" with pytest.raises(ValidationError): - _ = Ferc1Settings(years=None) + _ = Ferc1Settings(years=None) # type: ignore # noqa: PGH003 def test_default_years(self: Self): """Test all years are used as default.""" returned_settings = Ferc1Settings() - expected_years = DataSource.from_id("ferc1").working_partitions["years"] + expected_years: list[int] = DataSource.from_id("ferc1").working_partitions[ + "years" + ] assert expected_years == returned_settings.years - dbf_expected_years = [year for year in expected_years if year <= 2020] + dbf_expected_years: list[int] = [ + year for year in expected_years if year <= 2020 + ] assert dbf_expected_years == returned_settings.dbf_years - xbrl_expected_years = [year for year in expected_years if year >= 2021] + xbrl_expected_years: list[int] = [ + year for year in expected_years if year >= 2021 + 
] assert xbrl_expected_years == returned_settings.xbrl_years @@ -115,23 +116,23 @@ def test_default_quarters(self: Self): """Test all quarters are used as default.""" returned_settings = EpaCemsSettings() - expected_year_quarters = DataSource.from_id("epacems").working_partitions[ - "year_quarters" - ] + expected_year_quarters: list[str] = DataSource.from_id( + "epacems" + ).working_partitions["year_quarters"] assert expected_year_quarters == returned_settings.year_quarters def test_all_year_quarters(self: Self): """Test the `all` option for the cems settings.""" epacems_settings_all = EpaCemsSettings(year_quarters=["all"]) - working_partitions_all = DataSource.from_id("epacems").working_partitions[ - "year_quarters" - ] + working_partitions_all: list[str] = DataSource.from_id( + "epacems" + ).working_partitions["year_quarters"] assert epacems_settings_all.year_quarters == working_partitions_all def test_none_quarters_raise(self: Self): """Test that setting a required partition to None raises an error.""" with pytest.raises(ValidationError): - _ = EpaCemsSettings(quarters=None) + _ = EpaCemsSettings(quarters=None) # type: ignore # noqa: PGH003 class TestEia860Settings: @@ -140,7 +141,7 @@ class TestEia860Settings: def test_eia860_years_overlap_eia860m_years(self: Self): """Test validation error is raised when eia860m date is within eia860 years.""" # Identify the last valid EIA-860 year: - max_eia860_year = max(Eia860Settings().years) + max_eia860_year: int = max(Eia860Settings().years) # Use that year to construct an EIA-860M year that overlaps the EIA-860 years: bad_eia860m_year_month = f"{max_eia860_year}-01" @@ -155,9 +156,9 @@ def test_eia860_years_overlap_eia860m_years(self: Self): def test_eia860m_years_overlap_eia860m_years(self: Self): """Test validation error is raised when eia860m years overlap.""" - max_eia860_year = max(Eia860Settings().years) - acceptable_eia860m_year = max_eia860_year + 1 - bad_eia860m_year_months = [ + max_eia860_year: int = 
max(Eia860Settings().years) + acceptable_eia860m_year: int = max_eia860_year + 1 + bad_eia860m_year_months: list[str] = [ f"{acceptable_eia860m_year}-01", f"{acceptable_eia860m_year}-02", ] @@ -175,7 +176,7 @@ def test_eia860m_after_eia860(self: Self): max_eia860m = pd.to_datetime( max(DataSource.from_id("eia860m").working_partitions["year_months"]) ).year - settings_eia860m_years = [ + settings_eia860m_years: list[int] = [ pd.to_datetime(date).year for date in settings_eia860.eia860m_year_months ] # Assert that the default eia860m settings years are a complete range between the @@ -195,7 +196,7 @@ class TestEia860mSettings: def test_all_year_quarters(self: Self): """Test the `all` option for the eia860m settings.""" - settings_all = Eia860mSettings(year_months=["all"]).year_months + settings_all: list[str] = Eia860mSettings(year_months=["all"]).year_months partitions_all = DataSource.from_id("eia860m").working_partitions["year_months"] assert settings_all == partitions_all @@ -207,12 +208,13 @@ def test_eia923_dependency(self: Self): """Test that there is some overlap between EIA860 and EIA923 data.""" eia923_settings = Eia923Settings() settings = EiaSettings(eia923=eia923_settings) - data_source = DataSource.from_id("eia860") - assert settings.eia860 + data_source: DataSource = DataSource.from_id("eia860") + assert settings.eia860 is not None + assert settings.eia923 is not None # assign both EIA form years - eia860_years = settings.eia860.years - eia923_years_partition = data_source.working_partitions["years"] - eia923_years_settings = settings.eia923.years + eia860_years: list[int] = settings.eia860.years + eia923_years_partition: list[int] = data_source.working_partitions["years"] + eia923_years_settings: list[int] = settings.eia923.years # assert that there is some overlap between EIA years assert not set(eia860_years).isdisjoint(eia923_years_partition) assert not set(eia860_years).isdisjoint(eia923_years_settings) @@ -221,12 +223,13 @@ def 
test_eia860_dependency(self: Self): """Test that there is some overlap between EIA860 and EIA923 data.""" eia860_settings = Eia860Settings() settings = EiaSettings(eia860=eia860_settings) - data_source = DataSource.from_id("eia923") - assert settings.eia923 + data_source: DataSource = DataSource.from_id("eia923") + assert settings.eia923 is not None + assert settings.eia860 is not None # assign both EIA form years - eia923_years = settings.eia923.years - eia860_years_partition = data_source.working_partitions["years"] - eia860_years_settings = settings.eia860.years + eia923_years: list[int] = settings.eia923.years + eia860_years_partition: list[int] = data_source.working_partitions["years"] + eia860_years_settings: list[int] = settings.eia860.years # assert that there is some overlap between EIA years assert not set(eia923_years).isdisjoint(eia860_years_partition) assert not set(eia923_years).isdisjoint(eia860_years_settings) @@ -238,10 +241,11 @@ class TestDatasetsSettings: def test_default_behavior(self: Self): """Make sure all of the years are added if nothing is specified.""" settings = DatasetsSettings() - data_source = DataSource.from_id("ferc1") + data_source: DataSource = DataSource.from_id("ferc1") - expected_years = data_source.working_partitions["years"] - returned_years = settings.ferc1.years + expected_years: list[int] = data_source.working_partitions["years"] + assert settings.ferc1 is not None + returned_years: list[int] = settings.ferc1.years assert expected_years == returned_years assert settings.eia, "EIA settings were not added." 
@@ -254,27 +258,6 @@ def test_glue(self: Self): assert settings.glue.eia assert settings.glue.ferc1 - def test_convert_settings_to_dagster_config(self: Self): - """Test conversion of dictionary to Dagster config.""" - dct = { - "eia": { - "eia860": {"years": [2021, 2022]}, - "eia923": {"years": [2021, 2022]}, - } - } - expected_dct = { - "eia": { - "eia860": {"years": Field(list, default_value=[2021, 2022])}, - "eia923": {"years": Field(list, default_value=[2021, 2022])}, - } - } - - _convert_settings_to_dagster_config(dct) - assert dct.keys() == expected_dct.keys() - assert dct["eia"].keys() == expected_dct["eia"].keys() - assert isinstance(dct["eia"]["eia860"]["years"], Field) - assert isinstance(dct["eia"]["eia923"]["years"], Field) - class TestGridPathRAToolkitSettings: """Test GridPath RA Toolkit settings validation and part selection.""" @@ -295,12 +278,16 @@ def test_parts_compiled_from_selected_options(self: Self): def test_fast_profile_gridpath_parts_not_empty(self: Self): """Ensure packaged fast settings yield GridPath parts used by Dagster assets.""" - etl_settings = EtlSettings.from_yaml( - "src/pudl/package_data/settings/etl_fast.yml" - ) + with importlib.resources.as_file( + importlib.resources.files("pudl.package_data.settings") / "etl_fast.yml" + ) as path: + etl_settings: EtlSettings = load_etl_settings(str(path)) assert etl_settings.datasets is not None - gridpath_settings = etl_settings.datasets.gridpathratoolkit + gridpath_settings: GridPathRAToolkitSettings | None = ( + etl_settings.datasets.gridpathratoolkit + ) + assert gridpath_settings is not None assert gridpath_settings.parts assert "aggregated_extended_wind_capacity" in gridpath_settings.parts @@ -317,17 +304,6 @@ def test_model_dump_round_trip(self: Self): # parts must not be present; if it is, reconstruction will raise ValidationError GridPathRAToolkitSettings(**dumped) - def test_dagster_config_excludes_computed_parts(self: Self): - """The Dagster config schema must not include the 
computed ``parts`` field. - - Regression: create_dagster_config(DatasetsSettings()) included ``parts`` as a - configurable Dagster field, causing DatasetsSettings(**resource_config) to fail - with extra="forbid" when Dagster reconstructed the settings from that schema. - """ - config = create_dagster_config(DatasetsSettings()) - gridpath_config = config.get("gridpathratoolkit", {}) - assert "parts" not in gridpath_config - class TestEtlSettings: """Test pydantic model that validates all the full ETL Settings.""" @@ -368,31 +344,50 @@ def test_immutability(self: Self): settings.eia860 = Eia860Settings() -class TestDatasetsSettingsResource: - """Test the DatasetsSettings dagster resource.""" +class TestPudlEtlSettingsResource: + """Test the ETL settings Dagster resource.""" + + def test_invalid_field_type(self: Self): + """Test an error is thrown when the ETL settings path has the wrong type.""" + init_context: UnboundInitResourceContext = build_init_resource_context( + config={"etl_settings_path": 2021} + ) + with pytest.raises(ValidationError): + _ = PudlEtlSettingsResource.from_resource_context(init_context) + + def test_loads_from_file(self: Self): + """Test that ETL settings are loaded from the shared ETL settings file.""" + with importlib.resources.as_file( + importlib.resources.files("pudl.package_data.settings") / "etl_fast.yml" + ) as path: + init_context: UnboundInitResourceContext = build_init_resource_context( + config={"etl_settings_path": str(path)} + ) - def test_invalid_datasource(self: Self): - """Test an error is thrown when there is an invalid datasource in the config.""" - init_context = build_init_resource_context( - config={"new_datasource": {"years": [1990]}} + loaded_settings: EtlSettings = PudlEtlSettingsResource.from_resource_context( + init_context ) - with pytest.raises(DagsterInvalidConfigError): - _ = dataset_settings(init_context) - def test_invalid_field_type(self: Self): - """Test an error is thrown when there is an incorrect type in 
the config.""" - init_context = build_init_resource_context(config={"ferc1": {"years": 2021}}) - with pytest.raises(DagsterInvalidConfigError): - _ = dataset_settings(init_context) - - def test_default_values(self: Self): - """Test the correct default values are created for dagster config.""" - expected_year_quarters = EpaCemsSettings().year_quarters - assert ( - dataset_settings.config_schema.default_value["epacems"]["year_quarters"] - == expected_year_quarters + assert isinstance(loaded_settings, EtlSettings) + assert loaded_settings.dataset_settings.ferc1 is not None + + +def test_datastore_resource_loads() -> None: + """Test that the migrated datastore resource creates a runtime Datastore.""" + with ZenodoDoiSettingsResource.from_resource_context_cm( + build_init_resource_context() + ) as zenodo_dois: + init_context: UnboundInitResourceContext = build_init_resource_context( + config={ + "cloud_cache_path": "s3://pudl.catalyst.coop/zenodo", + "use_local_cache": False, + }, + resources={"zenodo_dois": zenodo_dois}, ) + with DatastoreResource.from_resource_context_cm(init_context) as datastore: + assert isinstance(datastore, Datastore) + def _all_settings_instances() -> list[BaseModel]: """Return one default instance of every concrete settings class in pudl.settings. 
@@ -410,6 +405,7 @@ def _all_settings_instances() -> list[BaseModel]: skip = { _settings_module.FrozenBaseModel, _settings_module.GenericDatasetSettings, + _settings_module.FercDbfToSqliteSettings, _settings_module.FercGenericXbrlToSqliteSettings, } instances: list[BaseModel] = [] @@ -445,15 +441,14 @@ def test_all_settings_model_dump_round_trip(instance: BaseModel) -> None: def test_partitions_with_json_normalize(pudl_etl_settings): """Ensure the FERC1 and CEMS partitions normalize.""" datasets = pudl_etl_settings.get_datasets() - - ferc_parts = json_normalize(datasets["ferc1"].partitions) + ferc_parts: pd.DataFrame = json_normalize(datasets["ferc1"].partitions) if list(ferc_parts.columns) != ["year"]: raise AssertionError( "FERC1 paritions should have year and state columns only, found:" f"{ferc_parts}" ) - cems_parts = json_normalize(datasets["epacems"].partitions) + cems_parts: pd.DataFrame = json_normalize(datasets["epacems"].partitions) if list(cems_parts.columns) != ["year_quarter"]: raise AssertionError( f"CEMS paritions should have year_quarter columns only, found:{cems_parts}"