diff --git a/ingest/Snakefile b/ingest/Snakefile index 4c6056c..5d1e323 100644 --- a/ingest/Snakefile +++ b/ingest/Snakefile @@ -5,11 +5,13 @@ and defines its default outputs. # Utility functions shared across all workflows. include: "../shared/vendored/snakemake/config.smk" -# The workflow filepaths are written relative to this Snakefile's base directory -workdir: workflow.current_basedir +# Use default configuration values. Extend with Snakemake's --configfile/--config options. +configfile: os.path.join(workflow.basedir, "defaults/config.yaml") + +# Use custom configuration from analysis directory (i.e. working dir), if any. +if os.path.exists("config.yaml"): + configfile: "config.yaml" -# Use default configuration values. Override with Snakemake's --configfile/--config options. -configfile: "defaults/config.yaml" # This is the default rule that Snakemake will run when there are no specified targets. # The default output of the ingest workflow is usually the curated metadata and sequences. @@ -74,4 +76,10 @@ else: if "custom_rules" in config: for rule_file in config["custom_rules"]: - include: rule_file + # Relative custom rule paths in the config are relative to the analysis + # directory (i.e. the current working directory, or workdir, usually + # given by --directory), but the "include" directive treats relative + # paths as relative to the workflow (e.g. workflow.current_basedir). + # Convert to an absolute path based on the analysis/current directory + # to avoid this mismatch of expectations. + include: os.path.join(os.getcwd(), rule_file) diff --git a/ingest/defaults/config.yaml b/ingest/defaults/config.yaml index 29a7c99..087129b 100644 --- a/ingest/defaults/config.yaml +++ b/ingest/defaults/config.yaml @@ -35,9 +35,10 @@ ncbi_datasets_fields: # Config parameters related to the curate pipeline curate: - # The path to the local geolocation rules within the pathogen repo - # The path should be relative to the ingest directory. 
- local_geolocation_rules: "defaults/geolocation_rules.tsv" + # The path to the local geolocation rules for this pathogen. + # The path should be relative to the working directory (e.g. --directory). + # If the path doesn't exist in the working directory, the file in the workflow's defaults/ directory is used instead (if it exists). + local_geolocation_rules: "geolocation_rules.tsv" # List of field names to change where the key is the original field name and the value is the new field name # The original field names should match the ncbi_datasets_fields provided above. # This is the first step in the pipeline, so any references to field names in the configs below should use the new field names @@ -90,8 +91,9 @@ curate: # Name to use for the generated abbreviated authors field abbr_authors_field: "authors" # Path to the manual annotations file - # The path should be relative to the ingest directory - annotations: "defaults/annotations.tsv" + # The path should be relative to the working directory (e.g. --directory). + # If the path doesn't exist in the working directory, the file in the workflow's defaults/ directory is used instead (if it exists). 
+ annotations: "annotations.tsv" # The ID field in the metadata to use to merge the manual annotations annotations_id: "accession" # The ID field in the metadata to use as the sequence id in the output FASTA file diff --git a/ingest/rules/curate.smk b/ingest/rules/curate.smk index d1acb45..decdbb5 100644 --- a/ingest/rules/curate.smk +++ b/ingest/rules/curate.smk @@ -32,8 +32,8 @@ def format_field_map(field_map: dict[str, str]) -> list[str]: rule curate: input: sequences_ndjson="data/ncbi.ndjson", - geolocation_rules=config["curate"]["local_geolocation_rules"], - annotations=config["curate"]["annotations"], + geolocation_rules=resolve_config_path(config["curate"]["local_geolocation_rules"]), + annotations=resolve_config_path(config["curate"]["annotations"]), output: metadata="data/all_metadata.tsv", sequences="results/sequences.fasta", diff --git a/nextclade/Snakefile b/nextclade/Snakefile index b7bc586..9c766a9 100644 --- a/nextclade/Snakefile +++ b/nextclade/Snakefile @@ -2,11 +2,12 @@ This is the main Nextclade Snakefile that orchestrates the workflow to produce a Nextclade dataset. """ -# The workflow filepaths are written relative to this Snakefile's base directory -workdir: workflow.current_basedir +# Use default configuration values. Extend with Snakemake's --configfile/--config options. +configfile: os.path.join(workflow.basedir, "defaults/config.yaml") -# Use default configuration values. Override with Snakemake's --configfile/--config options. -configfile: "defaults/config.yaml" +# Use custom configuration from analysis directory (i.e. working dir), if any. +if os.path.exists("config.yaml"): + configfile: "config.yaml" # This is the default rule that Snakemake will run when there are no specified targets. # The default output of the Nextclade workflow is usually the produced Nextclade dataset. @@ -17,6 +18,10 @@ rule all: # Fill in paths to the final exported Nextclade dataset. 
+# Shared Snakemake files with generic functions are shared across pathogens +# Use `resolve_config_path` to resolve file paths for config files +include: "../shared/vendored/snakemake/config.smk" + # These rules are imported in the order that they are expected to run. # Each Snakefile will have documented inputs and outputs that should be kept as # consistent interfaces across pathogen repos. This allows us to define typical @@ -46,4 +51,10 @@ include: "rules/export.smk" if "custom_rules" in config: for rule_file in config["custom_rules"]: - include: rule_file + # Relative custom rule paths in the config are relative to the analysis + # directory (i.e. the current working directory, or workdir, usually + # given by --directory), but the "include" directive treats relative + # paths as relative to the workflow (e.g. workflow.current_basedir). + # Convert to an absolute path based on the analysis/current directory + # to avoid this mismatch of expectations. + include: os.path.join(os.getcwd(), rule_file) diff --git a/nextstrain-pathogen.yaml b/nextstrain-pathogen.yaml index b74c50d..080e025 100644 --- a/nextstrain-pathogen.yaml +++ b/nextstrain-pathogen.yaml @@ -1,5 +1,13 @@ -# This is currently an empty file to indicate the top level pathogen repo. -# The inclusion of this file allows the Nextstrain CLI to run the -# `nextstrain build` from any directory regardless of runtime. +# This file's *existence* marks the top level of a Nextstrain pathogen repo, +# which allows `nextstrain build` to be run from any subdirectory of the repo +# regardless of runtime. For more details, see +# https://github.com/nextstrain/cli/releases/tag/8.2.0. # -# See https://github.com/nextstrain/cli/releases/tag/8.2.0 for more details. +# This file's *contents* is the "registration metadata" for the pathogen repo, +# used by `nextstrain setup` and `nextstrain run`. 
+--- +$schema: https://nextstrain.org/schemas/pathogen/v0 +workflows: + ingest: + compatibility: + nextstrain run: true diff --git a/phylogenetic/Snakefile b/phylogenetic/Snakefile index bcf3939..43bc040 100644 --- a/phylogenetic/Snakefile +++ b/phylogenetic/Snakefile @@ -2,11 +2,12 @@ This is the main phylogenetic Snakefile that orchestrates the full phylogenetic workflow and defines its default output(s). """ -# The workflow filepaths are written relative to this Snakefile's base directory -workdir: workflow.current_basedir +# Use default configuration values. Extend with Snakemake's --configfile/--config options. +configfile: os.path.join(workflow.basedir, "defaults/config.yaml") -# Use default configuration values. Override with Snakemake's --configfile/--config options. -configfile: "defaults/config.yaml" +# Use custom configuration from analysis directory (i.e. working dir), if any. +if os.path.exists("config.yaml"): + configfile: "config.yaml" # This is the default rule that Snakemake will run when there are no specified targets. @@ -21,6 +22,7 @@ rule all: # Shared Snakemake files with generic functions are shared across pathogens +# Use `resolve_config_path` to resolve file paths for config files include: "../shared/vendored/snakemake/config.smk" include: "../shared/vendored/snakemake/remote_files.smk" @@ -54,4 +56,10 @@ include: "rules/export.smk" if "custom_rules" in config: for rule_file in config["custom_rules"]: - include: rule_file + # Relative custom rule paths in the config are relative to the analysis + # directory (i.e. the current working directory, or workdir, usually + # given by --directory), but the "include" directive treats relative + # paths as relative to the workflow (e.g. workflow.current_basedir). + # Convert to an absolute path based on the analysis/current directory + # to avoid this mismatch of expectations. + include: os.path.join(os.getcwd(), rule_file)