nextstrain · joverlee521 · Nov 4, 2025 · Sep 22, 2025 · victorlin · Nov 3, 2025
diff --git a/ingest/Snakefile b/ingest/Snakefile
@@ -5,11 +5,13 @@ and defines its default outputs.
 # Utility functions shared across all workflows.
 include: "../shared/vendored/snakemake/config.smk"
 
-# The workflow filepaths are written relative to this Snakefile's base directory
-workdir: workflow.current_basedir
+# Use default configuration values. Extend with Snakemake's --configfile/--config options.
+configfile: os.path.join(workflow.basedir, "defaults/config.yaml")
+
+# Use custom configuration from analysis directory (i.e. working dir), if any.
+if os.path.exists("config.yaml"):
+    configfile: "config.yaml"
 
-# Use default configuration values. Override with Snakemake's --configfile/--config options.
-configfile: "defaults/config.yaml"
 
 # This is the default rule that Snakemake will run when there are no specified targets.
 # The default output of the ingest workflow is usually the curated metadata and sequences.
@@ -74,4 +76,10 @@ else:
 if "custom_rules" in config:
     for rule_file in config["custom_rules"]:
 
-        include: rule_file
+        # Relative custom rule paths in the config are relative to the analysis
+        # directory (i.e. the current working directory, or workdir, usually
+        # given by --directory), but the "include" directive treats relative
+        # paths as relative to the workflow (e.g. workflow.current_basedir).
+        # Convert to an absolute path based on the analysis/current directory
+        # to avoid this mismatch of expectations.
+        include: os.path.join(os.getcwd(), rule_file)
diff --git a/ingest/defaults/config.yaml b/ingest/defaults/config.yaml
@@ -35,9 +35,10 @@ ncbi_datasets_fields:
 
 # Config parameters related to the curate pipeline
 curate:
-  # The path to the local geolocation rules within the pathogen repo
-  # The path should be relative to the ingest directory.
-  local_geolocation_rules: "defaults/geolocation_rules.tsv"
+  # The path to the local geolocation rules for this pathogen.
+  # The path should be relative to the working directory (e.g. --directory).
+  # If the path doesn't exist in the working directory, the file in the workflow's defaults/ directory is used instead (if it exists).
+  local_geolocation_rules: "geolocation_rules.tsv"
   # List of field names to change where the key is the original field name and the value is the new field name
   # The original field names should match the ncbi_datasets_fields provided above.
   # This is the first step in the pipeline, so any references to field names in the configs below should use the new field names
@@ -90,8 +91,9 @@ curate:
   # Name to use for the generated abbreviated authors field
   abbr_authors_field: "authors"
   # Path to the manual annotations file
-  # The path should be relative to the ingest directory
-  annotations: "defaults/annotations.tsv"
+  # The path should be relative to the working directory (e.g. --directory).
+  # If the path doesn't exist in the working directory, the file in the workflow's defaults/ directory is used instead (if it exists).
+  annotations: "annotations.tsv"
   # The ID field in the metadata to use to merge the manual annotations
   annotations_id: "accession"
   # The ID field in the metadata to use as the sequence id in the output FASTA file

diff --git a/ingest/rules/curate.smk b/ingest/rules/curate.smk
@@ -32,8 +32,8 @@ def format_field_map(field_map: dict[str, str]) -> list[str]:
 rule curate:
     input:
         sequences_ndjson="data/ncbi.ndjson",
-        geolocation_rules=config["curate"]["local_geolocation_rules"],
-        annotations=config["curate"]["annotations"],
+        geolocation_rules=resolve_config_path(config["curate"]["local_geolocation_rules"]),
+        annotations=resolve_config_path(config["curate"]["annotations"]),
     output:
         metadata="data/all_metadata.tsv",
         sequences="results/sequences.fasta",

diff --git a/nextclade/Snakefile b/nextclade/Snakefile
@@ -2,11 +2,12 @@
 This is the main Nextclade Snakefile that orchestrates the workflow to produce
 a Nextclade dataset.
 """
-# The workflow filepaths are written relative to this Snakefile's base directory
-workdir: workflow.current_basedir
+# Use default configuration values. Extend with Snakemake's --configfile/--config options.
+configfile: os.path.join(workflow.basedir, "defaults/config.yaml")
 
-# Use default configuration values. Override with Snakemake's --configfile/--config options.
-configfile: "defaults/config.yaml"
+# Use custom configuration from analysis directory (i.e. working dir), if any.
+if os.path.exists("config.yaml"):
+    configfile: "config.yaml"
 
 # This is the default rule that Snakemake will run when there are no specified targets.
 # The default output of the Nextclade workflow is usually the produced Nextclade dataset.
@@ -17,6 +18,10 @@ rule all:
         # Fill in paths to the final exported Nextclade dataset.
 
 
+# Shared Snakemake files with generic functions are shared across pathogens
+# Use `resolve_config_path` to resolve file paths given in the config
+include: "../shared/vendored/snakemake/config.smk"
+
 # These rules are imported in the order that they are expected to run.
 # Each Snakefile will have documented inputs and outputs that should be kept as
 # consistent interfaces across pathogen repos. This allows us to define typical
@@ -46,4 +51,10 @@ include: "rules/export.smk"
 if "custom_rules" in config:
     for rule_file in config["custom_rules"]:
 
-        include: rule_file
+        # Relative custom rule paths in the config are relative to the analysis
+        # directory (i.e. the current working directory, or workdir, usually
+        # given by --directory), but the "include" directive treats relative
+        # paths as relative to the workflow (e.g. workflow.current_basedir).
+        # Convert to an absolute path based on the analysis/current directory
+        # to avoid this mismatch of expectations.
+        include: os.path.join(os.getcwd(), rule_file)
diff --git a/nextstrain-pathogen.yaml b/nextstrain-pathogen.yaml
@@ -1,5 +1,13 @@
-# This is currently an empty file to indicate the top level pathogen repo.
-# The inclusion of this file allows the Nextstrain CLI to run the
-# `nextstrain build` from any directory regardless of runtime.
+# This file's *existence* marks the top level of a Nextstrain pathogen repo,
+# which allows `nextstrain build` to be run from any subdirectory of the repo
+# regardless of runtime.  For more details, see
+# <https://github.com/nextstrain/cli/releases/tag/8.2.0>.
 #
-# See https://github.com/nextstrain/cli/releases/tag/8.2.0 for more details.
+# This file's *contents* is the "registration metadata" for the pathogen repo,
+# used by `nextstrain setup` and `nextstrain run`.
+---
+$schema: https://nextstrain.org/schemas/pathogen/v0
+workflows:
+  ingest:
+    compatibility:
+      nextstrain run: true
diff --git a/phylogenetic/Snakefile b/phylogenetic/Snakefile
@@ -2,11 +2,12 @@
 This is the main phylogenetic Snakefile that orchestrates the full phylogenetic
 workflow and defines its default output(s).
 """
-# The workflow filepaths are written relative to this Snakefile's base directory
-workdir: workflow.current_basedir
+# Use default configuration values. Extend with Snakemake's --configfile/--config options.
+configfile: os.path.join(workflow.basedir, "defaults/config.yaml")
 
-# Use default configuration values. Override with Snakemake's --configfile/--config options.
-configfile: "defaults/config.yaml"
+# Use custom configuration from analysis directory (i.e. working dir), if any.
+if os.path.exists("config.yaml"):
+    configfile: "config.yaml"
 
 
 # This is the default rule that Snakemake will run when there are no specified targets.
@@ -21,6 +22,7 @@ rule all:
 
 
 # Shared Snakemake files with generic functions are shared across pathogens
+# Use `resolve_config_path` to resolve file paths given in the config
 include: "../shared/vendored/snakemake/config.smk"
 include: "../shared/vendored/snakemake/remote_files.smk"
 
@@ -54,4 +56,10 @@ include: "rules/export.smk"
 if "custom_rules" in config:
     for rule_file in config["custom_rules"]:
 
-        include: rule_file
+        # Relative custom rule paths in the config are relative to the analysis
+        # directory (i.e. the current working directory, or workdir, usually
+        # given by --directory), but the "include" directive treats relative
+        # paths as relative to the workflow (e.g. workflow.current_basedir).
+        # Convert to an absolute path based on the analysis/current directory
+        # to avoid this mismatch of expectations.
+        include: os.path.join(os.getcwd(), rule_file)