Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions ingest/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ and defines its default outputs.
# Utility functions shared across all workflows.
include: "../shared/vendored/snakemake/config.smk"

# The workflow filepaths are written relative to this Snakefile's base directory
workdir: workflow.current_basedir
# Use default configuration values. Extend with Snakemake's --configfile/--config options.
configfile: os.path.join(workflow.basedir, "defaults/config.yaml")

# Use custom configuration from analysis directory (i.e. working dir), if any.
if os.path.exists("config.yaml"):
configfile: "config.yaml"

# Use default configuration values. Override with Snakemake's --configfile/--config options.
configfile: "defaults/config.yaml"

# This is the default rule that Snakemake will run when there are no specified targets.
# The default output of the ingest workflow is usually the curated metadata and sequences.
Expand Down Expand Up @@ -74,4 +76,10 @@ else:
if "custom_rules" in config:
for rule_file in config["custom_rules"]:

include: rule_file
# Relative custom rule paths in the config are relative to the analysis
# directory (i.e. the current working directory, or workdir, usually
# given by --directory), but the "include" directive treats relative
# paths as relative to the workflow (e.g. workflow.current_basedir).
# Convert to an absolute path based on the analysis/current directory
# to avoid this mismatch of expectations.
include: os.path.join(os.getcwd(), rule_file)
12 changes: 7 additions & 5 deletions ingest/defaults/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@ ncbi_datasets_fields:

# Config parameters related to the curate pipeline
curate:
# The path to the local geolocation rules within the pathogen repo
# The path should be relative to the ingest directory.
local_geolocation_rules: "defaults/geolocation_rules.tsv"
# The path to the local geolocation rules for this pathogen.
# The path should be relative to the working directory (e.g. --directory).
# If the path doesn't exist in the working directory, the file in the workflow's defaults/ directory is used instead (if it exists).
local_geolocation_rules: "geolocation_rules.tsv"
# List of field names to change where the key is the original field name and the value is the new field name
# The original field names should match the ncbi_datasets_fields provided above.
# This is the first step in the pipeline, so any references to field names in the configs below should use the new field names
Expand Down Expand Up @@ -90,8 +91,9 @@ curate:
# Name to use for the generated abbreviated authors field
abbr_authors_field: "authors"
# Path to the manual annotations file
# The path should be relative to the ingest directory
annotations: "defaults/annotations.tsv"
# The path should be relative to the working directory (e.g. --directory).
# If the path doesn't exist in the working directory, the file in the workflow's defaults/ directory is used instead (if it exists).
annotations: "annotations.tsv"
# The ID field in the metadata to use to merge the manual annotations
annotations_id: "accession"
# The ID field in the metadata to use as the sequence id in the output FASTA file
Expand Down
4 changes: 2 additions & 2 deletions ingest/rules/curate.smk
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ def format_field_map(field_map: dict[str, str]) -> list[str]:
rule curate:
input:
sequences_ndjson="data/ncbi.ndjson",
geolocation_rules=config["curate"]["local_geolocation_rules"],
annotations=config["curate"]["annotations"],
geolocation_rules=resolve_config_path(config["curate"]["local_geolocation_rules"]),
annotations=resolve_config_path(config["curate"]["annotations"]),
output:
metadata="data/all_metadata.tsv",
sequences="results/sequences.fasta",
Expand Down
21 changes: 16 additions & 5 deletions nextclade/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
This is the main Nextclade Snakefile that orchestrates the workflow to produce
a Nextclade dataset.
"""
# The workflow filepaths are written relative to this Snakefile's base directory
workdir: workflow.current_basedir
# Use default configuration values. Extend with Snakemake's --configfile/--config options.
configfile: os.path.join(workflow.basedir, "defaults/config.yaml")

# Use default configuration values. Override with Snakemake's --configfile/--config options.
configfile: "defaults/config.yaml"
# Use custom configuration from analysis directory (i.e. working dir), if any.
if os.path.exists("config.yaml"):
configfile: "config.yaml"

# This is the default rule that Snakemake will run when there are no specified targets.
# The default output of the Nextclade workflow is usually the produced Nextclade dataset.
Expand All @@ -17,6 +18,10 @@ rule all:
# Fill in paths to the final exported Nextclade dataset.


# Snakemake files with generic functions that are shared across pathogens.
# Use `resolve_config_path` to resolve file paths for config files
include: "../shared/vendored/snakemake/config.smk"

# These rules are imported in the order that they are expected to run.
# Each Snakefile will have documented inputs and outputs that should be kept as
# consistent interfaces across pathogen repos. This allows us to define typical
Expand Down Expand Up @@ -46,4 +51,10 @@ include: "rules/export.smk"
if "custom_rules" in config:
for rule_file in config["custom_rules"]:

include: rule_file
# Relative custom rule paths in the config are relative to the analysis
# directory (i.e. the current working directory, or workdir, usually
# given by --directory), but the "include" directive treats relative
# paths as relative to the workflow (e.g. workflow.current_basedir).
# Convert to an absolute path based on the analysis/current directory
# to avoid this mismatch of expectations.
include: os.path.join(os.getcwd(), rule_file)
16 changes: 12 additions & 4 deletions nextstrain-pathogen.yaml
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Commit messages are outdated:

  • 1deaceb – workflow specific compatibility is already supported
  • ecca55d – "Add support for nextstrain run" (386f5d9) references an old dangling commit

I think it'd be fine to squash both into a single commit.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, thanks for flagging. Will squash them.

Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# This is currently an empty file to indicate the top level pathogen repo.
# The inclusion of this file allows the Nextstrain CLI to run the
# `nextstrain build` from any directory regardless of runtime.
# This file's *existence* marks the top level of a Nextstrain pathogen repo,
# which allows `nextstrain build` to be run from any subdirectory of the repo
# regardless of runtime. For more details, see
# <https://github.com/nextstrain/cli/releases/tag/8.2.0>.
#
# See https://github.com/nextstrain/cli/releases/tag/8.2.0 for more details.
# This file's *contents* are the "registration metadata" for the pathogen repo,
# used by `nextstrain setup` and `nextstrain run`.
---
$schema: https://nextstrain.org/schemas/pathogen/v0
workflows:
ingest:
compatibility:
nextstrain run: true
18 changes: 13 additions & 5 deletions phylogenetic/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
This is the main phylogenetic Snakefile that orchestrates the full phylogenetic
workflow and defines its default output(s).
"""
# The workflow filepaths are written relative to this Snakefile's base directory
workdir: workflow.current_basedir
# Use default configuration values. Extend with Snakemake's --configfile/--config options.
configfile: os.path.join(workflow.basedir, "defaults/config.yaml")

# Use default configuration values. Override with Snakemake's --configfile/--config options.
configfile: "defaults/config.yaml"
# Use custom configuration from analysis directory (i.e. working dir), if any.
if os.path.exists("config.yaml"):
configfile: "config.yaml"


# This is the default rule that Snakemake will run when there are no specified targets.
Expand All @@ -21,6 +22,7 @@ rule all:


# Snakemake files with generic functions that are shared across pathogens.
# Use `resolve_config_path` to resolve file paths for config files
include: "../shared/vendored/snakemake/config.smk"
include: "../shared/vendored/snakemake/remote_files.smk"

Expand Down Expand Up @@ -54,4 +56,10 @@ include: "rules/export.smk"
if "custom_rules" in config:
for rule_file in config["custom_rules"]:

include: rule_file
# Relative custom rule paths in the config are relative to the analysis
# directory (i.e. the current working directory, or workdir, usually
# given by --directory), but the "include" directive treats relative
# paths as relative to the workflow (e.g. workflow.current_basedir).
# Convert to an absolute path based on the analysis/current directory
# to avoid this mismatch of expectations.
include: os.path.join(os.getcwd(), rule_file)