Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CONTRIBUTING.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ To resolve this, manually link the upstream and pull all tags::
git remote add upstream https://github.com/con/duct
git fetch upstream



Testing
-------
Expand Down
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ usage: con-duct run [-h] [-l {NONE,CRITICAL,ERROR,WARNING,INFO,DEBUG}] [-q]
[-o {all,none,stdout,stderr}]
[-t {all,system-summary,processes-samples}] [-m MESSAGE]
[--mode {new-session,current-session}]
[--instruments INSTRUMENTS]
[--gpu-sample-interval GPU_SAMPLE_INTERVAL]
[--gpu-timeout GPU_TIMEOUT]
command [command_args ...] ...

duct is a lightweight wrapper that collects execution data for an arbitrary
Expand Down Expand Up @@ -105,6 +108,9 @@ environment variables:
DUCT_REPORT_INTERVAL: see --report-interval
DUCT_CAPTURE_OUTPUTS: see --capture-outputs
DUCT_MESSAGE: see --message
DUCT_INSTRUMENTS: see --instruments (e.g., "cpu,mem,gpu" or "all")
DUCT_GPU_SAMPLE_INTERVAL: see --gpu-sample-interval
DUCT_GPU_TIMEOUT: see --gpu-timeout
DUCT_CONFIG_PATHS: paths to .env files separated by platform path separator
(':' on Unix) (see below)

Expand Down Expand Up @@ -235,6 +241,21 @@ options:
current session instead of starting a new one. Useful
for tracking slurm jobs or other commands that should
run in the current session. (default: new-session)
--instruments INSTRUMENTS
Comma-separated list of instruments to enable: cpu,
mem, gpu, or 'all'. You can also provide value via
DUCT_INSTRUMENTS env variable. (default: cpu,mem)
--gpu-sample-interval GPU_SAMPLE_INTERVAL
Interval in seconds between GPU status checks. If not
specified or 0, uses --sample-interval. Useful when
nvidia-smi calls are slow. You can also provide value
via DUCT_GPU_SAMPLE_INTERVAL env variable. (default:
0.0)
--gpu-timeout GPU_TIMEOUT
Timeout in seconds for nvidia-smi calls. (default:
5.0)

```
<!-- END HELP -->
Expand Down
29 changes: 29 additions & 0 deletions src/con_duct/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
from con_duct.duct_main import (
DUCT_OUTPUT_PREFIX,
EXECUTION_SUMMARY_FORMAT,
GPU_SAMPLE_TIMEOUT,
Outputs,
RecordTypes,
SessionMode,
)
from con_duct.duct_main import execute as duct_execute
from con_duct.duct_main import instruments_from_str
from con_duct.ls import LS_FIELD_CHOICES, ls
from con_duct.plot import matplotlib_plot
from con_duct.pprint_json import pprint_json
Expand Down Expand Up @@ -146,6 +148,9 @@ def _replay_early_logs(log_buffer: List[tuple[str, str]]) -> None:
DUCT_REPORT_INTERVAL: see --report-interval
DUCT_CAPTURE_OUTPUTS: see --capture-outputs
DUCT_MESSAGE: see --message
DUCT_INSTRUMENTS: see --instruments (e.g., "cpu,mem,gpu" or "all")
DUCT_GPU_SAMPLE_INTERVAL: see --gpu-sample-interval
DUCT_GPU_TIMEOUT: see --gpu-timeout
DUCT_CONFIG_PATHS: paths to .env files separated by platform path separator
(':' on Unix) (see below)

Expand Down Expand Up @@ -366,6 +371,30 @@ def _create_run_parser() -> argparse.ArgumentParser:
"'current-session' tracks the current session instead of starting a new one. "
"Useful for tracking slurm jobs or other commands that should run in the current session.",
)
parser.add_argument(
"--instruments",
type=instruments_from_str,
default=instruments_from_str(os.getenv("DUCT_INSTRUMENTS", "cpu,mem")),
help="Comma-separated list of instruments to enable: cpu, mem, gpu, or 'all'. "
"You can also provide value via DUCT_INSTRUMENTS env variable. "
"(default: cpu,mem)",
)
parser.add_argument(
"--gpu-sample-interval",
type=float,
default=float(os.getenv("DUCT_GPU_SAMPLE_INTERVAL", "0")),
help="Interval in seconds between GPU status checks. "
"If not specified or 0, uses --sample-interval. "
"Useful when nvidia-smi calls are slow. "
"You can also provide value via DUCT_GPU_SAMPLE_INTERVAL env variable.",
)
parser.add_argument(
"--gpu-timeout",
type=float,
default=float(os.getenv("DUCT_GPU_TIMEOUT", str(GPU_SAMPLE_TIMEOUT))),
help="Timeout in seconds for nvidia-smi calls. "
f"(default: {GPU_SAMPLE_TIMEOUT})",
)
return parser


Expand Down
Loading
Loading