Skip to content
Open
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 122 additions & 7 deletions modelopt/onnx/quantization/autotune/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,15 @@
import importlib.util
import os
import re
import shlex
import shutil
import subprocess # nosec B404
import tempfile
import time
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any
from urllib.parse import parse_qs, urlparse

import numpy as np
import torch
Expand Down Expand Up @@ -145,6 +147,13 @@ def _write_log_file(self, file: Path | str | None, content: str) -> None:
self.logger.warning(f"Failed to save logs to {file}: {e}")


# Matches the "--safe" trtexec output format: a timestamped
# "[I] Average over N runs - GPU latency: X ms" line.
# Capture group 1 is the GPU latency in milliseconds.
safe_pattern = (
    r"\[\d{2}/\d{2}/\d{4}-\d{2}:\d{2}:\d{2}\]"
    r"\s+\[I\]\s+Average over \d+ runs"
    r" - GPU latency:\s*([\d.]+)\s*ms"
)
# Matches the standard trtexec summary line
# "[I] GPU Compute Time: ... median = X ms".
# Capture group 1 is the median latency in milliseconds.
std_pattern = r"\[I\]\s+GPU Compute Time:.*?median\s*=\s*([\d.]+)\s*ms"


class TrtExecBenchmark(Benchmark):
"""TensorRT benchmark using trtexec command-line tool.

Expand Down Expand Up @@ -183,7 +192,6 @@ def __init__(
self.temp_model_path = os.path.join(self.temp_dir, "temp_model.onnx")
self.logger.debug(f"Created temporary engine directory: {self.temp_dir}")
self.logger.debug(f"Temporary model path: {self.temp_model_path}")
self.latency_pattern = r"\[I\]\s+Latency:.*?median\s*=\s*([\d.]+)\s*ms"

self._base_cmd = [
self.trtexec_path,
Expand All @@ -204,9 +212,65 @@ def __init__(
self.logger.debug(f"Added plugin library: {plugin_path}")

trtexec_args = self.trtexec_args or []
has_remote_config = any("--remoteAutoTuningConfig" in arg for arg in trtexec_args)

if has_remote_config:
self.has_remote_config = any("--remoteAutoTuningConfig" in arg for arg in trtexec_args)
self.remote_ip: str | None = None
self.remote_port: int = 22
self.remote_user: str = "root"
self.remote_password: str = ""
self.remote_engine_path: str = "trtexec_benchmark_model.trt"
self.remote_bin_path: str = "trtexec"

if self.has_remote_config:
remote_config = [arg for arg in trtexec_args if "--remoteAutoTuningConfig" in arg]
if len(remote_config) != 1:
raise ValueError("Exactly one --remoteAutoTuningConfig argument is required")
# Parse --remoteAutoTuningConfig argument, which may be given as:
# ('--remoteAutoTuningConfig=ssh://user:pass@host:port?...') or
# ('--remoteAutoTuningConfig', 'ssh://user:pass@host:port?...')
#
# The logic: find the arg starting with '--remoteAutoTuningConfig'
# If formatted as '--remoteAutoTuningConfig=...', split off the '='
# Otherwise, grab the next argument.
config_arg_value: str | None = None
for i, arg in enumerate(trtexec_args):
if arg.startswith("--remoteAutoTuningConfig"):
if arg == "--remoteAutoTuningConfig":
# Value should be the next argument
if i + 1 < len(trtexec_args):
config_arg_value = trtexec_args[i + 1]
else:
raise ValueError("Missing value for --remoteAutoTuningConfig")
elif arg.startswith("--remoteAutoTuningConfig="):
config_arg_value = arg.split("=", 1)[1]
else:
raise ValueError(f"Malformed --remoteAutoTuningConfig argument: {arg}")
break
if not config_arg_value:
raise ValueError("Could not parse --remoteAutoTuningConfig argument")
remote_config_str: str = config_arg_value

if not remote_config_str.startswith("ssh://"):
raise ValueError("Only 'ssh://' remote autotuning config URLs are supported")
parsed = urlparse(remote_config_str)
# parsed.username, parsed.password, parsed.hostname, parsed.port, parsed.query
self.remote_user = parsed.username
self.remote_password = parsed.password
self.remote_ip = parsed.hostname
self.remote_port = parsed.port
if self.remote_port is None:
self.remote_port = 22
Comment thread
coderabbitai[bot] marked this conversation as resolved.
# Parse query options into a dict
self.remote_options = {
k: v[0] if len(v) == 1 else v for k, v in parse_qs(parsed.query).items()
}
required_params = ["remote_exec_path", "remote_lib_path"]
missing = [p for p in required_params if p not in self.remote_options]
if missing:
raise ValueError(
f"Missing required query parameters in --remoteAutoTuningConfig: {missing}"
)
self.remote_bin_path = os.path.dirname(str(self.remote_options["remote_exec_path"]))
Comment thread
coderabbitai[bot] marked this conversation as resolved.
self.remote_lib_path = str(self.remote_options["remote_lib_path"])
try:
_check_for_trtexec(min_version="10.15")
self.logger.debug("TensorRT Python API version >= 10.15 detected")
Expand All @@ -215,6 +279,7 @@ def __init__(
"Remote autotuning requires '--safe' to be set. Adding it to trtexec arguments."
)
self.trtexec_args.append("--safe")
self.is_safe = True
if "--skipInference" not in trtexec_args:
self.logger.warning(
"Remote autotuning requires '--skipInference' to be set. Adding it to trtexec arguments."
Expand All @@ -228,6 +293,7 @@ def __init__(
trtexec_args = [
arg for arg in trtexec_args if "--remoteAutoTuningConfig" not in arg
]
self.is_safe = "--safe" in trtexec_args
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
self._base_cmd.extend(trtexec_args)

self.logger.debug(f"Base command template: {' '.join(self._base_cmd)}")
Expand Down Expand Up @@ -292,10 +358,59 @@ def run(
self.logger.error(f"trtexec failed with return code {result.returncode}")
self.logger.error(f"stderr: {result.stderr}")
return float("inf")
latency_pattern = std_pattern
if self.has_remote_config and self.is_safe:
ssh_pass = []
if self.remote_password:
ssh_pass.append("sshpass")
ssh_pass.append("-p")
ssh_pass.append(self.remote_password)
# need to push the model to the device and use trtexec_safe to run
scp_cmd = [
"scp",
f"-P{self.remote_port}",
self.engine_path,
f"{self.remote_user}@{self.remote_ip}:{shlex.quote(self.remote_engine_path)}",
]
scp_cmd = ssh_pass + scp_cmd
result = subprocess.run(scp_cmd, capture_output=True, text=True) # nosec B603
if result.returncode != 0:
self.logger.error(f"Failed to push engine to remote device: {result.stderr}")
return float("inf")
ld_path = f"LD_LIBRARY_PATH={shlex.quote(self.remote_lib_path)}:$LD_LIBRARY_PATH"
trt_path = f"{os.path.join(self.remote_bin_path, 'trtexec_safe')}"
trtexec_safe_cmd = [
"ssh",
"-p",
f"{self.remote_port}",
f"{self.remote_user}@{self.remote_ip}",
f"{ld_path} {shlex.quote(trt_path)} --loadEngine={shlex.quote(self.remote_engine_path)}",
]
trtexec_safe_cmd = ssh_pass + trtexec_safe_cmd
result = subprocess.run(trtexec_safe_cmd, capture_output=True, text=True) # nosec B603
Comment on lines +363 to +392
Copy link
Copy Markdown
Contributor

@coderabbitai coderabbitai Bot Apr 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical | 🏗️ Heavy lift

CRITICAL: Multiple security violations and incorrect SSH implementation.

Several issues violate the security coding guidelines:

  1. # nosec comments are not allowed (lines 363, 378): Per SECURITY.md and coding guidelines, # nosec bypasses are forbidden. If the pattern is necessary, it requires @NVIDIA/modelopt-setup-codeowners approval with explicit PR justification.

  2. Invalid SSH authentication syntax: {self.remote_user}:{self.remote_password}@{self.remote_ip} (line 375) is not valid SSH syntax. SSH does not accept passwords on the command line. Use SSH keys, sshpass, or SSH_ASKPASS instead.

  3. Password exposure: Even if the syntax were valid, including the password in command arguments makes it visible to other users via ps.

  4. Command injection risk: Line 376 passes a command string to the remote shell. If self.remote_engine_path, self.remote_options['remote_lib_path'], or self.remote_bin_path contain shell metacharacters, they will be interpreted by the remote shell.

  5. Missing null check for remote_port: If the URL doesn't specify a port, self.remote_port is None, causing -PNone (line 359) and None (line 374) which are invalid.

Proposed fix direction
         if self.has_remote_config and self.is_safe:
             # need to push the model to the device and use trtexec_safe to run
+            if self.remote_port is None:
+                self.logger.error("Remote port is required for SCP/SSH")
+                return float("inf")
+            if self.remote_password:
+                self.logger.warning(
+                    "Password-based SSH auth is not recommended. Consider using SSH keys."
+                )
             scp_cmd = [
-                "scp",
-                f"-P{self.remote_port}",
+                "sshpass", "-p", self.remote_password,
+                "scp", "-P", str(self.remote_port),
                 self.engine_path,
                 f"{self.remote_user}@{self.remote_ip}:{self.remote_engine_path}",
             ]
-            result = subprocess.run(scp_cmd)  # nosec B603
+            result = subprocess.run(scp_cmd, capture_output=True, text=True)
             # ... similar changes for SSH command

Consider using SSH key-based authentication and passing command arguments separately to avoid shell interpretation on the remote side.

As per coding guidelines: "Any use of '# nosec' comments to bypass Bandit security checks is not allowed."

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@modelopt/onnx/quantization/autotune/benchmark.py` around lines 355 - 378, The
code in benchmark.py (scp_cmd, trtexec_safe_cmd, subprocess.run calls) has
multiple security and correctness issues: remove all "# nosec" comments,
validate self.remote_port (ensure not None and use the correct ssh/scp port
flags), never embed self.remote_password into command arguments, and stop
passing a single shell-interpreted string to SSH (trtexec_safe_cmd) which risks
command injection from self.remote_engine_path,
self.remote_options['remote_lib_path'], and self.remote_bin_path. Fix by using
key-based SSH authentication (or only use sshpass with explicit approver
consent), build scp_cmd and ssh target as "user@host" (no password), pass port
via the correct flag, and pass remote commands/LD_LIBRARY_PATH safely: either
set environment via subprocess.run(env=...) when running locally or construct a
safely-escaped remote command using shlex.quote for each component (or send a
prepared remote script) and invoke subprocess.run with a list of args (not a
single concatenated shell string); also check subprocess.returncode and capture
output/errors for logging. Ensure changes touch scp_cmd, trtexec_safe_cmd,
subprocess.run usages, and validations for
remote_port/remote_user/remote_engine_path.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Partially addressed.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dthienan-nv, thanks for the partial fix. Here's what's been addressed and what still needs attention:

Fixed:

  • Invalid SSH target syntax fixed (user:password@host → user@host)
  • sshpass is now used for password-based authentication

Still remaining:

1. # nosec comments (critical per guidelines):
The new subprocess calls still carry # nosec B603. Per SECURITY.md and the coding guidelines, these are not allowed. If the pattern is genuinely necessary, the PR must be reviewed and approved by @NVIDIA/modelopt-setup-codeowners with explicit justification in the PR description.

2. Command injection risk (line ~376 in current code):
The remote command is still a single interpolated string:

f"{ld_path} {trt_path} --loadEngine={self.remote_engine_path}",

If any of remote_lib_path, remote_bin_path, or remote_engine_path contain shell metacharacters, they'll be interpreted by the remote shell. Use shlex.quote on each component:

import shlex
remote_cmd = (
    f"LD_LIBRARY_PATH={shlex.quote(str(self.remote_options['remote_lib_path']))}:$LD_LIBRARY_PATH"
    f" {shlex.quote(trt_path)}"
    f" --loadEngine={shlex.quote(str(self.remote_engine_path))}"
)

3. Missing remote_port null check:
urlparse returns None for .port when no port is specified in the URL. This would produce -PNone in the scp command and None as the SSH -p argument. Add a guard:

if self.remote_port is None:
    self.remote_port = 22  # default SSH port

or raise a ValueError if a port is required.


🧠 Learnings used
Learnt from: kevalmorabia97
Repo: NVIDIA/Model-Optimizer PR: 974
File: modelopt/torch/puzzletron/pruning/pruning_utils.py:195-201
Timestamp: 2026-03-06T14:26:21.439Z
Learning: In NVIDIA/Model-Optimizer, for PyTorch >= 2.6, torch.load() calls without an explicit weights_only argument are safe. Do not flag bare torch.load(...) as a security issue in files under the modelopt package (e.g., modelopt/torch/puzzletron/pruning/pruning_utils.py) as long as the PyTorch version constraint is maintained. If supporting PyTorch < 2.6, require an explicit weights_only argument to torch.load() to avoid potential issues.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Partially addressed.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dthienan-nv, thanks for the continued updates! Here's an accurate status check against the current code:

Now fixed:

  • shlex.quote applied to remote_lib_path, trt_path, and remote_engine_path — command injection risk resolved.
  • remote_port null check added in the constructor — defaults to 22 if not specified in the URL.
  • sshpass with ssh_pass list applied correctly for both scp and ssh commands.

Still remaining:

# nosec comments (critical per guidelines): Three # nosec B603 comments remain in the run() method. Per SECURITY.md and the coding guidelines, these are not allowed without explicit review and approval by @NVIDIA/modelopt-setup-codeowners with justification in the PR description. Please either:

  1. Remove the # nosec comments and seek @NVIDIA/modelopt-setup-codeowners approval with a justification in the PR description explaining why subprocess.run with external commands is necessary and safe here, or
  2. If the project has existing approved patterns for subprocess use, align with those.

⚠️ Minor: The scp_cmd subprocess call still uses subprocess.run(scp_cmd) # nosec B603 without capture_output=True, text=True, so SCP failures won't have their stderr captured for logging. Consider adding it for consistency with the other calls.


🧠 Learnings used
Learnt from: kevalmorabia97
Repo: NVIDIA/Model-Optimizer PR: 974
File: modelopt/torch/puzzletron/pruning/pruning_utils.py:195-201
Timestamp: 2026-03-06T14:26:21.439Z
Learning: In NVIDIA/Model-Optimizer, for PyTorch >= 2.6, torch.load() calls without an explicit weights_only argument are safe. Do not flag bare torch.load(...) as a security issue in files under the modelopt package (e.g., modelopt/torch/puzzletron/pruning/pruning_utils.py) as long as the PyTorch version constraint is maintained. If supporting PyTorch < 2.6, require an explicit weights_only argument to torch.load() to avoid potential issues.

latency_pattern = safe_pattern
if result.returncode != 0:
# fallback and try trtexec with "--safe"
trt_path = f"{os.path.join(self.remote_bin_path, 'trtexec')}"
trtexec_safe_cmd = [
"ssh",
"-p",
f"{self.remote_port}",
f"{self.remote_user}@{self.remote_ip}",
f"{ld_path} {shlex.quote(trt_path)} --safe --loadEngine={shlex.quote(self.remote_engine_path)}",
]
trtexec_safe_cmd = ssh_pass + trtexec_safe_cmd

if not (match := re.search(self.latency_pattern, result.stdout, re.IGNORECASE)):
self.logger.warning("Could not parse median latency from trtexec output")
self.logger.debug(f"trtexec stdout:\n{result.stdout}")
result = subprocess.run(trtexec_safe_cmd, capture_output=True, text=True) # nosec B603
Comment thread
coderabbitai[bot] marked this conversation as resolved.
latency_pattern = std_pattern
if result.returncode != 0:
self.logger.error(
f"Failed to run trtexec_safe or trtexec with '--safe'\n {result.stdout}"
)
return float("inf")
if not (match := re.search(latency_pattern, result.stdout, re.IGNORECASE)):
self.logger.warning(f"trtexec stdout:\n{result.stdout}")
self.logger.error("Could not parse median latency from trtexec output")
return float("inf")
latency = float(match.group(1))
self.logger.info(f"TrtExec benchmark (median): {latency:.2f} ms")
Expand Down