Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
4c91fd6
Add function to retrieve comma-separated list of strings from environ…
dkosowski87 Mar 31, 2026
a1e7605
Add function to retrieve a string from environment variables with opt…
dkosowski87 Mar 31, 2026
b3b25cb
Add default input name configurations for YOLO Ultralytics in configu…
dkosowski87 Mar 31, 2026
f4aef5c
Enhance YOLOv8 ONNX model input handling by adding support for multip…
dkosowski87 Mar 31, 2026
9164869
Refactor YOLOv8 ONNX model input validation to enhance error handling…
dkosowski87 Apr 1, 2026
f53b1e8
Add a new script for running fused NMS inference with YOLOv8 model. T…
dkosowski87 Apr 1, 2026
91b1359
Add a new script for running fused NMS inference with YOLOv8 model. T…
dkosowski87 Apr 1, 2026
33acdfc
Integrate fused NMS post-processing in YOLOv8 ONNX model. Update the …
dkosowski87 Apr 1, 2026
f825854
Add benchmarking options to fused NMS inference script. Introduced co…
dkosowski87 Apr 1, 2026
ad6e986
Enhance fused NMS inference script with JSON reporting. Added functio…
dkosowski87 Apr 1, 2026
178a6fe
Add command-line options for run name and target directory in fused N…
dkosowski87 Apr 1, 2026
cbef2c6
Add ONNX execution provider options to fused NMS inference script. In…
dkosowski87 Apr 1, 2026
161a073
Add batch processing capability to fused NMS inference script. Introd…
dkosowski87 Apr 1, 2026
3b6e7ee
Refactor fused NMS inference script to support directory input for im…
dkosowski87 Apr 1, 2026
a8aa807
Update fused NMS inference script to improve batch processing and lat…
dkosowski87 Apr 1, 2026
245a56b
Refactor input handling in YOLOv8 ONNX object detection to use single…
dkosowski87 Apr 1, 2026
8e0dd2d
Add new script for running fused NMS inference with YOLOv8 model. Inc…
dkosowski87 Apr 1, 2026
768077b
Add new script for benchmarking fused NMS inference with YOLOv8 model…
dkosowski87 Apr 1, 2026
e8b26ac
Refactor ONNX input handling to support broadcast scalars and improve…
dkosowski87 Apr 2, 2026
c7b11a9
Add ONNX execution provider options to the fused NMS inference script…
dkosowski87 Apr 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
import json
import time
from pathlib import Path
from typing import Any, Optional

import click
import cv2
import numpy as np

from inference_models import AutoModel

# Number of images fed per forward pass when the model was exported with a
# dynamic batch dimension (static models fall back to single-image inference).
TEST_BATCH_SIZE = 4


def _onnx_ep_preset_to_providers_and_device(
preset: str,
) -> tuple[list[str], str]:
"""Map CLI preset to ONNX Runtime provider chain and PyTorch device string."""
if preset == "cpu":
return (["CPUExecutionProvider"], "cpu")
if preset == "cuda":
return (["CUDAExecutionProvider", "CPUExecutionProvider"], "cuda")
if preset == "tensorrt":
return (
[
"TensorrtExecutionProvider",
"CUDAExecutionProvider",
"CPUExecutionProvider",
],
"cuda",
)
raise click.ClickException(f"Unknown onnx-execution-providers preset: {preset!r}")


def _latency_report_dict(
*,
model_path: Path,
warmup_runs: int,
latencies_ms: list[float],
onnx_execution_providers_preset: str,
onnx_execution_providers: list[str],
device: str,
batch_size: int,
images: list[Path],
) -> dict[str, Any]:
return {
"model_path": str(model_path.resolve()),
"images": [str(image.resolve()) for image in images],
"onnx_execution_providers_preset": onnx_execution_providers_preset,
"onnx_execution_providers": onnx_execution_providers,
"device": device,
"batch_size": batch_size,
"warmup_runs": warmup_runs,
"timed_runs": len(latencies_ms),
"mean_ms": np.mean(latencies_ms),
"p_50_ms": np.percentile(latencies_ms, 50),
"p_95_ms": np.percentile(latencies_ms, 95),
"p_99_ms": np.percentile(latencies_ms, 99),
"mean_per_image_ms": np.mean(latencies_ms) / batch_size,
"throughput_fps": (batch_size * len(latencies_ms)) / (np.sum(latencies_ms) / 1000),
}


def _write_json(path: Path, payload: dict[str, Any]) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(payload, indent=2) + "\n")


@click.command()
@click.option(
    "--run-name",
    type=str,
    required=True,
    help="Name of the run for reporting. Will be used as a subdirectory in the target directory.",
)
@click.option(
    "--image-dir",
    # file_okay=False: the code globs this path as a directory, so reject plain files.
    type=click.Path(path_type=Path, exists=True, file_okay=False, dir_okay=True, readable=True),
    required=True,
    help="Path to the input image directory.",
)
@click.option(
    "--model-path",
    type=click.Path(path_type=Path, exists=True, dir_okay=True, readable=True),
    required=True,
    help="Path to the model directory.",
)
@click.option(
    "--target-dir",
    type=click.Path(path_type=Path, file_okay=False),
    required=True,
    help="Directory for latency.json (created if missing).",
)
@click.option(
    "--confidence",
    type=float,
    help="Confidence threshold used by post-processing.",
)
@click.option(
    "--iou-threshold",
    type=float,
    help="IOU threshold used by post-processing.",
)
@click.option(
    "--max-detections",
    type=int,
    help="Maximum number of detections used by post-processing.",
)
@click.option(
    "-n",
    "--benchmark-iters",
    type=click.IntRange(min=1),
    default=200,
    show_default=True,
    # Original help claimed 0 was accepted, contradicting IntRange(min=1).
    help="Number of timed inference runs for benchmarking (mean/percentiles in ms). Minimum 1.",
)
@click.option(
    "--warmup",
    type=click.IntRange(min=0),
    default=20,
    show_default=True,
    help="Untimed warmup runs before timed iterations.",
)
@click.option(
    "--onnx-execution-providers",
    "onnx_ep_preset",
    type=click.Choice(["cpu", "cuda", "tensorrt"], case_sensitive=False),
    default="cpu",
    show_default=True,
    help=(
        "ONNX Runtime execution provider chain: "
        "cpu (CPUExecutionProvider); "
        "cuda (CUDAExecutionProvider then CPUExecutionProvider); "
        "tensorrt (TensorrtExecutionProvider, CUDA, then CPU fallbacks)."
    ),
)
def main(
    run_name: str,
    image_dir: Path,
    model_path: Path,
    target_dir: Path,
    confidence: Optional[float] = None,
    iou_threshold: Optional[float] = None,
    max_detections: Optional[int] = None,
    benchmark_iters: int = 200,
    warmup: int = 20,
    onnx_ep_preset: str = "cpu",
) -> None:
    """Benchmark fused-NMS inference latency and write JSON reports.

    Loads the model, runs `warmup` untimed then `benchmark_iters` timed
    forward passes over images from `image_dir`, and writes latency stats,
    the effective NMS parameters, and the model's inference config under
    `target_dir/run_name/`.
    """
    onnx_ep_preset = onnx_ep_preset.lower()
    onnx_providers, device_str = _onnx_ep_preset_to_providers_and_device(onnx_ep_preset)

    click.echo(
        f"Loading model: {model_path} "
        f"(onnx_execution_providers={onnx_providers!r}, device={device_str!r})"
    )
    model = AutoModel.from_pretrained(
        model_path,
        onnx_execution_providers=list(onnx_providers),
        device=device_str,
    )

    # NOTE(review): reaches into the private _inference_config attribute; consider a
    # public accessor on AutoModel if one exists.
    click.echo(f"Fused NMS available: {model._inference_config.post_processing.fused}")

    # Keep only the NMS parameters the user explicitly provided; the rest fall
    # back to the model's own post-processing defaults.
    nms_params = {
        "confidence": confidence,
        "iou_threshold": iou_threshold,
        "max_detections": max_detections,
    }
    nms_params = {name: value for name, value in nms_params.items() if value is not None}

    if nms_params:
        click.echo(f"User provided NMS parameters: {nms_params}")

    forward_pass = model._inference_config.forward_pass
    # No static batch size in the export means the batch dimension is dynamic.
    use_batching = forward_pass.static_batch_size is None

    if use_batching:
        click.echo("Model exported as dynamic. Using image batch")
    else:
        click.echo("Model exported as static. Using single image inference")

    image_paths = list(image_dir.glob("*.jpg"))
    if not image_paths:
        # Fail fast with a clear message instead of an IndexError below.
        raise click.ClickException(f"No *.jpg images found in: {image_dir}")
    batched_image_paths = image_paths[:TEST_BATCH_SIZE] if use_batching else image_paths[:1]

    images = []
    for image_path in batched_image_paths:
        image = cv2.imread(str(image_path))
        if image is None:
            raise click.ClickException(f"Could not load image from: {image_path}")
        images.append(image)

    # Dynamic models take a list of images; static models take a single image.
    inputs = images[:TEST_BATCH_SIZE] if use_batching else images[0]

    click.echo(f"Warmup: {warmup} untimed runs..." if warmup > 0 else "No warmup runs.")

    for _ in range(warmup):
        predictions = model(inputs, **nms_params)
        # Include post-processing conversion in the measured path, warmup included.
        _ = predictions[0].to_supervision()

    click.echo(f"Benchmarking: {benchmark_iters} timed runs...")

    latencies_ms: list[float] = []
    for _ in range(benchmark_iters):
        t0 = time.perf_counter()
        predictions = model(inputs, **nms_params)
        _ = predictions[0].to_supervision()
        latencies_ms.append((time.perf_counter() - t0) * 1000.0)

    click.echo("Writing reports ...")

    target_dir.mkdir(parents=True, exist_ok=True)
    latency_path = target_dir / run_name / "latency.json"
    nms_params_path = target_dir / run_name / "nms_params.json"
    inference_config_path = target_dir / run_name / "inference_config.json"

    _write_json(
        latency_path,
        _latency_report_dict(
            model_path=model_path,
            warmup_runs=warmup,
            latencies_ms=latencies_ms,
            onnx_execution_providers_preset=onnx_ep_preset,
            onnx_execution_providers=list(onnx_providers),
            device=device_str,
            batch_size=len(inputs) if isinstance(inputs, list) else 1,
            images=batched_image_paths,
        ),
    )
    # BUG FIX: model_dump_json() returns an already-encoded JSON *string*; passing it
    # straight to _write_json double-encoded it into a quoted string literal.
    # Decode it back to a dict so the file contains a JSON object.
    _write_json(inference_config_path, json.loads(model._inference_config.model_dump_json()))
    _write_json(nms_params_path, nms_params)

    click.echo("Done!")


if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
from pathlib import Path
from typing import Optional

import click
import cv2

from inference_models import AutoModel


def _onnx_ep_preset_to_providers_and_device(
preset: str,
) -> tuple[list[str], str]:
"""Map CLI preset to ONNX Runtime provider chain and PyTorch device string."""
if preset == "cpu":
return (["CPUExecutionProvider"], "cpu")
if preset == "cuda":
return (["CUDAExecutionProvider", "CPUExecutionProvider"], "cuda")
if preset == "tensorrt":
return (
[
"TensorrtExecutionProvider",
"CUDAExecutionProvider",
"CPUExecutionProvider",
],
"cuda",
)
raise click.ClickException(f"Unknown onnx-execution-providers preset: {preset!r}")


@click.command()
@click.option(
    "--image-path",
    type=click.Path(path_type=Path, exists=True, dir_okay=False, readable=True),
    required=True,
    help="Path to the input image.",
)
@click.option(
    "--model-path",
    type=click.Path(path_type=Path, exists=True, dir_okay=True, readable=True),
    required=True,
    help="Path to the model directory.",
)
@click.option(
    "--confidence",
    type=float,
    help="Confidence threshold used by post-processing.",
)
@click.option(
    "--iou-threshold",
    type=float,
    help="IOU threshold used by post-processing.",
)
@click.option(
    "--max-detections",
    type=int,
    help="Maximum number of detections used by post-processing.",
)
@click.option(
    "--onnx-execution-providers",
    "onnx_ep_preset",
    type=click.Choice(["cpu", "cuda", "tensorrt"], case_sensitive=False),
    default="cpu",
    show_default=True,
    help=(
        "ONNX Runtime execution provider chain: "
        "cpu (CPUExecutionProvider); "
        "cuda (CUDAExecutionProvider then CPUExecutionProvider); "
        "tensorrt (TensorrtExecutionProvider, CUDA, then CPU fallbacks)."
    ),
)
def main(
    image_path: Path,
    model_path: Path,
    confidence: Optional[float] = None,
    iou_threshold: Optional[float] = None,
    max_detections: Optional[int] = None,
    onnx_ep_preset: str = "cpu",
) -> None:
    """Run single-image fused-NMS inference and print the detections."""
    image = cv2.imread(str(image_path))
    if image is None:
        raise click.ClickException(f"Could not load image from: {image_path}")

    # Forward only the NMS parameters the user actually supplied; everything
    # else uses the model's own post-processing defaults.
    provided = [
        ("confidence", confidence),
        ("iou_threshold", iou_threshold),
        ("max_detections", max_detections),
    ]
    nms_params = {name: value for name, value in provided if value is not None}
    if nms_params:
        click.echo(f"User provided NMS parameters: {nms_params}")

    onnx_providers, device_str = _onnx_ep_preset_to_providers_and_device(
        onnx_ep_preset.lower()
    )

    click.echo(
        f"Loading model: {model_path} "
        f"(onnx_execution_providers={onnx_providers!r}, device={device_str!r})"
    )
    model = AutoModel.from_pretrained(
        model_path,
        onnx_execution_providers=list(onnx_providers),
        device=device_str,
    )

    # NOTE(review): reads the private _inference_config attribute directly.
    click.echo(f"Fused NMS available: {model._inference_config.post_processing.fused}")

    forward_pass = model._inference_config.forward_pass
    # A missing static batch size means the export has a dynamic batch dimension.
    if forward_pass.static_batch_size is None:
        max_dyn = forward_pass.max_dynamic_batch_size
        suffix = (
            f"maximum batch size is {max_dyn}."
            if max_dyn is not None
            else "max_dynamic_batch_size is not set in the model config."
        )
        click.echo("Batching: dynamic mode (no static batch size); " + suffix)

    click.echo("Running inference...")
    predictions = model(image, **nms_params)
    detections = predictions[0].to_supervision()

    click.echo(f"Detected {len(detections)} objects")
    rows = zip(detections.xyxy, detections.class_id, detections.confidence)
    for idx, (xyxy, class_id, conf) in enumerate(rows, start=1):
        x1, y1, x2, y2 = (int(v) for v in xyxy.tolist())
        click.echo(
            f"[{idx}] class_id={int(class_id)} confidence={float(conf):.4f} "
            f"bbox=({x1}, {y1}, {x2}, {y2})"
        )


if __name__ == "__main__":
    main()
Loading
Loading