diff --git a/inference_models/examples/fused-nms/run_fused_nms_inference_benchmarking.py b/inference_models/examples/fused-nms/run_fused_nms_inference_benchmarking.py new file mode 100644 index 0000000000..d59be4276b --- /dev/null +++ b/inference_models/examples/fused-nms/run_fused_nms_inference_benchmarking.py @@ -0,0 +1,238 @@ +import json +import time +from pathlib import Path +from typing import Any, Optional + +import click +import cv2 +import numpy as np + +from inference_models import AutoModel + +TEST_BATCH_SIZE = 4 + + +def _onnx_ep_preset_to_providers_and_device( + preset: str, +) -> tuple[list[str], str]: + """Map CLI preset to ONNX Runtime provider chain and PyTorch device string.""" + if preset == "cpu": + return (["CPUExecutionProvider"], "cpu") + if preset == "cuda": + return (["CUDAExecutionProvider", "CPUExecutionProvider"], "cuda") + if preset == "tensorrt": + return ( + [ + "TensorrtExecutionProvider", + "CUDAExecutionProvider", + "CPUExecutionProvider", + ], + "cuda", + ) + raise click.ClickException(f"Unknown onnx-execution-providers preset: {preset!r}") + + +def _latency_report_dict( + *, + model_path: Path, + warmup_runs: int, + latencies_ms: list[float], + onnx_execution_providers_preset: str, + onnx_execution_providers: list[str], + device: str, + batch_size: int, + images: list[Path], +) -> dict[str, Any]: + return { + "model_path": str(model_path.resolve()), + "images": [str(image.resolve()) for image in images], + "onnx_execution_providers_preset": onnx_execution_providers_preset, + "onnx_execution_providers": onnx_execution_providers, + "device": device, + "batch_size": batch_size, + "warmup_runs": warmup_runs, + "timed_runs": len(latencies_ms), + "mean_ms": np.mean(latencies_ms), + "p_50_ms": np.percentile(latencies_ms, 50), + "p_95_ms": np.percentile(latencies_ms, 95), + "p_99_ms": np.percentile(latencies_ms, 99), + "mean_per_image_ms": np.mean(latencies_ms) / batch_size, + "throughput_fps": (batch_size * len(latencies_ms)) / 
(np.sum(latencies_ms) / 1000), + } + + +def _write_json(path: Path, payload: dict[str, Any]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(payload, indent=2) + "\n") + + +@click.command() +@click.option( + "--run-name", + type=str, + required=True, + help="Name of the run for reporting. Will be used as a subdirectory in the target directory.", +) +@click.option( + "--image-dir", + type=click.Path(path_type=Path, exists=True, dir_okay=True, readable=True), + required=True, + help="Path to the input image directory.", +) +@click.option( + "--model-path", + type=click.Path(path_type=Path, exists=True, dir_okay=True, readable=True), + required=True, + help="Path to the model directory.", +) +@click.option( + "--target-dir", + type=click.Path(path_type=Path, file_okay=False), + required=True, + help="Directory for latency.json (created if missing).", +) +@click.option( + "--confidence", + type=float, + help="Confidence threshold used by post-processing.", +) +@click.option( + "--iou-threshold", + type=float, + help="IOU threshold used by post-processing.", +) +@click.option( + "--max-detections", + type=int, + help="Maximum number of detections used by post-processing.", +) +@click.option( + "-n", + "--benchmark-iters", + type=click.IntRange(min=1), + default=200, + show_default=True, + help=( + "Number of timed inference runs for benchmarking (mean/median/std in ms). " + "0 runs inference once without benchmark stats." 
+ ), +) +@click.option( + "--warmup", + type=click.IntRange(min=0), + default=20, + show_default=True, + help="Untimed warmup runs before timed iterations.", +) +@click.option( + "--onnx-execution-providers", + "onnx_ep_preset", + type=click.Choice(["cpu", "cuda", "tensorrt"], case_sensitive=False), + default="cpu", + show_default=True, + help=( + "ONNX Runtime execution provider chain: " + "cpu (CPUExecutionProvider); " + "cuda (CUDAExecutionProvider then CPUExecutionProvider); " + "tensorrt (TensorrtExecutionProvider, CUDA, then CPU fallbacks)." + ), +) +def main( + run_name: str, + image_dir: Path, + model_path: Path, + target_dir: Path, + confidence: Optional[float] = None, + iou_threshold: Optional[float] = None, + max_detections: Optional[int] = None, + benchmark_iters: int = 200, + warmup: int = 20, + onnx_ep_preset: str = "cpu", +) -> None: + onnx_ep_preset = onnx_ep_preset.lower() + onnx_providers, device_str = _onnx_ep_preset_to_providers_and_device(onnx_ep_preset) + + click.echo( + f"Loading model: {model_path} " + f"(onnx_execution_providers={onnx_providers!r}, device={device_str!r})" + ) + model = AutoModel.from_pretrained( + model_path, + onnx_execution_providers=list(onnx_providers), + device=device_str, + ) + + click.echo(f"Fused NMS available: {model._inference_config.post_processing.fused}") + + nms_params = { + "confidence": confidence, + "iou_threshold": iou_threshold, + "max_detections": max_detections, + } + nms_params = {name: value for name, value in nms_params.items() if value is not None} + + if nms_params: + click.echo(f"User provided NMS parameters: {nms_params}") + + forward_pass = model._inference_config.forward_pass + use_batching = forward_pass.static_batch_size is None + + if use_batching: + click.echo(f"Model exported as dynamic. Using image batch") + else: + click.echo(f"Model exported as static. 
Using single image inference") + + image_paths = list(image_dir.glob("*.jpg")) + batched_image_paths = image_paths[:TEST_BATCH_SIZE] if use_batching else image_paths[:1] + + images = [] + for image_path in batched_image_paths: + image = cv2.imread(str(image_path)) + if image is None: + raise click.ClickException(f"Could not load image from: {image_path}") + images.append(image) + + inputs = images[:TEST_BATCH_SIZE] if use_batching else images[0] + + click.echo(f"Warmup: {warmup} untimed runs..." if warmup > 0 else "No warmup runs.") + + for _ in range(warmup): + predictions = model(inputs, **nms_params) + _ = predictions[0].to_supervision() + + click.echo(f"Benchmarking: {benchmark_iters} timed runs...") + + latencies_ms: list[float] = [] + for _ in range(benchmark_iters): + t0 = time.perf_counter() + predictions = model(inputs, **nms_params) + _ = predictions[0].to_supervision() + latencies_ms.append((time.perf_counter() - t0) * 1000.0) + + click.echo("Writing reports ...") + + target_dir.mkdir(parents=True, exist_ok=True) + latency_path = target_dir / run_name / "latency.json" + nms_params_path = target_dir / run_name / "nms_params.json" + inference_config_path = target_dir / run_name / "inference_config.json" + + _write_json( + latency_path, + _latency_report_dict( + model_path=model_path, + warmup_runs=warmup, + latencies_ms=latencies_ms, + onnx_execution_providers_preset=onnx_ep_preset, + onnx_execution_providers=list(onnx_providers), + device=device_str, + batch_size=len(inputs) if isinstance(inputs, list) else 1, + images=batched_image_paths, + ), + ) + _write_json(inference_config_path, model._inference_config.model_dump(mode="json")) + _write_json(nms_params_path, nms_params) + + click.echo("Done!") + +if __name__ == "__main__": + main() diff --git a/inference_models/examples/fused-nms/run_single_fused_nms_inference.py b/inference_models/examples/fused-nms/run_single_fused_nms_inference.py new file mode 100644 index 0000000000..6c5f0b48c5 --- /dev/null +++ 
b/inference_models/examples/fused-nms/run_single_fused_nms_inference.py @@ -0,0 +1,138 @@ +from pathlib import Path +from typing import Optional + +import click +import cv2 + +from inference_models import AutoModel + + +def _onnx_ep_preset_to_providers_and_device( + preset: str, +) -> tuple[list[str], str]: + """Map CLI preset to ONNX Runtime provider chain and PyTorch device string.""" + if preset == "cpu": + return (["CPUExecutionProvider"], "cpu") + if preset == "cuda": + return (["CUDAExecutionProvider", "CPUExecutionProvider"], "cuda") + if preset == "tensorrt": + return ( + [ + "TensorrtExecutionProvider", + "CUDAExecutionProvider", + "CPUExecutionProvider", + ], + "cuda", + ) + raise click.ClickException(f"Unknown onnx-execution-providers preset: {preset!r}") + + +@click.command() +@click.option( + "--image-path", + type=click.Path(path_type=Path, exists=True, dir_okay=False, readable=True), + required=True, + help="Path to the input image.", +) +@click.option( + "--model-path", + type=click.Path(path_type=Path, exists=True, dir_okay=True, readable=True), + required=True, + help="Path to the model directory.", +) +@click.option( + "--confidence", + type=float, + help="Confidence threshold used by post-processing.", +) +@click.option( + "--iou-threshold", + type=float, + help="IOU threshold used by post-processing.", +) +@click.option( + "--max-detections", + type=int, + help="Maximum number of detections used by post-processing.", +) +@click.option( + "--onnx-execution-providers", + "onnx_ep_preset", + type=click.Choice(["cpu", "cuda", "tensorrt"], case_sensitive=False), + default="cpu", + show_default=True, + help=( + "ONNX Runtime execution provider chain: " + "cpu (CPUExecutionProvider); " + "cuda (CUDAExecutionProvider then CPUExecutionProvider); " + "tensorrt (TensorrtExecutionProvider, CUDA, then CPU fallbacks)." 
+ ), +) +def main( + image_path: Path, + model_path: Path, + confidence: Optional[float] = None, + iou_threshold: Optional[float] = None, + max_detections: Optional[int] = None, + onnx_ep_preset: str = "cpu", +) -> None: + image = cv2.imread(str(image_path)) + if image is None: + raise click.ClickException(f"Could not load image from: {image_path}") + + nms_params = { + "confidence": confidence, + "iou_threshold": iou_threshold, + "max_detections": max_detections, + } + + nms_params = {name: value for name, value in nms_params.items() if value is not None} + if nms_params: + click.echo(f"User provided NMS parameters: {nms_params}") + + onnx_ep_preset = onnx_ep_preset.lower() + onnx_providers, device_str = _onnx_ep_preset_to_providers_and_device(onnx_ep_preset) + + click.echo( + f"Loading model: {model_path} " + f"(onnx_execution_providers={onnx_providers!r}, device={device_str!r})" + ) + model = AutoModel.from_pretrained( + model_path, + onnx_execution_providers=list(onnx_providers), + device=device_str, + ) + + click.echo(f"Fused NMS available: {model._inference_config.post_processing.fused}") + + forward_pass = model._inference_config.forward_pass + if forward_pass.static_batch_size is None: + max_dyn = forward_pass.max_dynamic_batch_size + if max_dyn is not None: + click.echo( + "Batching: dynamic mode (no static batch size); " + f"maximum batch size is {max_dyn}." + ) + else: + click.echo( + "Batching: dynamic mode (no static batch size); " + "max_dynamic_batch_size is not set in the model config." 
+ ) + + click.echo("Running inference...") + predictions = model(image, **nms_params) + detections = predictions[0].to_supervision() + + click.echo(f"Detected {len(detections)} objects") + for idx, (xyxy, class_id, conf) in enumerate( + zip(detections.xyxy, detections.class_id, detections.confidence), start=1 + ): + x1, y1, x2, y2 = [int(v) for v in xyxy.tolist()] + click.echo( + f"[{idx}] class_id={int(class_id)} confidence={float(conf):.4f} " + f"bbox=({x1}, {y1}, {x2}, {y2})" + ) + + +if __name__ == "__main__": + main() diff --git a/inference_models/inference_models/configuration.py b/inference_models/inference_models/configuration.py index a0a34b39fb..21d8b88b0d 100644 --- a/inference_models/inference_models/configuration.py +++ b/inference_models/inference_models/configuration.py @@ -5,8 +5,10 @@ from inference_models.utils.environment import ( get_boolean_from_env, get_comma_separated_list_of_integers_from_env, + get_comma_separated_list_of_strings_from_env, get_float_from_env, get_integer_from_env, + get_string_from_env, parse_comma_separated_values, ) @@ -350,6 +352,31 @@ variable_name="INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_CLASS_AGNOSTIC_NMS", default=INFERENCE_MODELS_DEFAULT_CLASS_AGNOSTIC_NMS, ) +INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IMAGES_INPUT_NAME = get_string_from_env( + variable_name="INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IMAGES_INPUT_NAME", + default="images", +) +INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_CONFIDENCE_INPUT_NAME = get_string_from_env( + variable_name="INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_CONFIDENCE_INPUT_NAME", + default="confidence", +) +INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IOU_THRESHOLD_INPUT_NAME = get_string_from_env( + variable_name="INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IOU_THRESHOLD_INPUT_NAME", + default="iou_threshold", +) +INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MAX_DETECTIONS_INPUT_NAME = get_string_from_env( + variable_name="INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MAX_DETECTIONS_INPUT_NAME", 
+ default="max_output_boxes_per_class", +) +INFERENCE_MODELS_YOLO_ULTRALYTICS_DECLARED_FUSED_NMS_INPUT_NAMES = get_comma_separated_list_of_strings_from_env( + variable_name="INFERENCE_MODELS_YOLO_ULTRALYTICS_DECLARED_FUSED_NMS_INPUT_NAMES", + default=[ + INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IMAGES_INPUT_NAME, + INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_CONFIDENCE_INPUT_NAME, + INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IOU_THRESHOLD_INPUT_NAME, + INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MAX_DETECTIONS_INPUT_NAME, + ], +) INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_KEY_POINTS_THRESHOLD = get_float_from_env( variable_name="INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_KEY_POINTS_THRESHOLD", default=0.0, diff --git a/inference_models/inference_models/models/common/onnx.py b/inference_models/inference_models/models/common/onnx.py index 510e1d1682..b99f5289b7 100644 --- a/inference_models/inference_models/models/common/onnx.py +++ b/inference_models/inference_models/models/common/onnx.py @@ -198,8 +198,10 @@ def run_onnx_session_with_batch_size_limit( Args: session: ONNX Runtime inference session. - inputs: Dictionary mapping input names to PyTorch tensors. All tensors - must have the same batch size (first dimension). + inputs: Dictionary mapping input names to PyTorch tensors. Tensors that + participate in the main batch must share the same size on dimension 0. + Tensors with dimension 0 equal to 1 are treated as broadcast scalars + and are not split across chunks. output_shape_mapping: Optional dictionary mapping output names to their expected shapes. Used for pre-allocating output buffers. If None, @@ -217,7 +219,8 @@ def run_onnx_session_with_batch_size_limit( the model's output specification. Raises: - ModelRuntimeError: If input tensors have different batch sizes. + ModelInputError: If dimension-0 sizes are incompatible (for example two + different batch sizes both greater than 1). 
Examples: Run inference with batch size limit: @@ -257,7 +260,7 @@ - Automatically handles batch splitting and result concatenation - Pads the last chunk if min_batch_size is specified - Uses `run_onnx_session_via_iobinding()` internally for efficiency - - All input tensors must have the same batch size + - Broadcast inputs with batch dimension 1 are supported alongside batched tensors See Also: - `run_onnx_session_via_iobinding()`: Lower-level ONNX execution @@ -269,20 +272,24 @@ inputs=inputs, output_shape_mapping=output_shape_mapping, ) - input_batch_sizes = set() - for input_tensor in inputs.values(): - input_batch_sizes.add(input_tensor.shape[0]) - if len(input_batch_sizes) != 1: + + batch_input_sizes = [tensor.shape[0] for tensor in inputs.values() if tensor.numel() != 1] + batch_size = max(batch_input_sizes, default=1) + is_incompatible_batch_size_set = [ + size for size in batch_input_sizes if size not in (1, batch_size) + ] + if is_incompatible_batch_size_set: raise ModelInputError( - message="When running forward pass through ONNX model detected inputs with different batch sizes. " - "This is the error with the model you run. If the model was trained or exported " - "on Roboflow platform - contact us to get help. Otherwise, verify your model package or " - "implementation of the model class.", + message="When running forward pass through ONNX model detected inputs with incompatible sizes on " + "dimension 0. Expected each tensor to have either size 1 (scalar/broadcast inputs) or the same " + f"primary batch size ({batch_size}). Got distinct sizes: {sorted(set(batch_input_sizes))!r}. " + "If the model was trained or exported on Roboflow platform, contact us for help. 
Otherwise, " + "verify your model package or implementation of the model class.", help_url="https://inference-models.roboflow.com/errors/input-validation/#modelinputerror", ) - input_batch_size = input_batch_sizes.pop() - if input_batch_size <= max_batch_size and ( - min_batch_size is None or input_batch_size >= min_batch_size + + if batch_size <= max_batch_size and ( + min_batch_size is None or batch_size >= min_batch_size ): # no point iterating return run_onnx_session_via_iobinding( @@ -293,25 +300,28 @@ all_results = [] for _ in session.get_outputs(): all_results.append([]) - for i in range(0, input_batch_size, max_batch_size): + for i in range(0, batch_size, max_batch_size): batch_inputs = {} reminder = 0 for name, value in inputs.items(): - batched_value = value[i : i + max_batch_size] - if min_batch_size is not None: - reminder = min_batch_size - batched_value.shape[0] - if reminder > 0: - batched_value = torch.cat( - ( - batched_value, - torch.zeros( - (reminder,) + batched_value.shape[1:], - dtype=batched_value.dtype, - device=batched_value.device, + if value.ndim > 0 and value.shape[0] == batch_size: + batched_value = value[i : i + max_batch_size] + if min_batch_size is not None: + reminder = min_batch_size - batched_value.shape[0] + if reminder > 0: + batched_value = torch.cat( + ( + batched_value, + torch.zeros( + (reminder,) + batched_value.shape[1:], + dtype=batched_value.dtype, + device=batched_value.device, + ), ), - ), - dim=0, - ) + dim=0, + ) + else: + batched_value = value batched_value = batched_value.contiguous() batch_inputs[name] = batched_value batch_output_shape_mapping = None diff --git a/inference_models/inference_models/models/yolov8/yolov8_object_detection_onnx.py b/inference_models/inference_models/models/yolov8/yolov8_object_detection_onnx.py index e7607a7c12..d6ef1a65de 100644 --- a/inference_models/inference_models/models/yolov8/yolov8_object_detection_onnx.py +++ 
b/inference_models/inference_models/models/yolov8/yolov8_object_detection_onnx.py @@ -11,8 +11,14 @@ INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_CONFIDENCE, INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IOU_THRESHOLD, INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MAX_DETECTIONS, + INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IMAGES_INPUT_NAME, + INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_CONFIDENCE_INPUT_NAME, + INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IOU_THRESHOLD_INPUT_NAME, + INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MAX_DETECTIONS_INPUT_NAME, + INFERENCE_MODELS_YOLO_ULTRALYTICS_DECLARED_FUSED_NMS_INPUT_NAMES, ) from inference_models.entities import ColorFormat +from inference_models.logger import LOGGER from inference_models.errors import ( CorruptedModelPackageError, EnvironmentConfigurationError, @@ -127,13 +133,61 @@ def from_pretrained( path_or_bytes=model_package_content["weights.onnx"], providers=onnx_execution_providers, ) - input_batch_size = session.get_inputs()[0].shape[0] + onnx_graph_inputs = session.get_inputs() + input_names = [input.name for input in onnx_graph_inputs] + + if inference_config.post_processing.fused: + expected_fused_nms_inputs = ( + INFERENCE_MODELS_YOLO_ULTRALYTICS_DECLARED_FUSED_NMS_INPUT_NAMES + ) + expected_fused_nms_input_set = set(expected_fused_nms_inputs) + if ( + INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IMAGES_INPUT_NAME + not in input_names + ): + raise CorruptedModelPackageError( + message=( + f"Fused NMS YOLOv8 ONNX model must declare the images input " + f"({INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IMAGES_INPUT_NAME!r}). " + f"Got: {input_names}" + ), + help_url="https://inference-models.roboflow.com/errors/model-loading/#corruptedmodelpackageerror", + ) + + unexpected_inputs = [ + name for name in input_names if name not in expected_fused_nms_input_set + ] + if unexpected_inputs: + raise CorruptedModelPackageError( + message=( + f"Fused NMS YOLOv8 ONNX model has unexpected inputs {unexpected_inputs}. 
" + f"Expected each name to be one of: {expected_fused_nms_input_set}" + ), + help_url="https://inference-models.roboflow.com/errors/model-loading/#corruptedmodelpackageerror", + ) + + missing_inputs = [ + name for name in expected_fused_nms_inputs if name not in input_names + ] + if missing_inputs: + LOGGER.warning( + "Fused NMS ONNX graph omits inputs %s; they will not be passed at " + "inference time and ONNX Runtime will use graph initializer defaults for those parameters. " + "Python arguments matching omitted inputs (e.g. confidence, iou_threshold, max_detections) " + "will not affect the fused NMS stage.", + missing_inputs, + ) + + images_input = [graph_input for graph_input in onnx_graph_inputs if graph_input.name == INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IMAGES_INPUT_NAME][0] + input_batch_size = images_input.shape[0] + + # Dynamic batch size export results in "batch" string as dimension 0 representation if isinstance(input_batch_size, str): input_batch_size = None - input_name = session.get_inputs()[0].name + return cls( session=session, - input_name=input_name, + input_names=input_names, class_names=class_names, inference_config=inference_config, device=device, @@ -143,14 +197,14 @@ def from_pretrained( def __init__( self, session: onnxruntime.InferenceSession, - input_name: str, + input_names: List[str], inference_config: InferenceConfig, class_names: List[str], device: torch.device, input_batch_size: Optional[int], ): self._session = session - self._input_name = input_name + self._input_names = input_names self._inference_config = inference_config self._class_names = class_names self._device = device @@ -184,11 +238,40 @@ def pre_process( pre_processing_overrides=pre_processing_overrides, ) - def forward(self, pre_processed_images: torch.Tensor, **kwargs) -> torch.Tensor: + def forward( + self, + pre_processed_images: torch.Tensor, + confidence: Optional[float] = INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_CONFIDENCE, + iou_threshold: Optional[float] 
= INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IOU_THRESHOLD, + max_detections: Optional[int] = INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MAX_DETECTIONS, + **kwargs, + ) -> torch.Tensor: + with self._session_thread_lock: + device = pre_processed_images.device + + input_builders = { + INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IMAGES_INPUT_NAME: lambda: pre_processed_images, + } + + if self._inference_config.post_processing.fused: + if INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_CONFIDENCE_INPUT_NAME in self._input_names: + input_builders[INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_CONFIDENCE_INPUT_NAME] = lambda: torch.tensor( + [float(confidence)], dtype=torch.float32, device=device + ) + if INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IOU_THRESHOLD_INPUT_NAME in self._input_names: + input_builders[INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IOU_THRESHOLD_INPUT_NAME] = lambda: torch.tensor( + [float(iou_threshold)], dtype=torch.float32, device=device + ) + if INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MAX_DETECTIONS_INPUT_NAME in self._input_names: + input_builders[INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MAX_DETECTIONS_INPUT_NAME] = lambda: torch.tensor( + [int(max_detections)], dtype=torch.int64, device=device + ) + + inputs = {name: builder_fn() for name, builder_fn in input_builders.items()} + + return run_onnx_session_with_batch_size_limit( session=self._session, - inputs={self._input_name: pre_processed_images}, + inputs=inputs, min_batch_size=self._min_batch_size, max_batch_size=self._max_batch_size, )[0] diff --git a/inference_models/inference_models/utils/environment.py b/inference_models/inference_models/utils/environment.py index e43fd35553..b23c007d17 100644 --- a/inference_models/inference_models/utils/environment.py +++ b/inference_models/inference_models/utils/environment.py @@ -72,6 +72,39 @@ def get_comma_separated_list_of_integers_from_env( ) + +def get_comma_separated_list_of_strings_from_env( + variable_name: str, + default: Optional[List[str]] = None, +) -> 
List[str]: + value = os.getenv(variable_name) + if value is None: + if default is None: + raise InvalidEnvVariable( + message=f"Environment variable {variable_name} is required", + help_url="https://inference-models.roboflow.com/errors/runtime-environment/#invalidenvvariable", + ) + return default + try: + return [v.strip() for v in parse_comma_separated_values(value)] + except ValueError: + raise InvalidEnvVariable( + message=f"Expected a environment variable `{variable_name}` to be comma separated list of strings but got '{value}'", + help_url="https://inference-models.roboflow.com/errors/runtime-environment/#invalidenvvariable", + ) + + +def get_string_from_env(variable_name: str, default: Optional[str] = None) -> str: + value = os.getenv(variable_name) + if value is None: + if default is None: + raise InvalidEnvVariable( + message=f"Environment variable {variable_name} is required", + help_url="https://inference-models.roboflow.com/errors/runtime-environment/#invalidenvvariable", + ) + return default + return value.strip() + + def parse_comma_separated_values(values: str) -> List[str]: if not values: return []