From d2fb4ffe3bdf1fc0dcb7224fdc7288c38f85bf06 Mon Sep 17 00:00:00 2001 From: Lee Clement Date: Mon, 6 Apr 2026 14:11:12 -0230 Subject: [PATCH 1/4] backwards compatible support yololite onnx fused nms --- .../models/auto_loaders/models_registry.py | 9 ++- .../yololite_object_detection_onnx.py | 77 +++++++++++-------- 2 files changed, 51 insertions(+), 35 deletions(-) diff --git a/inference_models/inference_models/models/auto_loaders/models_registry.py b/inference_models/inference_models/models/auto_loaders/models_registry.py index c892e3499b..b89b21828e 100644 --- a/inference_models/inference_models/models/auto_loaders/models_registry.py +++ b/inference_models/inference_models/models/auto_loaders/models_registry.py @@ -255,9 +255,12 @@ class RegistryEntry: module_name="inference_models.models.yolo26.yolo26_instance_segmentation_trt", class_name="YOLO26ForInstanceSegmentationTRT", ), - ("yololite", OBJECT_DETECTION_TASK, BackendType.ONNX): LazyClass( - module_name="inference_models.models.yololite.yololite_object_detection_onnx", - class_name="YOLOLiteForObjectDetectionOnnx", + ("yololite", OBJECT_DETECTION_TASK, BackendType.ONNX): RegistryEntry( + model_class=LazyClass( + module_name="inference_models.models.yololite.yololite_object_detection_onnx", + class_name="YOLOLiteForObjectDetectionOnnx", + ), + supported_model_features={"nms_fused"}, ), ("paligemma-2", VLM_TASK, BackendType.HF): LazyClass( module_name="inference_models.models.paligemma.paligemma_hf", diff --git a/inference_models/inference_models/models/yololite/yololite_object_detection_onnx.py b/inference_models/inference_models/models/yololite/yololite_object_detection_onnx.py index a888279c2c..4daa741690 100644 --- a/inference_models/inference_models/models/yololite/yololite_object_detection_onnx.py +++ b/inference_models/inference_models/models/yololite/yololite_object_detection_onnx.py @@ -30,6 +30,7 @@ parse_inference_config, ) from inference_models.models.common.roboflow.post_processing import ( + post_process_nms_fused_model_output, rescale_detections, run_nms_for_object_detection, ) @@ -198,38 +199,12 @@ def post_process( class_agnostic_nms: bool = INFERENCE_MODELS_YOLOLITE_DEFAULT_CLASS_AGNOSTIC_NMS, **kwargs, ) -> List[Detections]: - # YOLOLite decoded export outputs 3 tensors: - # boxes_xyxy: [B, N, 4] - decoded bounding boxes in xyxy pixel coords - # obj_logits: [B, N, 1] - objectness logits (pre-sigmoid) - # cls_logits: [B, N, C] - class logits (pre-sigmoid) - boxes_xyxy, obj_logits, cls_logits = ( - model_results[0], - model_results[1], - model_results[2], - ) - - # Apply sigmoid to convert logits to probabilities - obj_conf = torch.sigmoid(obj_logits) # [B, N, 1] - cls_conf = torch.sigmoid(cls_logits) # [B, N, C] - - # Combined score: objectness * class confidence - combined_scores = obj_conf * cls_conf # [B, N, C] - - # Reshape to [B, 4+C, N] format expected by run_nms_for_object_detection: - # channels 0-3: box coords (xyxy) - # channels 4+: class scores - boxes_t = boxes_xyxy.permute(0, 2, 1) # [B, 4, N] - scores_t = combined_scores.permute(0, 2, 1) # [B, C, N] - nms_input = torch.cat([boxes_t, scores_t], dim=1) # [B, 4+C, N] - - nms_results = run_nms_for_object_detection( - output=nms_input, - conf_thresh=confidence, - iou_thresh=iou_threshold, - max_detections=max_detections, - class_agnostic=class_agnostic_nms, - box_format="xyxy", - ) + if self._inference_config.post_processing.fused: + nms_results = self._post_process_fused(model_results, confidence) + else: + nms_results = self._post_process_unfused( + model_results, confidence, iou_threshold, max_detections, class_agnostic_nms, + ) rescaled_results = rescale_detections( detections=nms_results, images_metadata=pre_processing_meta, @@ -244,3 +219,41 @@ def post_process( ) ) return results + + def _post_process_fused( + self, + model_results: Tuple[torch.Tensor, ...], + confidence: float, + ) -> List[torch.Tensor]: + # Single output tensor [B, max_det, 6]: x1, y1, x2, y2, conf, class_id + output = model_results[0] + return post_process_nms_fused_model_output(output=output, conf_thresh=confidence) + + def _post_process_unfused( + self, + model_results: Tuple[torch.Tensor, ...], + confidence: float, + iou_threshold: float, + max_detections: int, + class_agnostic_nms: bool, + ) -> List[torch.Tensor]: + # Decoded outputs without fused NMS: boxes_xyxy [B,N,4], obj_logits [B,N,1], cls_logits [B,N,C] + boxes_xyxy, obj_logits, cls_logits = ( + model_results[0], model_results[1], model_results[2], + ) + obj_conf = torch.sigmoid(obj_logits) + cls_conf = torch.sigmoid(cls_logits) + combined_scores = obj_conf * cls_conf + + boxes_t = boxes_xyxy.permute(0, 2, 1) + scores_t = combined_scores.permute(0, 2, 1) + nms_input = torch.cat([boxes_t, scores_t], dim=1) + + return run_nms_for_object_detection( + output=nms_input, + conf_thresh=confidence, + iou_thresh=iou_threshold, + max_detections=max_detections, + class_agnostic=class_agnostic_nms, + box_format="xyxy", + ) From 725008255bc550a41698517aac1f40d554179eeb Mon Sep 17 00:00:00 2001 From: Lee Clement Date: Mon, 6 Apr 2026 14:58:07 -0230 Subject: [PATCH 2/4] update tests --- .../integration_tests/models/conftest.py | 26 ++- ...olite_object_detection_predictions_onnx.py | 187 ++++++++++++------ 2 files changed, 148 insertions(+), 65 deletions(-) diff --git a/inference_models/tests/integration_tests/models/conftest.py b/inference_models/tests/integration_tests/models/conftest.py index 78bed02c8e..e004ae0427 100644 --- a/inference_models/tests/integration_tests/models/conftest.py +++ b/inference_models/tests/integration_tests/models/conftest.py @@ -73,7 +73,9 @@ COIN_COUNTING_YOLACT_ONNX_STATIC_BS_STATIC_CROP_STRETCH_URL = "https://storage.googleapis.com/roboflow-tests-assets/rf-platform-models/yolact-static-bs-static-crop-stretch-onnx.zip" COIN_COUNTING_YOLACT_ONNX_STATIC_BS_STRETCH_URL = "https://storage.googleapis.com/roboflow-tests-assets/rf-platform-models/yolact-static-bs-stretch-onnx.zip" -COIN_COUNTING_YOLOLITE_N_ONNX_DYNAMIC_BS_LETTERBOX_URL = "https://storage.googleapis.com/roboflow-tests-assets/rf-platform-models/coin-counting-yololite-n-onnx-dynamic-bs-letterbox.zip" +COIN_COUNTING_YOLOLITE_EDGE_N_ONNX_STATIC_BS_STRETCH_URL = "https://storage.googleapis.com/roboflow-tests-assets/rf-platform-models/coin-counting-yololite-edge-n-onnx-static-bs-stretch.zip" +COIN_COUNTING_YOLOLITE_EDGE_N_ONNX_DYNAMIC_BS_STRETCH_URL = "https://storage.googleapis.com/roboflow-tests-assets/rf-platform-models/coin-counting-yololite-edge-n-onnx-dynamic-bs-stretch.zip" +COIN_COUNTING_YOLOLITE_EDGE_N_ONNX_DYNAMIC_BS_STRETCH_FUSED_NMS_URL = "https://storage.googleapis.com/roboflow-tests-assets/rf-platform-models/coin-counting-yololite-edge-n-onnx-dynamic-bs-stretch-fused-nms.zip" ASL_YOLOV8N_SEG_ONNX_DYNAMIC_BS_STRETCH_URL = "https://storage.googleapis.com/roboflow-tests-assets/rf-platform-models/yolov8n-seg-onnx-dynamic-bs-stretch.zip" ASL_YOLOV8N_SEG_ONNX_DYNAMIC_BS_STRETCH_FUSED_NMS_URL = "https://storage.googleapis.com/roboflow-tests-assets/rf-platform-models/yolov8n-seg-onnx-dynamic-bs-stretch-fused-nms.zip" @@ -700,10 +702,26 @@ def coin_counting_yolo_nas_onnx_static_bs_center_crop_package() -> str: @pytest.fixture(scope="module") -def coin_counting_yololite_n_onnx_dynamic_bs_letterbox_package() -> str: +def coin_counting_yololite_edge_n_onnx_static_bs_stretch_package() -> str: return download_model_package( - model_package_zip_url=COIN_COUNTING_YOLOLITE_N_ONNX_DYNAMIC_BS_LETTERBOX_URL, - package_name="coin-counting-yololite-n-onnx-dynamic-bs-letterbox", + model_package_zip_url=COIN_COUNTING_YOLOLITE_EDGE_N_ONNX_STATIC_BS_STRETCH_URL, + package_name="coin-counting-yololite-edge-n-onnx-static-bs-stretch", + ) + + +@pytest.fixture(scope="module") +def coin_counting_yololite_edge_n_onnx_dynamic_bs_stretch_package() -> str: + return download_model_package( + model_package_zip_url=COIN_COUNTING_YOLOLITE_EDGE_N_ONNX_DYNAMIC_BS_STRETCH_URL, + package_name="coin-counting-yololite-edge-n-onnx-dynamic-bs-stretch", + ) + + +@pytest.fixture(scope="module") +def coin_counting_yololite_edge_n_onnx_dynamic_bs_stretch_fused_nms_package() -> str: + return download_model_package( + model_package_zip_url=COIN_COUNTING_YOLOLITE_EDGE_N_ONNX_DYNAMIC_BS_STRETCH_FUSED_NMS_URL, + package_name="coin-counting-yololite-edge-n-onnx-dynamic-bs-stretch-fused-nms", ) diff --git a/inference_models/tests/integration_tests/models/test_yololite_object_detection_predictions_onnx.py b/inference_models/tests/integration_tests/models/test_yololite_object_detection_predictions_onnx.py index aaa135dae6..669c49f64b 100644 --- a/inference_models/tests/integration_tests/models/test_yololite_object_detection_predictions_onnx.py +++ b/inference_models/tests/integration_tests/models/test_yololite_object_detection_predictions_onnx.py @@ -3,23 +3,51 @@ import torch +# ── Static non-fused (batch=1) ────────────────────────────────────────────── + + @pytest.mark.slow @pytest.mark.onnx_extras -def test_onnx_package_with_dynamic_batch_size_and_letterbox_numpy( - coin_counting_yololite_n_onnx_dynamic_bs_letterbox_package: str, +def test_static_non_fused_numpy( + coin_counting_yololite_edge_n_onnx_static_bs_stretch_package: str, coins_counting_image_numpy: np.ndarray, ) -> None: - # given from inference_models.models.yololite.yololite_object_detection_onnx import ( YOLOLiteForObjectDetectionOnnx, ) model = YOLOLiteForObjectDetectionOnnx.from_pretrained( - model_name_or_path=coin_counting_yololite_n_onnx_dynamic_bs_letterbox_package, + model_name_or_path=coin_counting_yololite_edge_n_onnx_static_bs_stretch_package, onnx_execution_providers=["CUDAExecutionProvider", "CPUExecutionProvider"], ) + predictions = model( + coins_counting_image_numpy, confidence=0.25, iou_threshold=0.45 + ) + + assert len(predictions) == 1 + assert predictions[0].xyxy.shape[1] == 4 + assert len(predictions[0].confidence) > 0 + assert torch.all(predictions[0].confidence >= 0.25) + assert torch.all(predictions[0].confidence <= 1.0) + + +# ── Dynamic non-fused ──────────────────────────────────────────────────────── + + +@pytest.mark.slow +@pytest.mark.onnx_extras +def test_dynamic_non_fused_numpy( + coin_counting_yololite_edge_n_onnx_dynamic_bs_stretch_package: str, + coins_counting_image_numpy: np.ndarray, +) -> None: + from inference_models.models.yololite.yololite_object_detection_onnx import ( + YOLOLiteForObjectDetectionOnnx, + ) - # when + model = YOLOLiteForObjectDetectionOnnx.from_pretrained( + model_name_or_path=coin_counting_yololite_edge_n_onnx_dynamic_bs_stretch_package, + onnx_execution_providers=["CUDAExecutionProvider", "CPUExecutionProvider"], + ) predictions = model( coins_counting_image_numpy, confidence=0.25, @@ -27,8 +55,6 @@ def test_onnx_package_with_dynamic_batch_size_and_letterbox_numpy( max_detections=100, ) - # then - assert isinstance(predictions, list) assert len(predictions) == 1 assert predictions[0].xyxy.shape[1] == 4 assert predictions[0].xyxy.dtype == torch.int32 @@ -41,21 +67,18 @@ def test_onnx_package_with_dynamic_batch_size_and_letterbox_numpy( @pytest.mark.slow @pytest.mark.onnx_extras -def test_onnx_package_with_dynamic_batch_size_and_letterbox_batch_numpy( - coin_counting_yololite_n_onnx_dynamic_bs_letterbox_package: str, +def test_dynamic_non_fused_batch_numpy( + coin_counting_yololite_edge_n_onnx_dynamic_bs_stretch_package: str, coins_counting_image_numpy: np.ndarray, ) -> None: - # given from inference_models.models.yololite.yololite_object_detection_onnx import ( YOLOLiteForObjectDetectionOnnx, ) model = YOLOLiteForObjectDetectionOnnx.from_pretrained( - model_name_or_path=coin_counting_yololite_n_onnx_dynamic_bs_letterbox_package, + model_name_or_path=coin_counting_yololite_edge_n_onnx_dynamic_bs_stretch_package, onnx_execution_providers=["CUDAExecutionProvider", "CPUExecutionProvider"], ) - - # when predictions = model( [coins_counting_image_numpy, coins_counting_image_numpy], confidence=0.25, @@ -63,16 +86,11 @@ def test_onnx_package_with_dynamic_batch_size_and_letterbox_batch_numpy( max_detections=100, ) - # then - assert isinstance(predictions, list) assert len(predictions) == 2 for pred in predictions: assert pred.xyxy.shape[1] == 4 - assert pred.xyxy.dtype == torch.int32 assert len(pred.confidence) > 0 assert torch.all(pred.confidence >= 0.25) - assert torch.all(pred.confidence <= 1.0) - # Both images are identical, so detections should match assert predictions[0].xyxy.shape == predictions[1].xyxy.shape assert torch.allclose( predictions[0].confidence, predictions[1].confidence, atol=0.01 @@ -81,98 +99,145 @@ def test_onnx_package_with_dynamic_batch_size_and_letterbox_batch_numpy( @pytest.mark.slow @pytest.mark.onnx_extras -def test_onnx_package_with_dynamic_batch_size_and_letterbox_torch( - coin_counting_yololite_n_onnx_dynamic_bs_letterbox_package: str, +def test_dynamic_non_fused_torch( + coin_counting_yololite_edge_n_onnx_dynamic_bs_stretch_package: str, coins_counting_image_torch: torch.Tensor, ) -> None: - # given from inference_models.models.yololite.yololite_object_detection_onnx import ( YOLOLiteForObjectDetectionOnnx, ) model = YOLOLiteForObjectDetectionOnnx.from_pretrained( - model_name_or_path=coin_counting_yololite_n_onnx_dynamic_bs_letterbox_package, + model_name_or_path=coin_counting_yololite_edge_n_onnx_dynamic_bs_stretch_package, onnx_execution_providers=["CUDAExecutionProvider", "CPUExecutionProvider"], ) - - # when predictions = model( - coins_counting_image_torch, - confidence=0.25, - iou_threshold=0.45, - max_detections=100, + coins_counting_image_torch, confidence=0.25, iou_threshold=0.45 ) - # then - assert isinstance(predictions, list) assert len(predictions) == 1 assert predictions[0].xyxy.shape[1] == 4 - assert predictions[0].xyxy.dtype == torch.int32 assert len(predictions[0].confidence) > 0 assert torch.all(predictions[0].confidence >= 0.25) @pytest.mark.slow @pytest.mark.onnx_extras -def test_onnx_high_confidence_threshold_returns_fewer_detections( - coin_counting_yololite_n_onnx_dynamic_bs_letterbox_package: str, +def test_dynamic_non_fused_high_confidence_returns_fewer( + coin_counting_yololite_edge_n_onnx_dynamic_bs_stretch_package: str, coins_counting_image_numpy: np.ndarray, ) -> None: - # given from inference_models.models.yololite.yololite_object_detection_onnx import ( YOLOLiteForObjectDetectionOnnx, ) model = YOLOLiteForObjectDetectionOnnx.from_pretrained( - model_name_or_path=coin_counting_yololite_n_onnx_dynamic_bs_letterbox_package, + model_name_or_path=coin_counting_yololite_edge_n_onnx_dynamic_bs_stretch_package, onnx_execution_providers=["CUDAExecutionProvider", "CPUExecutionProvider"], ) + low = model(coins_counting_image_numpy, confidence=0.1, iou_threshold=0.45) + high = model(coins_counting_image_numpy, confidence=0.8, iou_threshold=0.45) + + assert len(low[0].confidence) >= len(high[0].confidence) + + +@pytest.mark.slow +@pytest.mark.onnx_extras +def test_dynamic_non_fused_class_agnostic_nms( + coin_counting_yololite_edge_n_onnx_dynamic_bs_stretch_package: str, + coins_counting_image_numpy: np.ndarray, +) -> None: + from inference_models.models.yololite.yololite_object_detection_onnx import ( + YOLOLiteForObjectDetectionOnnx, + ) - # when - low_conf_predictions = model( - coins_counting_image_numpy, confidence=0.1, iou_threshold=0.45 + model = YOLOLiteForObjectDetectionOnnx.from_pretrained( + model_name_or_path=coin_counting_yololite_edge_n_onnx_dynamic_bs_stretch_package, + onnx_execution_providers=["CUDAExecutionProvider", "CPUExecutionProvider"], ) - high_conf_predictions = model( - coins_counting_image_numpy, confidence=0.8, iou_threshold=0.45 + standard = model( + coins_counting_image_numpy, confidence=0.25, iou_threshold=0.45, + class_agnostic_nms=False, + ) + agnostic = model( + coins_counting_image_numpy, confidence=0.25, iou_threshold=0.45, + class_agnostic_nms=True, ) - # then - assert len(low_conf_predictions[0].confidence) >= len( - high_conf_predictions[0].confidence + assert len(agnostic[0].confidence) <= len(standard[0].confidence) + + +# ── NMS-fused ──────────────────────────────────────────────────────────────── + + +@pytest.mark.slow +@pytest.mark.onnx_extras +def test_fused_nms_numpy( + coin_counting_yololite_edge_n_onnx_dynamic_bs_stretch_fused_nms_package: str, + coins_counting_image_numpy: np.ndarray, +) -> None: + from inference_models.models.yololite.yololite_object_detection_onnx import ( + YOLOLiteForObjectDetectionOnnx, + ) + + model = YOLOLiteForObjectDetectionOnnx.from_pretrained( + model_name_or_path=coin_counting_yololite_edge_n_onnx_dynamic_bs_stretch_fused_nms_package, + onnx_execution_providers=["CUDAExecutionProvider", "CPUExecutionProvider"], ) + predictions = model(coins_counting_image_numpy, confidence=0.25) + + assert len(predictions) == 1 + assert predictions[0].xyxy.shape[1] == 4 + assert predictions[0].xyxy.dtype == torch.int32 + assert len(predictions[0].confidence) > 0 + assert torch.all(predictions[0].confidence >= 0.25) + assert torch.all(predictions[0].confidence <= 1.0) @pytest.mark.slow @pytest.mark.onnx_extras -def test_onnx_class_agnostic_nms( - coin_counting_yololite_n_onnx_dynamic_bs_letterbox_package: str, +def test_fused_nms_batch_numpy( + coin_counting_yololite_edge_n_onnx_dynamic_bs_stretch_fused_nms_package: str, coins_counting_image_numpy: np.ndarray, ) -> None: - # given from inference_models.models.yololite.yololite_object_detection_onnx import ( YOLOLiteForObjectDetectionOnnx, ) model = YOLOLiteForObjectDetectionOnnx.from_pretrained( - model_name_or_path=coin_counting_yololite_n_onnx_dynamic_bs_letterbox_package, + model_name_or_path=coin_counting_yololite_edge_n_onnx_dynamic_bs_stretch_fused_nms_package, onnx_execution_providers=["CUDAExecutionProvider", "CPUExecutionProvider"], ) + predictions = model( + [coins_counting_image_numpy, coins_counting_image_numpy], confidence=0.25, + ) - # when - standard_predictions = model( - coins_counting_image_numpy, - confidence=0.25, - iou_threshold=0.45, - class_agnostic_nms=False, + assert len(predictions) == 2 + for pred in predictions: + assert pred.xyxy.shape[1] == 4 + assert len(pred.confidence) > 0 + assert torch.all(pred.confidence >= 0.25) + assert predictions[0].xyxy.shape == predictions[1].xyxy.shape + assert torch.allclose( + predictions[0].confidence, predictions[1].confidence, atol=0.01 ) - agnostic_predictions = model( - coins_counting_image_numpy, - confidence=0.25, - iou_threshold=0.45, - class_agnostic_nms=True, + + +@pytest.mark.slow +@pytest.mark.onnx_extras +def test_fused_nms_high_confidence_returns_fewer( + coin_counting_yololite_edge_n_onnx_dynamic_bs_stretch_fused_nms_package: str, + coins_counting_image_numpy: np.ndarray, +) -> None: + from inference_models.models.yololite.yololite_object_detection_onnx import ( + YOLOLiteForObjectDetectionOnnx, ) - # then - class-agnostic NMS should suppress more overlapping boxes - assert len(agnostic_predictions[0].confidence) <= len( - standard_predictions[0].confidence + model = YOLOLiteForObjectDetectionOnnx.from_pretrained( + model_name_or_path=coin_counting_yololite_edge_n_onnx_dynamic_bs_stretch_fused_nms_package, + onnx_execution_providers=["CUDAExecutionProvider", "CPUExecutionProvider"], ) + low = model(coins_counting_image_numpy, confidence=0.1) + high = model(coins_counting_image_numpy, confidence=0.8) + + assert len(low[0].confidence) >= len(high[0].confidence) From 3dfcc7d49220bf9ace23841d3ab511d4a505fc60 Mon Sep 17 00:00:00 2001 From: Lee Clement Date: Mon, 6 Apr 2026 15:29:23 -0230 Subject: [PATCH 3/4] cleanup --- .../test_yololite_object_detection_predictions_onnx.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/inference_models/tests/integration_tests/models/test_yololite_object_detection_predictions_onnx.py b/inference_models/tests/integration_tests/models/test_yololite_object_detection_predictions_onnx.py index 669c49f64b..d3f18bd186 100644 --- a/inference_models/tests/integration_tests/models/test_yololite_object_detection_predictions_onnx.py +++ b/inference_models/tests/integration_tests/models/test_yololite_object_detection_predictions_onnx.py @@ -26,6 +26,9 @@ def test_static_non_fused_numpy( assert len(predictions) == 1 assert predictions[0].xyxy.shape[1] == 4 + assert predictions[0].xyxy.dtype == torch.int32 + assert predictions[0].class_id.dtype == torch.int32 + assert predictions[0].confidence.dtype == torch.float32 assert len(predictions[0].confidence) > 0 assert torch.all(predictions[0].confidence >= 0.25) assert torch.all(predictions[0].confidence <= 1.0) @@ -189,6 +192,8 @@ def test_fused_nms_numpy( assert len(predictions) == 1 assert predictions[0].xyxy.shape[1] == 4 assert predictions[0].xyxy.dtype == torch.int32 + assert predictions[0].class_id.dtype == torch.int32 + assert predictions[0].confidence.dtype == torch.float32 assert len(predictions[0].confidence) > 0 assert torch.all(predictions[0].confidence >= 0.25) assert torch.all(predictions[0].confidence <= 1.0) From 920170d058524e00ef84d36cf892e099f5765b13 Mon Sep 17 00:00:00 2001 From: Lee Clement Date: Mon, 6 Apr 2026 16:34:00 -0230 Subject: [PATCH 4/4] fix for old models without post_processing --- .../models/yololite/yololite_object_detection_onnx.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inference_models/inference_models/models/yololite/yololite_object_detection_onnx.py b/inference_models/inference_models/models/yololite/yololite_object_detection_onnx.py index 4daa741690..bf2c54321e 100644 --- a/inference_models/inference_models/models/yololite/yololite_object_detection_onnx.py +++ b/inference_models/inference_models/models/yololite/yololite_object_detection_onnx.py @@ -199,7 +199,8 @@ def post_process( class_agnostic_nms: bool = INFERENCE_MODELS_YOLOLITE_DEFAULT_CLASS_AGNOSTIC_NMS, **kwargs, ) -> List[Detections]: - if self._inference_config.post_processing.fused: + # Backward compatibility: earlier model packages have no post_processing config — always unfused 3-tensor output + if self._inference_config.post_processing and self._inference_config.post_processing.fused: nms_results = self._post_process_fused(model_results, confidence) else: nms_results = self._post_process_unfused(