From 4968c4ed4d2786aa991eee62789e88bcb8296f16 Mon Sep 17 00:00:00 2001 From: siwoo-jung Date: Thu, 6 Nov 2025 22:05:06 +1100 Subject: [PATCH 1/3] Add `redact_and_return_bbox` method to ImageRedactorEngine for returning redacted images and bounding boxes. --- CHANGELOG.md | 1 + docs/image-redactor/index.md | 3 + .../image_redactor_engine.py | 45 ++++++++- .../integration/test_image_redactor_engine.py | 94 +++++++++++++++++++ 4 files changed, 138 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a42f7b44e4..a97e5cefb1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ All notable changes to this project will be documented in this file. ### Image Redactor #### Changed - DICOM: use_metadata will now use both is_patient and is_name to generate the PHI list of words via change to _make_phi_list. +- Image Redactor: Added `redact_and_return_bbox` method to `ImageRedactorEngine`, which returns both the redacted image and the detected bounding boxes for redacted regions. ## [2.2.360] - 2025-09-09 ### Analyzer diff --git a/docs/image-redactor/index.md b/docs/image-redactor/index.md index ebb96a96e6..ec7b8fa700 100644 --- a/docs/image-redactor/index.md +++ b/docs/image-redactor/index.md @@ -80,6 +80,9 @@ Pre-requisites: # Redact the image with pink color redacted_image = engine.redact(image, (255, 192, 203)) + + # Optional: Redact the image and return redacted regions + redacted_image, bboxes = engine.redact_and_return_bbox(image, (255, 192, 203)) # save the redacted image redacted_image.save("new_image.png") diff --git a/presidio-image-redactor/presidio_image_redactor/image_redactor_engine.py b/presidio-image-redactor/presidio_image_redactor/image_redactor_engine.py index 06a9ca1d21..42df694c46 100644 --- a/presidio-image-redactor/presidio_image_redactor/image_redactor_engine.py +++ b/presidio-image-redactor/presidio_image_redactor/image_redactor_engine.py @@ -5,6 +5,7 @@ from presidio_analyzer import PatternRecognizer from presidio_image_redactor import BboxProcessor, ImageAnalyzerEngine +from presidio_image_redactor.entities import ImageRecognizerResult class ImageRedactorEngine: @@ -24,15 +25,15 @@ def __init__( self.bbox_processor = BboxProcessor() - def redact( + def redact_and_return_bbox( self, image: Image, fill: Union[int, Tuple[int, int, int]] = (0, 0, 0), ocr_kwargs: Optional[dict] = None, ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None, **text_analyzer_kwargs, - ) -> Image: - """Redact method to redact the given image. + ) -> Tuple[Image, List[ImageRecognizerResult]]: + """Redact method to redact the given image and return the bboxes. Please notice, this method duplicates the image, creates a new instance and manipulate it. @@ -45,7 +46,7 @@ def redact( :param text_analyzer_kwargs: Additional values for the analyze method in AnalyzerEngine. - :return: the redacted image + :return: the redacted image and the list of bboxes """ image = ImageChops.duplicate(image) @@ -77,7 +78,41 @@ def redact( y1 = y0 + box.height draw.rectangle([x0, y0, x1, y1], fill=fill) - return image + return image, bboxes + + def redact( + self, + image: Image, + fill: Union[int, Tuple[int, int, int]] = (0, 0, 0), + ocr_kwargs: Optional[dict] = None, + ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None, + **text_analyzer_kwargs, + ) -> Image: + """Redact method to redact the given image. + + Please notice, this method duplicates the image, creates a new instance and + manipulate it. + :param image: PIL Image to be processed. + :param fill: colour to fill the shape - int (0-255) for + grayscale or Tuple(R, G, B) for RGB. + :param ocr_kwargs: Additional params for OCR methods. + :param ad_hoc_recognizers: List of PatternRecognizer objects to use + for ad-hoc recognizer. + :param text_analyzer_kwargs: Additional values for the analyze method + in AnalyzerEngine. + + :return: the redacted image + """ + + redacted_image, _ = self.redact_and_return_bbox( + image=image, + fill=fill, + ocr_kwargs=ocr_kwargs, + ad_hoc_recognizers=ad_hoc_recognizers, + **text_analyzer_kwargs, + ) + + return redacted_image @staticmethod def _check_ad_hoc_recognizer_list( diff --git a/presidio-image-redactor/tests/integration/test_image_redactor_engine.py b/presidio-image-redactor/tests/integration/test_image_redactor_engine.py index c119178f59..3fadb02ef8 100644 --- a/presidio-image-redactor/tests/integration/test_image_redactor_engine.py +++ b/presidio-image-redactor/tests/integration/test_image_redactor_engine.py @@ -6,6 +6,8 @@ from tests.integration.methods import get_resource_image, compare_images, image_sim +from presidio_image_redactor.entities import ImageRecognizerResult + red_fill = (255, 0, 0) @@ -90,3 +92,95 @@ def test_given_analzyer_kwargs_then_different_entities_are_redacted(engine_build assert not compare_images(redacted_image_no_args, redacted_image_entities_args) assert not compare_images(redacted_image_no_args, redacted_image_score_args) assert not compare_images(redacted_image_entities_args, redacted_image_score_args) + + +@pytest.mark.parametrize("engine_builder", all_engines_required()) +def test_redact_and_return_bbox_returns_same_image_as_redact(engine_builder: Callable): + """Test that redact_and_return_bbox returns the same redacted image as redact.""" + + image = get_resource_image("ocr_test.png") + result_image = get_resource_image("ocr_test_redacted.png") + + engine = engine_builder() + redacted_image_from_bbox, _ = engine.redact_and_return_bbox(image, 1) + redacted_image_from_redact = engine.redact(image, 1) + + assert compare_images(redacted_image_from_bbox, result_image) + assert compare_images(redacted_image_from_bbox, redacted_image_from_redact) + + +@pytest.mark.parametrize("engine_builder", all_engines_required()) +def test_redact_and_return_bbox_returns_bboxes_for_image_with_text(engine_builder: Callable): + """Test that redact_and_return_bbox returns non-empty bboxes for image with PII.""" + + image = get_resource_image("ocr_test.png") + + engine = engine_builder() + redacted_image, bboxes = engine.redact_and_return_bbox(image, 1) + + assert isinstance(bboxes, list) + assert len(bboxes) > 0 + + for bbox in bboxes: + assert isinstance(bbox, ImageRecognizerResult) + assert hasattr(bbox, 'left') + assert hasattr(bbox, 'top') + assert hasattr(bbox, 'width') + assert hasattr(bbox, 'height') + assert hasattr(bbox, 'entity_type') + assert hasattr(bbox, 'score') + assert isinstance(bbox.left, int) + assert isinstance(bbox.top, int) + assert isinstance(bbox.width, int) + assert isinstance(bbox.height, int) + assert bbox.width > 0 + assert bbox.height > 0 + + +@pytest.mark.parametrize("engine_builder", all_engines_required()) +def test_redact_and_return_bbox_returns_empty_bboxes_for_image_without_text(engine_builder: Callable): + """Test that redact_and_return_bbox returns empty bboxes for image without PII.""" + + image = get_resource_image("no_ocr.jpg") + + engine = engine_builder() + redacted_image, bboxes = engine.redact_and_return_bbox(image, red_fill) + + assert isinstance(bboxes, list) + assert len(bboxes) == 0 + + +@pytest.mark.parametrize("engine_builder", all_engines_required()) +def test_redact_and_return_bbox_with_matrix_fill(engine_builder: Callable): + """Test redact_and_return_bbox with matrix fill color.""" + + image = get_resource_image("ocr_test.png") + expected_result_image = get_resource_image("ocr_test_redacted_matrix.png") + + engine = engine_builder() + redacted_image, bboxes = engine.redact_and_return_bbox(image, red_fill) + + assert image_sim(redacted_image, expected_result_image) > image_sim(redacted_image, image) + assert len(bboxes) > 0 + + +@pytest.mark.parametrize("engine_builder", all_engines_required()) +def test_redact_and_return_bbox_with_analyzer_kwargs(engine_builder: Callable): + """Test that redact_and_return_bbox works with analyzer kwargs like entities and score_threshold.""" + + image = get_resource_image("kwargs_test.jpg") + + engine = engine_builder() + redacted_image_no_args, bboxes_no_args = engine.redact_and_return_bbox(image) + redacted_image_entities, bboxes_entities = engine.redact_and_return_bbox( + image, entities=["PERSON", "LOCATION"] + ) + redacted_image_score, bboxes_score = engine.redact_and_return_bbox( + image, score_threshold=1 + ) + + assert not compare_images(redacted_image_no_args, redacted_image_entities) + assert not compare_images(redacted_image_no_args, redacted_image_score) + assert not compare_images(redacted_image_entities, redacted_image_score) + + assert len(bboxes_no_args) != len(bboxes_entities) or len(bboxes_no_args) != len(bboxes_score) From 0b0e1088ceff4e99788e6dd857f8536af28c29dd Mon Sep 17 00:00:00 2001 From: siwoo-jung Date: Thu, 6 Nov 2025 22:12:21 +1100 Subject: [PATCH 2/3] Update CHANGELOG.md to reflect the addition of redact_and_return_bbox method in ImageRedactorEngine for returning redacted images and bounding boxes. --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a97e5cefb1..1e61abd768 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ All notable changes to this project will be documented in this file. ### Image Redactor #### Changed - DICOM: use_metadata will now use both is_patient and is_name to generate the PHI list of words via change to _make_phi_list. -- Image Redactor: Added `redact_and_return_bbox` method to `ImageRedactorEngine`, which returns both the redacted image and the detected bounding boxes for redacted regions. +- Image Redactor: Added redact_and_return_bbox method to ImageRedactorEngine, which returns both the redacted image and the detected bounding boxes for redacted regions. ## [2.2.360] - 2025-09-09 ### Analyzer From d7ec1b1fb47f8a9ab88ceaf95c0651ba87875525 Mon Sep 17 00:00:00 2001 From: siwoo-jung Date: Fri, 7 Nov 2025 08:39:39 +1100 Subject: [PATCH 3/3] Refactor type hints in ImageRedactorEngine methods to specify Image.Image for better clarity and type safety. --- .../presidio_image_redactor/image_redactor_engine.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/presidio-image-redactor/presidio_image_redactor/image_redactor_engine.py b/presidio-image-redactor/presidio_image_redactor/image_redactor_engine.py index 42df694c46..bd663a793f 100644 --- a/presidio-image-redactor/presidio_image_redactor/image_redactor_engine.py +++ b/presidio-image-redactor/presidio_image_redactor/image_redactor_engine.py @@ -27,12 +27,12 @@ def __init__( def redact_and_return_bbox( self, - image: Image, + image: Image.Image, fill: Union[int, Tuple[int, int, int]] = (0, 0, 0), ocr_kwargs: Optional[dict] = None, ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None, **text_analyzer_kwargs, - ) -> Tuple[Image, List[ImageRecognizerResult]]: + ) -> Tuple[Image.Image, List[ImageRecognizerResult]]: """Redact method to redact the given image and return the bboxes. Please notice, this method duplicates the image, creates a new instance and @@ -82,12 +82,12 @@ def redact_and_return_bbox( def redact( self, - image: Image, + image: Image.Image, fill: Union[int, Tuple[int, int, int]] = (0, 0, 0), ocr_kwargs: Optional[dict] = None, ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None, **text_analyzer_kwargs, - ) -> Image: + ) -> Image.Image: """Redact method to redact the given image. Please notice, this method duplicates the image, creates a new instance and