diff --git a/CHANGELOG.md b/CHANGELOG.md index a42f7b44e4..1e61abd768 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ All notable changes to this project will be documented in this file. ### Image Redactor #### Changed - DICOM: use_metadata will now use both is_patient and is_name to generate the PHI list of words via change to _make_phi_list. +- Added redact_and_return_bbox method to ImageRedactorEngine, which returns both the redacted image and the bounding boxes of the redacted regions. ## [2.2.360] - 2025-09-09 ### Analyzer diff --git a/docs/image-redactor/index.md b/docs/image-redactor/index.md index ebb96a96e6..ec7b8fa700 100644 --- a/docs/image-redactor/index.md +++ b/docs/image-redactor/index.md @@ -80,6 +80,9 @@ Pre-requisites: # Redact the image with pink color redacted_image = engine.redact(image, (255, 192, 203)) + + # Optional: redact the image and also get the bounding boxes of the redacted regions + redacted_image, bboxes = engine.redact_and_return_bbox(image, (255, 192, 203)) # save the redacted image redacted_image.save("new_image.png") diff --git a/presidio-image-redactor/presidio_image_redactor/image_redactor_engine.py b/presidio-image-redactor/presidio_image_redactor/image_redactor_engine.py index 06a9ca1d21..bd663a793f 100644 --- a/presidio-image-redactor/presidio_image_redactor/image_redactor_engine.py +++ b/presidio-image-redactor/presidio_image_redactor/image_redactor_engine.py @@ -5,6 +5,7 @@ from presidio_analyzer import PatternRecognizer from presidio_image_redactor import BboxProcessor, ImageAnalyzerEngine +from presidio_image_redactor.entities import ImageRecognizerResult class ImageRedactorEngine: @@ -24,15 +25,15 @@ def __init__( self.bbox_processor = BboxProcessor() - def redact( + def redact_and_return_bbox( self, - image: Image, + image: Image.Image, fill: Union[int, Tuple[int, int, int]] = (0, 0, 0), ocr_kwargs: Optional[dict] = None, ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None, **text_analyzer_kwargs, - ) 
-> Image: - """Redact method to redact the given image. + ) -> Tuple[Image.Image, List[ImageRecognizerResult]]: + """Redact method to redact the given image and return the bboxes. Please notice, this method duplicates the image, creates a new instance and manipulate it. @@ -45,7 +46,7 @@ def redact( :param text_analyzer_kwargs: Additional values for the analyze method in AnalyzerEngine. - :return: the redacted image + :return: the redacted image and the list of bboxes """ image = ImageChops.duplicate(image) @@ -77,7 +78,41 @@ def redact( y1 = y0 + box.height draw.rectangle([x0, y0, x1, y1], fill=fill) - return image + return image, bboxes + + def redact( + self, + image: Image.Image, + fill: Union[int, Tuple[int, int, int]] = (0, 0, 0), + ocr_kwargs: Optional[dict] = None, + ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None, + **text_analyzer_kwargs, + ) -> Image.Image: + """Redact method to redact the given image. + + Please notice, this method duplicates the image, creates a new instance and + manipulate it. + :param image: PIL Image to be processed. + :param fill: colour to fill the shape - int (0-255) for + grayscale or Tuple(R, G, B) for RGB. + :param ocr_kwargs: Additional params for OCR methods. + :param ad_hoc_recognizers: List of PatternRecognizer objects to use + for ad-hoc recognizer. + :param text_analyzer_kwargs: Additional values for the analyze method + in AnalyzerEngine. 
+ + :return: the redacted image + """ + + redacted_image, _ = self.redact_and_return_bbox( + image=image, + fill=fill, + ocr_kwargs=ocr_kwargs, + ad_hoc_recognizers=ad_hoc_recognizers, + **text_analyzer_kwargs, + ) + + return redacted_image @staticmethod def _check_ad_hoc_recognizer_list( diff --git a/presidio-image-redactor/tests/integration/test_image_redactor_engine.py b/presidio-image-redactor/tests/integration/test_image_redactor_engine.py index c119178f59..3fadb02ef8 100644 --- a/presidio-image-redactor/tests/integration/test_image_redactor_engine.py +++ b/presidio-image-redactor/tests/integration/test_image_redactor_engine.py @@ -6,6 +6,8 @@ from tests.integration.methods import get_resource_image, compare_images, image_sim +from presidio_image_redactor.entities import ImageRecognizerResult + red_fill = (255, 0, 0) @@ -90,3 +92,95 @@ def test_given_analzyer_kwargs_then_different_entities_are_redacted(engine_build assert not compare_images(redacted_image_no_args, redacted_image_entities_args) assert not compare_images(redacted_image_no_args, redacted_image_score_args) assert not compare_images(redacted_image_entities_args, redacted_image_score_args) + + +@pytest.mark.parametrize("engine_builder", all_engines_required()) +def test_redact_and_return_bbox_returns_same_image_as_redact(engine_builder: Callable): + """Test that redact_and_return_bbox returns the same redacted image as redact.""" + + image = get_resource_image("ocr_test.png") + result_image = get_resource_image("ocr_test_redacted.png") + + engine = engine_builder() + redacted_image_from_bbox, _ = engine.redact_and_return_bbox(image, 1) + redacted_image_from_redact = engine.redact(image, 1) + + assert compare_images(redacted_image_from_bbox, result_image) + assert compare_images(redacted_image_from_bbox, redacted_image_from_redact) + + +@pytest.mark.parametrize("engine_builder", all_engines_required()) +def test_redact_and_return_bbox_returns_bboxes_for_image_with_text(engine_builder: Callable): + 
"""Test that redact_and_return_bbox returns non-empty bboxes for image with PII.""" + + image = get_resource_image("ocr_test.png") + + engine = engine_builder() + redacted_image, bboxes = engine.redact_and_return_bbox(image, 1) + + assert isinstance(bboxes, list) + assert len(bboxes) > 0 + + for bbox in bboxes: + assert isinstance(bbox, ImageRecognizerResult) + assert hasattr(bbox, 'left') + assert hasattr(bbox, 'top') + assert hasattr(bbox, 'width') + assert hasattr(bbox, 'height') + assert hasattr(bbox, 'entity_type') + assert hasattr(bbox, 'score') + assert isinstance(bbox.left, int) + assert isinstance(bbox.top, int) + assert isinstance(bbox.width, int) + assert isinstance(bbox.height, int) + assert bbox.width > 0 + assert bbox.height > 0 + + +@pytest.mark.parametrize("engine_builder", all_engines_required()) +def test_redact_and_return_bbox_returns_empty_bboxes_for_image_without_text(engine_builder: Callable): + """Test that redact_and_return_bbox returns empty bboxes for image without PII.""" + + image = get_resource_image("no_ocr.jpg") + + engine = engine_builder() + redacted_image, bboxes = engine.redact_and_return_bbox(image, red_fill) + + assert isinstance(bboxes, list) + assert len(bboxes) == 0 + + +@pytest.mark.parametrize("engine_builder", all_engines_required()) +def test_redact_and_return_bbox_with_matrix_fill(engine_builder: Callable): + """Test redact_and_return_bbox with matrix fill color.""" + + image = get_resource_image("ocr_test.png") + expected_result_image = get_resource_image("ocr_test_redacted_matrix.png") + + engine = engine_builder() + redacted_image, bboxes = engine.redact_and_return_bbox(image, red_fill) + + assert image_sim(redacted_image, expected_result_image) > image_sim(redacted_image, image) + assert len(bboxes) > 0 + + +@pytest.mark.parametrize("engine_builder", all_engines_required()) +def test_redact_and_return_bbox_with_analyzer_kwargs(engine_builder: Callable): + """Test that redact_and_return_bbox works with analyzer kwargs 
like entities and score_threshold.""" + + image = get_resource_image("kwargs_test.jpg") + + engine = engine_builder() + redacted_image_no_args, bboxes_no_args = engine.redact_and_return_bbox(image) + redacted_image_entities, bboxes_entities = engine.redact_and_return_bbox( + image, entities=["PERSON", "LOCATION"] + ) + redacted_image_score, bboxes_score = engine.redact_and_return_bbox( + image, score_threshold=1 + ) + + assert not compare_images(redacted_image_no_args, redacted_image_entities) + assert not compare_images(redacted_image_no_args, redacted_image_score) + assert not compare_images(redacted_image_entities, redacted_image_score) + + assert len(bboxes_no_args) != len(bboxes_entities) or len(bboxes_no_args) != len(bboxes_score)