Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ All notable changes to this project will be documented in this file.
### Image Redactor
#### Changed
- DICOM: use_metadata now uses both is_patient and is_name to generate the PHI word list (via a change to _make_phi_list).
- Image Redactor: Added redact_and_return_bbox method to ImageRedactorEngine, which returns both the redacted image and the detected bounding boxes for redacted regions.

## [2.2.360] - 2025-09-09
### Analyzer
Expand Down
3 changes: 3 additions & 0 deletions docs/image-redactor/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ Pre-requisites:

# Redact the image with pink color
redacted_image = engine.redact(image, (255, 192, 203))

# Optional: Redact the image and also return the bounding boxes of the redacted regions
redacted_image, bboxes = engine.redact_and_return_bbox(image, (255, 192, 203))

# save the redacted image
redacted_image.save("new_image.png")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from presidio_analyzer import PatternRecognizer

from presidio_image_redactor import BboxProcessor, ImageAnalyzerEngine
from presidio_image_redactor.entities import ImageRecognizerResult


class ImageRedactorEngine:
Expand All @@ -24,15 +25,15 @@ def __init__(

self.bbox_processor = BboxProcessor()

def redact(
def redact_and_return_bbox(
self,
image: Image,
image: Image.Image,
fill: Union[int, Tuple[int, int, int]] = (0, 0, 0),
ocr_kwargs: Optional[dict] = None,
ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None,
**text_analyzer_kwargs,
) -> Image:
"""Redact method to redact the given image.
) -> Tuple[Image.Image, List[ImageRecognizerResult]]:
"""Redact method to redact the given image and return the bboxes.

Please notice, this method duplicates the image, creates a new instance and
manipulate it.
Expand All @@ -45,7 +46,7 @@ def redact(
:param text_analyzer_kwargs: Additional values for the analyze method
in AnalyzerEngine.

:return: the redacted image
:return: the redacted image and the list of bboxes
"""

image = ImageChops.duplicate(image)
Expand Down Expand Up @@ -77,7 +78,41 @@ def redact(
y1 = y0 + box.height
draw.rectangle([x0, y0, x1, y1], fill=fill)

return image
return image, bboxes

def redact(
    self,
    image: Image.Image,
    fill: Union[int, Tuple[int, int, int]] = (0, 0, 0),
    ocr_kwargs: Optional[dict] = None,
    ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None,
    **text_analyzer_kwargs,
) -> Image.Image:
    """Redact the given image and return only the redacted image.

    This is a thin wrapper around ``redact_and_return_bbox``: all arguments
    are forwarded unchanged and the bounding boxes it returns are discarded.
    The redaction is drawn on a duplicate of ``image`` created by that
    method, so a new image instance is returned.

    :param image: PIL Image to be processed.
    :param fill: colour to fill the shape - int (0-255) for
    grayscale or Tuple(R, G, B) for RGB.
    :param ocr_kwargs: Additional params for OCR methods.
    :param ad_hoc_recognizers: List of PatternRecognizer objects to use
    for ad-hoc recognizer.
    :param text_analyzer_kwargs: Additional values for the analyze method
    in AnalyzerEngine.

    :return: the redacted image
    """
    # Forward by keyword so the call stays aligned with the delegate's
    # signature regardless of positional order.
    image_and_bboxes = self.redact_and_return_bbox(
        image=image,
        fill=fill,
        ocr_kwargs=ocr_kwargs,
        ad_hoc_recognizers=ad_hoc_recognizers,
        **text_analyzer_kwargs,
    )

    # Callers of this method only want the image; the bboxes are dropped.
    redacted_image, _discarded_bboxes = image_and_bboxes
    return redacted_image

@staticmethod
def _check_ad_hoc_recognizer_list(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

from tests.integration.methods import get_resource_image, compare_images, image_sim

from presidio_image_redactor.entities import ImageRecognizerResult

red_fill = (255, 0, 0)


Expand Down Expand Up @@ -90,3 +92,95 @@ def test_given_analzyer_kwargs_then_different_entities_are_redacted(engine_build
assert not compare_images(redacted_image_no_args, redacted_image_entities_args)
assert not compare_images(redacted_image_no_args, redacted_image_score_args)
assert not compare_images(redacted_image_entities_args, redacted_image_score_args)


@pytest.mark.parametrize("engine_builder", all_engines_required())
def test_redact_and_return_bbox_returns_same_image_as_redact(engine_builder: Callable):
    """Check that both redaction entry points produce the same image.

    The image returned by ``redact_and_return_bbox`` must match the stored
    reference image and the image returned by ``redact`` for the same input.
    """
    source_image = get_resource_image("ocr_test.png")
    reference_image = get_resource_image("ocr_test_redacted.png")

    redactor = engine_builder()
    image_via_bbox_api, _ = redactor.redact_and_return_bbox(source_image, 1)
    image_via_redact = redactor.redact(source_image, 1)

    # Compare against the stored reference first, then against redact().
    assert compare_images(image_via_bbox_api, reference_image)
    assert compare_images(image_via_bbox_api, image_via_redact)


@pytest.mark.parametrize("engine_builder", all_engines_required())
def test_redact_and_return_bbox_returns_bboxes_for_image_with_text(engine_builder: Callable):
    """Check that an image with PII yields a non-empty list of well-formed bboxes."""
    source_image = get_resource_image("ocr_test.png")

    redactor = engine_builder()
    _, bboxes = redactor.redact_and_return_bbox(source_image, 1)

    assert isinstance(bboxes, list)
    assert bboxes

    expected_attributes = ("left", "top", "width", "height", "entity_type", "score")
    for detected in bboxes:
        assert isinstance(detected, ImageRecognizerResult)

        # Every result must expose its geometry plus the entity metadata.
        for attribute in expected_attributes:
            assert hasattr(detected, attribute)

        # Geometry is expected as integer pixel values.
        for coordinate in (detected.left, detected.top, detected.width, detected.height):
            assert isinstance(coordinate, int)

        # A redacted region must have a positive area.
        assert detected.width > 0
        assert detected.height > 0


@pytest.mark.parametrize("engine_builder", all_engines_required())
def test_redact_and_return_bbox_returns_empty_bboxes_for_image_without_text(engine_builder: Callable):
    """Check that an image without PII yields an empty list of bboxes."""
    source_image = get_resource_image("no_ocr.jpg")

    redactor = engine_builder()
    _, bboxes = redactor.redact_and_return_bbox(source_image, red_fill)

    # The result must still be a list, just with nothing in it.
    assert isinstance(bboxes, list)
    assert not bboxes


@pytest.mark.parametrize("engine_builder", all_engines_required())
def test_redact_and_return_bbox_with_matrix_fill(engine_builder: Callable):
    """Check redact_and_return_bbox when called with the RGB ``red_fill`` colour.

    The redacted output must be more similar to the stored reference image
    than to the unredacted input, and at least one bbox must be returned.
    """
    source_image = get_resource_image("ocr_test.png")
    reference_image = get_resource_image("ocr_test_redacted_matrix.png")

    redactor = engine_builder()
    redacted_image, bboxes = redactor.redact_and_return_bbox(source_image, red_fill)

    similarity_to_reference = image_sim(redacted_image, reference_image)
    similarity_to_source = image_sim(redacted_image, source_image)

    assert similarity_to_reference > similarity_to_source
    assert len(bboxes) > 0


@pytest.mark.parametrize("engine_builder", all_engines_required())
def test_redact_and_return_bbox_with_analyzer_kwargs(engine_builder: Callable):
    """Check that analyzer kwargs (entities, score_threshold) change the outcome.

    The same image is redacted three times: with no extra arguments, with a
    restricted entity list, and with ``score_threshold=1``. The three images
    must differ pairwise, and the bbox counts must not all be equal.
    """
    source_image = get_resource_image("kwargs_test.jpg")

    redactor = engine_builder()
    image_default, bboxes_default = redactor.redact_and_return_bbox(source_image)
    image_entities, bboxes_entities = redactor.redact_and_return_bbox(
        source_image, entities=["PERSON", "LOCATION"]
    )
    image_score, bboxes_score = redactor.redact_and_return_bbox(
        source_image, score_threshold=1
    )

    assert not compare_images(image_default, image_entities)
    assert not compare_images(image_default, image_score)
    assert not compare_images(image_entities, image_score)

    # At least one filtered run must return a different number of bboxes
    # than the unfiltered run.
    default_count = len(bboxes_default)
    assert not (
        default_count == len(bboxes_entities) and default_count == len(bboxes_score)
    )