Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ All notable changes to this project will be documented in this file.
### Image Redactor
#### Changed
- DICOM: use_metadata will now use both is_patient and is_name to generate the PHI list of words via change to _make_phi_list.
- Image Redactor: Added redact_and_return_bbox method to ImageRedactorEngine, which returns both the redacted image and the detected bounding boxes for redacted regions.

## [2.2.360] - 2025-09-09
### Analyzer
Expand Down
3 changes: 3 additions & 0 deletions docs/image-redactor/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ Pre-requisites:

# Redact the image with pink color
redacted_image = engine.redact(image, (255, 192, 203))

# Optional: redact the image and also get the bounding boxes of the redacted regions
redacted_image, bboxes = engine.redact_and_return_bbox(image, (255, 192, 203))

# save the redacted image
redacted_image.save("new_image.png")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from presidio_analyzer import PatternRecognizer

from presidio_image_redactor import BboxProcessor, ImageAnalyzerEngine
from presidio_image_redactor.entities import ImageRecognizerResult


class ImageRedactorEngine:
Expand All @@ -24,15 +25,15 @@ def __init__(

self.bbox_processor = BboxProcessor()

def redact(
def redact_and_return_bbox(
self,
image: Image,
fill: Union[int, Tuple[int, int, int]] = (0, 0, 0),
ocr_kwargs: Optional[dict] = None,
ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None,
**text_analyzer_kwargs,
) -> Image:
"""Redact method to redact the given image.
) -> Tuple[Image, List[ImageRecognizerResult]]:
"""Redact method to redact the given image and return the bboxes.

Please notice, this method duplicates the image, creates a new instance and
manipulate it.
Expand All @@ -45,7 +46,7 @@ def redact(
:param text_analyzer_kwargs: Additional values for the analyze method
in AnalyzerEngine.

:return: the redacted image
:return: the redacted image and the list of bboxes
"""

image = ImageChops.duplicate(image)
Expand Down Expand Up @@ -77,7 +78,41 @@ def redact(
y1 = y0 + box.height
draw.rectangle([x0, y0, x1, y1], fill=fill)

return image
return image, bboxes

def redact(
    self,
    image: Image,
    fill: Union[int, Tuple[int, int, int]] = (0, 0, 0),
    ocr_kwargs: Optional[dict] = None,
    ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None,
    **text_analyzer_kwargs,
) -> Image:
    """Redact the given image and return only the redacted image.

    This is a thin wrapper around ``redact_and_return_bbox``: every argument
    is forwarded unchanged and the bounding boxes it returns are discarded.

    Please note that the input image is duplicated; the redaction is drawn
    on the new instance.

    :param image: PIL Image to be processed.
    :param fill: colour to fill the shape - int (0-255) for
    grayscale or Tuple(R, G, B) for RGB.
    :param ocr_kwargs: Additional params for OCR methods.
    :param ad_hoc_recognizers: List of PatternRecognizer objects to use
    for ad-hoc recognizer.
    :param text_analyzer_kwargs: Additional values for the analyze method
    in AnalyzerEngine.

    :return: the redacted image
    """
    outcome = self.redact_and_return_bbox(
        image=image,
        fill=fill,
        ocr_kwargs=ocr_kwargs,
        ad_hoc_recognizers=ad_hoc_recognizers,
        **text_analyzer_kwargs,
    )

    # The second element holds the bounding boxes, which this method drops.
    redacted, _bboxes = outcome
    return redacted

@staticmethod
def _check_ad_hoc_recognizer_list(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

from tests.integration.methods import get_resource_image, compare_images, image_sim

from presidio_image_redactor.entities import ImageRecognizerResult

red_fill = (255, 0, 0)


Expand Down Expand Up @@ -90,3 +92,95 @@ def test_given_analzyer_kwargs_then_different_entities_are_redacted(engine_build
assert not compare_images(redacted_image_no_args, redacted_image_entities_args)
assert not compare_images(redacted_image_no_args, redacted_image_score_args)
assert not compare_images(redacted_image_entities_args, redacted_image_score_args)


@pytest.mark.parametrize("engine_builder", all_engines_required())
def test_redact_and_return_bbox_returns_same_image_as_redact(engine_builder: Callable):
    """Check that redact_and_return_bbox and redact produce the same image."""

    source = get_resource_image("ocr_test.png")
    expected = get_resource_image("ocr_test_redacted.png")

    redactor = engine_builder()
    via_bbox, _ = redactor.redact_and_return_bbox(source, 1)
    via_redact = redactor.redact(source, 1)

    # The bbox variant must match the stored reference and the plain redact.
    assert compare_images(via_bbox, expected)
    assert compare_images(via_bbox, via_redact)


@pytest.mark.parametrize("engine_builder", all_engines_required())
def test_redact_and_return_bbox_returns_bboxes_for_image_with_text(engine_builder: Callable):
    """Check that an image containing PII yields a non-empty, well-formed bbox list."""

    source = get_resource_image("ocr_test.png")

    redactor = engine_builder()
    _, boxes = redactor.redact_and_return_bbox(source, 1)

    assert isinstance(boxes, list)
    assert len(boxes) > 0

    expected_attrs = ("left", "top", "width", "height", "entity_type", "score")
    geometry_attrs = ("left", "top", "width", "height")

    for box in boxes:
        assert isinstance(box, ImageRecognizerResult)
        # Every result exposes geometry plus the entity metadata.
        for attr in expected_attrs:
            assert hasattr(box, attr)
        # Geometry values are integers.
        for attr in geometry_attrs:
            assert isinstance(getattr(box, attr), int)
        assert box.width > 0
        assert box.height > 0


@pytest.mark.parametrize("engine_builder", all_engines_required())
def test_redact_and_return_bbox_returns_empty_bboxes_for_image_without_text(engine_builder: Callable):
    """Check that an image without PII yields an empty list of bboxes."""

    source = get_resource_image("no_ocr.jpg")

    redactor = engine_builder()
    _, boxes = redactor.redact_and_return_bbox(source, red_fill)

    assert isinstance(boxes, list)
    # Nothing was detected, so nothing should be reported.
    assert len(boxes) == 0


@pytest.mark.parametrize("engine_builder", all_engines_required())
def test_redact_and_return_bbox_with_matrix_fill(engine_builder: Callable):
    """Check redact_and_return_bbox when an RGB tuple is used as the fill.

    The redacted output must be more similar to the stored
    ``ocr_test_redacted_matrix.png`` reference than to the untouched input,
    and at least one bbox must be returned.
    """

    source = get_resource_image("ocr_test.png")
    reference = get_resource_image("ocr_test_redacted_matrix.png")

    redactor = engine_builder()
    redacted, boxes = redactor.redact_and_return_bbox(source, red_fill)

    similarity_to_reference = image_sim(redacted, reference)
    similarity_to_source = image_sim(redacted, source)

    assert similarity_to_reference > similarity_to_source
    assert len(boxes) > 0


@pytest.mark.parametrize("engine_builder", all_engines_required())
def test_redact_and_return_bbox_with_analyzer_kwargs(engine_builder: Callable):
    """Check that analyzer kwargs (entities, score_threshold) change the outcome.

    The same image is redacted three times: with no extra arguments, with an
    ``entities`` filter, and with ``score_threshold=1``. The three redacted
    images must differ pairwise, and the bbox counts must not all be equal.
    """

    source = get_resource_image("kwargs_test.jpg")

    redactor = engine_builder()
    plain_image, plain_boxes = redactor.redact_and_return_bbox(source)
    entities_image, entities_boxes = redactor.redact_and_return_bbox(
        source, entities=["PERSON", "LOCATION"]
    )
    score_image, score_boxes = redactor.redact_and_return_bbox(
        source, score_threshold=1
    )

    assert not compare_images(plain_image, entities_image)
    assert not compare_images(plain_image, score_image)
    assert not compare_images(entities_image, score_image)

    # At least one filtered run must report a different number of bboxes.
    plain_count = len(plain_boxes)
    assert not (
        plain_count == len(entities_boxes) and plain_count == len(score_boxes)
    )
Loading