diff --git a/e2e-tests/common/methods.py b/e2e-tests/common/methods.py index 74ed338eb2..352ab08bdc 100644 --- a/e2e-tests/common/methods.py +++ b/e2e-tests/common/methods.py @@ -5,8 +5,8 @@ import requests from common.constants import ( - ANONYMIZER_BASE_URL, ANALYZER_BASE_URL, + ANONYMIZER_BASE_URL, IMAGE_REDACTOR_BASE_URL, ) diff --git a/e2e-tests/resources/test_ollama_enabled_recognizers.yaml b/e2e-tests/resources/test_ollama_enabled_recognizers.yaml index 390f322533..6570995737 100644 --- a/e2e-tests/resources/test_ollama_enabled_recognizers.yaml +++ b/e2e-tests/resources/test_ollama_enabled_recognizers.yaml @@ -200,9 +200,10 @@ recognizers: type: predefined enabled: false - - name: OllamaLangExtractRecognizer + - name: e2eollama supported_languages: - en type: predefined + class_name: OllamaLangExtractRecognizer enabled: true config_path: e2e-tests/resources/ollama_test_config.yaml diff --git a/e2e-tests/tests/test_package_e2e_integration_flows.py b/e2e-tests/tests/test_package_e2e_integration_flows.py index 0bdcc12fdd..7d30426625 100644 --- a/e2e-tests/tests/test_package_e2e_integration_flows.py +++ b/e2e-tests/tests/test_package_e2e_integration_flows.py @@ -71,16 +71,18 @@ def test_given_text_with_pii_using_ollama_recognizer_then_detects_entities(tmp_p text_to_test = "Patient John Smith, SSN 123-45-6789, email john@example.com, phone 555-123-4567, lives at 123 Main St, works at Acme Corp" - # Use pre-configured config file with small model (qwen2.5:1.5b) import os config_path = os.path.join( os.path.dirname(__file__), "..", "resources", "ollama_test_config.yaml" ) - # Create Ollama recognizer with custom config - ollama_recognizer = OllamaLangExtractRecognizer(config_path=config_path) + ollama_recognizer = OllamaLangExtractRecognizer( + config_path=config_path, name="e2eollama" + ) + + assert ollama_recognizer.name == "e2eollama", \ + f"Expected recognizer name to be 'e2eollama', got '{ollama_recognizer.name}'" - # Create analyzer with ONLY Ollama recognizer (no NLP engine, no default recognizers) from presidio_analyzer.recognizer_registry import RecognizerRegistry registry = RecognizerRegistry() registry.add_recognizer(ollama_recognizer) @@ -90,13 +92,10 @@ def test_given_text_with_pii_using_ollama_recognizer_then_detects_entities(tmp_p supported_languages=["en"] ) - # Analyze text results = analyzer.analyze(text_to_test, language="en") - # Verify at least some entities were detected assert len(results) > 0, "Expected to detect at least one PII entity" - # Check which recognizers participated in detection recognizers_used = set() langextract_detected_at_least_one = False @@ -108,12 +107,11 @@ def test_given_text_with_pii_using_ollama_recognizer_then_detects_entities(tmp_p recognizers_used.add(recognizer_name) langextract_detected_at_least_one |= ( - recognizer_name == "Ollama LangExtract PII" + recognizer_name == "e2eollama" ) - # Verify that Ollama LangExtract recognizer participated in detection assert langextract_detected_at_least_one, \ - f"Expected 'Ollama LangExtract PII' recognizer to detect at least one entity. Recognizers used: {recognizers_used}" + f"Expected 'e2eollama' recognizer to detect at least one entity. Recognizers used: {recognizers_used}" @pytest.mark.package @@ -133,7 +131,6 @@ def test_ollama_recognizer_loads_from_yaml_configuration_when_enabled(): if not OLLAMA_RECOGNIZER_AVAILABLE: pytest.skip("LangExtract not installed") - # Check if Ollama is available import os try: import requests @@ -144,7 +141,6 @@ def test_ollama_recognizer_loads_from_yaml_configuration_when_enabled(): except Exception: pytest.skip("Ollama service not available") - # Load recognizer registry from YAML config with Ollama enabled from presidio_analyzer.recognizer_registry import RecognizerRegistryProvider config_path = os.path.join( @@ -155,33 +151,25 @@ def test_ollama_recognizer_loads_from_yaml_configuration_when_enabled(): provider = RecognizerRegistryProvider(conf_file=config_path) registry = provider.create_recognizer_registry() - # Verify Ollama recognizer was loaded - ollama_recognizers = [r for r in registry.recognizers if "Ollama" in r.name] + ollama_recognizers = [r for r in registry.recognizers if r.name == "e2eollama"] assert len(ollama_recognizers) == 1, \ - f"Expected exactly 1 Ollama recognizer, found {len(ollama_recognizers)}" + f"Expected exactly 1 recognizer with name 'e2eollama', found {len(ollama_recognizers)}" + + ollama_recognizer = ollama_recognizers[0] - ollama_rec = ollama_recognizers[0] - assert ollama_rec.name == "Ollama LangExtract PII" - assert ollama_rec.supported_language == "en" - assert len(ollama_rec.supported_entities) > 0 + assert ollama_recognizer.__class__.__name__ == "OllamaLangExtractRecognizer", \ + f"Expected class OllamaLangExtractRecognizer, got {ollama_recognizer.__class__.__name__}" + + assert ollama_recognizer.supported_language == "en" + assert len(ollama_recognizer.supported_entities) > 0 - # Test functionality: analyze text with the loaded recognizer analyzer = AnalyzerEngine(registry=registry, supported_languages=["en"]) text_to_test = "Patient John Smith, SSN 123-45-6789, email john@example.com, phone 555-123-4567, lives at 123 Main St, works at Acme Corp" results = analyzer.analyze(text_to_test, language="en") - # Should detect entities assert len(results) > 0, "Expected to detect at least one PII entity" - # Check if Ollama recognizer detected anything - ollama_detected = any( - r.recognition_metadata and - "Ollama" in r.recognition_metadata.get(RecognizerResult.RECOGNIZER_NAME_KEY, "") - for r in results - ) - - # At minimum, other recognizers should detect common entities entity_types = {r.entity_type for r in results} expected_entities = {"EMAIL_ADDRESS", "PERSON", "PHONE_NUMBER", "US_SSN"} detected_expected = entity_types & expected_entities @@ -189,6 +177,6 @@ def test_ollama_recognizer_loads_from_yaml_configuration_when_enabled(): assert len(detected_expected) >= 2, \ f"Expected at least 2 entities from {expected_entities}, detected: {entity_types}" - print(f"\n✓ Ollama recognizer loaded successfully from YAML config") + print(f"\n✓ Ollama recognizer 'e2eollama' loaded successfully from YAML config") + print(f" Class: {ollama_recognizer.__class__.__name__}") print(f" Detected entities: {entity_types}") - print(f" Ollama participated: {ollama_detected}") diff --git a/presidio-analyzer/presidio_analyzer/conf/default_recognizers.yaml b/presidio-analyzer/presidio_analyzer/conf/default_recognizers.yaml index 36305c898d..2472868abc 100644 --- a/presidio-analyzer/presidio_analyzer/conf/default_recognizers.yaml +++ b/presidio-analyzer/presidio_analyzer/conf/default_recognizers.yaml @@ -8,6 +8,7 @@ recognizers: # For predefined: # - If only a recognizer name is provided, a predefined recognizer with this name and default parameters will be loaded. # - If a parameter isn't provided, the default one would be loaded. + # - Use 'class_name' to specify the Python class when using a custom 'name' for display/metadata # For custom: # - See an example configuration here: https://github.com/microsoft/presidio/blob/main/presidio-analyzer/presidio_analyzer/conf/example_recognizers.yaml # - Custom pattern recognizers with this configuration can be added to this file, with type: custom @@ -206,8 +207,9 @@ recognizers: - en type: predefined enabled: false - - - name: OllamaLangExtractRecognizer + + - name: OllamaRecognizer + class_name: OllamaLangExtractRecognizer supported_languages: - en type: predefined diff --git a/presidio-analyzer/presidio_analyzer/input_validation/yaml_recognizer_models.py b/presidio-analyzer/presidio_analyzer/input_validation/yaml_recognizer_models.py index ea7c2c81f4..cff627f346 100644 --- a/presidio-analyzer/presidio_analyzer/input_validation/yaml_recognizer_models.py +++ b/presidio-analyzer/presidio_analyzer/input_validation/yaml_recognizer_models.py @@ -34,7 +34,8 @@ def validate_language_code(cls, v: str) -> str: class BaseRecognizerConfig(BaseModel): """Base validation for all recognizer configuration types. - :param name: Name of the recognizer + :param name: Instance name used in analysis results. Defaults to class name. + :param class_name: Python class name for lookup. If not provided, uses 'name'. :param enabled: Whether the recognizer is enabled :param type: Type of recognizer (predefined/custom) :param supported_language: Single supported language (legacy) @@ -50,7 +51,14 @@ class BaseRecognizerConfig(BaseModel): :param supported_entities: List of supported entities for this recognizer. """ - name: str = Field(..., description="Name of the recognizer") + name: str = Field(..., description="Instance name for the recognizer") + class_name: Optional[str] = Field( + default=None, + description=( + "Python class name for predefined recognizers " + "(if different from instance name)" + ), + ) enabled: bool = Field(default=True, description="Whether the recognizer is enabled") type: Optional[str] = Field( default="predefined", description="Type of recognizer (predefined/custom)" @@ -136,11 +144,12 @@ class PredefinedRecognizerConfig(BaseRecognizerConfig): @model_validator(mode="after") def validate_predefined_recognizer_exists(self): """Validate that the predefined recognizer class actually exists.""" + recognizer_class_name = self.class_name if self.class_name else self.name try: - RecognizerListLoader.get_existing_recognizer_cls(self.name) + RecognizerListLoader.get_existing_recognizer_cls(recognizer_class_name) except PredefinedRecognizerNotFoundError as e: raise ValueError( - f"Predefined recognizer '{self.name}' not found: {str(e)}" + f"Predefined recognizer '{recognizer_class_name}' not found: {str(e)}" ) from e return self @@ -201,8 +210,6 @@ def check_predefined_name_conflict(cls, data: Any) -> Any: f"for your custom recognizer." ) except PredefinedRecognizerNotFoundError: - # Name is not a predefined recognizer, - # which is fine for custom recognizers pass return data @@ -328,7 +335,6 @@ def parse_recognizers( parsed_recognizers = [] for recognizer in recognizers: if isinstance(recognizer, str): - # Simple string recognizer name - treat as predefined parsed_recognizers.append(recognizer) continue @@ -346,7 +352,6 @@ def parse_recognizers( f"Either use type: 'custom' or remove these fields." ) - # Auto-detect type if not provided if not recognizer_type: if "patterns" in recognizer or "deny_list" in recognizer: recognizer_type = "custom" @@ -357,7 +362,6 @@ def parse_recognizers( recognizer_type = "predefined" recognizer["type"] = recognizer_type - # Final append based on resolved type (only once) if recognizer_type == "predefined": parsed_recognizers.append(PredefinedRecognizerConfig(**recognizer)) elif recognizer_type == "custom": @@ -369,7 +373,6 @@ def parse_recognizers( ) continue - # Fallback: unrecognized structure, keep as-is parsed_recognizers.append(recognizer) return parsed_recognizers @@ -378,7 +381,6 @@ def parse_recognizers( def __check_if_predefined(cls, recognizer_name: Optional[Any]) -> None: try: RecognizerListLoader.get_existing_recognizer_cls(recognizer_name) - # If we reach here, it IS a predefined recognizer, so raise an error raise ValueError( f"Recognizer '{recognizer_name}' conflicts with a predefined " f"recognizer. " @@ -388,7 +390,6 @@ def __check_if_predefined(cls, recognizer_name: Optional[Any]) -> None: f"for your custom recognizer." ) except PredefinedRecognizerNotFoundError: - # Name is not a predefined recognizer, which is fine for custom recognizers pass @model_validator(mode="after") @@ -401,12 +402,10 @@ def validate_language_presence(self): custom_without_language_present = False for r in self.recognizers: if isinstance(r, (PredefinedRecognizerConfig, CustomRecognizerConfig)): - # Track if any language is defined if (r.supported_language and r.supported_language.strip()) or ( r.supported_languages and len(r.supported_languages) > 0 ): any_language_defined = True - # Track custom recognizers lacking language info if ( isinstance(r, CustomRecognizerConfig) and not r.supported_language diff --git a/presidio-analyzer/presidio_analyzer/lm_recognizer.py b/presidio-analyzer/presidio_analyzer/lm_recognizer.py index 7a79f0b163..b4a4a08680 100644 --- a/presidio-analyzer/presidio_analyzer/lm_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/lm_recognizer.py @@ -31,13 +31,13 @@ def __init__( self, supported_entities: Optional[List[str]] = None, supported_language: str = "en", - name: str = "Language Model PII Recognizer", + name: Optional[str] = None, version: str = "1.0.0", model_id: Optional[str] = None, temperature: Optional[float] = None, min_score: float = 0.5, labels_to_ignore: Optional[List[str]] = None, - enable_generic_consolidation: bool = True + enable_generic_consolidation: bool = True, ): """Initialize LM recognizer. diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/australia/au_abn_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/australia/au_abn_recognizer.py index 08c4d2a58c..a42914ff1c 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/australia/au_abn_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/australia/au_abn_recognizer.py @@ -50,6 +50,7 @@ def __init__( supported_language: str = "en", supported_entity: str = "AU_ABN", replacement_pairs: Optional[List[Tuple[str, str]]] = None, + name: Optional[str] = None, ): self.replacement_pairs = ( replacement_pairs if replacement_pairs else [("-", ""), (" ", "")] @@ -61,6 +62,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def validate_result(self, pattern_text: str) -> bool: diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/australia/au_acn_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/australia/au_acn_recognizer.py index 808cb34402..78fc125bb3 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/australia/au_acn_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/australia/au_acn_recognizer.py @@ -47,6 +47,7 @@ def __init__( supported_language: str = "en", supported_entity: str = "AU_ACN", replacement_pairs: Optional[List[Tuple[str, str]]] = None, + name: Optional[str] = None, ): self.replacement_pairs = ( replacement_pairs if replacement_pairs else [("-", ""), (" ", "")] @@ -58,6 +59,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def validate_result(self, pattern_text: str) -> bool: diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/australia/au_medicare_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/australia/au_medicare_recognizer.py index 7492a289dc..1a74a17e0e 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/australia/au_medicare_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/australia/au_medicare_recognizer.py @@ -47,6 +47,7 @@ def __init__( supported_language: str = "en", supported_entity: str = "AU_MEDICARE", replacement_pairs: Optional[List[Tuple[str, str]]] = None, + name: Optional[str] = None, ): self.replacement_pairs = ( replacement_pairs if replacement_pairs else [("-", ""), (" ", "")] @@ -58,6 +59,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def validate_result(self, pattern_text: str) -> bool: diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/australia/au_tfn_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/australia/au_tfn_recognizer.py index f5082e00d3..52f28caf8c 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/australia/au_tfn_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/australia/au_tfn_recognizer.py @@ -53,6 +53,7 @@ def __init__( supported_language: str = "en", supported_entity: str = "AU_TFN", replacement_pairs: Optional[List[Tuple[str, str]]] = None, + name: Optional[str] = None, ): self.replacement_pairs = ( replacement_pairs if replacement_pairs else [("-", ""), (" ", "")] @@ -64,6 +65,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def validate_result(self, pattern_text: str) -> bool: diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/finland/fi_personal_identity_code_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/finland/fi_personal_identity_code_recognizer.py index a31314e994..dc3fcc38af 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/finland/fi_personal_identity_code_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/finland/fi_personal_identity_code_recognizer.py @@ -34,6 +34,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "fi", supported_entity: str = "FI_PERSONAL_IDENTITY_CODE", + name: Optional[str] = None, ): patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -42,6 +43,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def validate_result(self, pattern_text: str) -> Optional[bool]: diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_aadhaar_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_aadhaar_recognizer.py index af235c6531..0e9677d381 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_aadhaar_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_aadhaar_recognizer.py @@ -41,6 +41,7 @@ def __init__( supported_language: str = "en", supported_entity: str = "IN_AADHAAR", replacement_pairs: Optional[List[Tuple[str, str]]] = None, + name: Optional[str] = None, ) -> None: self.replacement_pairs = ( replacement_pairs @@ -54,6 +55,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def validate_result(self, pattern_text: str) -> bool: diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_gstin_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_gstin_recognizer.py index 14662e609b..9c9f8ba636 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_gstin_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_gstin_recognizer.py @@ -60,6 +60,7 @@ def __init__( supported_language: str = "en", supported_entity: str = "IN_GSTIN", replacement_pairs: Optional[List[Tuple[str, str]]] = None, + name: Optional[str] = None, ): self.replacement_pairs = ( replacement_pairs if replacement_pairs else [("-", ""), (" ", "")] @@ -71,6 +72,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) self.supported_entity = supported_entity diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_pan_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_pan_recognizer.py index aadd705816..25c126703d 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_pan_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_pan_recognizer.py @@ -53,6 +53,7 @@ def __init__( supported_language: str = "en", supported_entity: str = "IN_PAN", replacement_pairs: Optional[List[Tuple[str, str]]] = None, + name: Optional[str] = None, ): self.replacement_pairs = ( replacement_pairs if replacement_pairs else [("-", ""), (" ", "")] @@ -64,4 +65,5 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_passport_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_passport_recognizer.py index 16ba16f774..aa39a4d62d 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_passport_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_passport_recognizer.py @@ -34,6 +34,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "en", supported_entity: str = "IN_PASSPORT", + name: Optional[str] = None, ): patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -42,4 +43,5 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_vehicle_registration_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_vehicle_registration_recognizer.py index b090599e14..144ed9f304 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_vehicle_registration_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_vehicle_registration_recognizer.py @@ -331,6 +331,7 @@ def __init__( supported_language: str = "en", supported_entity: str = "IN_VEHICLE_REGISTRATION", replacement_pairs: Optional[List[Tuple[str, str]]] = None, + name: Optional[str] = None, ): self.replacement_pairs = ( replacement_pairs @@ -344,6 +345,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def validate_result(self, pattern_text: str) -> bool: diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_voter_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_voter_recognizer.py index d4ebaf4685..6416e01e26 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_voter_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/india/in_voter_recognizer.py @@ -43,6 +43,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "en", supported_entity: str = "IN_VOTER", + name: Optional[str] = None, ): patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -51,4 +52,5 @@ def __init__( context=context, supported_language=supported_language, supported_entity=supported_entity, + name=name, ) diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/italy/it_driver_license_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/italy/it_driver_license_recognizer.py index ed90f11260..ed603cb43c 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/italy/it_driver_license_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/italy/it_driver_license_recognizer.py @@ -31,6 +31,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "it", supported_entity: str = "IT_DRIVER_LICENSE", + name: Optional[str] = None, ) -> None: patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -39,4 +40,5 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/italy/it_fiscal_code_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/italy/it_fiscal_code_recognizer.py index 29373a3319..f322509521 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/italy/it_fiscal_code_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/italy/it_fiscal_code_recognizer.py @@ -36,6 +36,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "it", supported_entity: str = "IT_FISCAL_CODE", + name: Optional[str] = None, ): patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -44,6 +45,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def validate_result(self, pattern_text: str) -> Optional[bool]: diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/italy/it_identity_card_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/italy/it_identity_card_recognizer.py index 67d0c8ab3d..899cd2a5ff 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/italy/it_identity_card_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/italy/it_identity_card_recognizer.py @@ -59,6 +59,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "it", supported_entity: str = "IT_IDENTITY_CARD", + name: Optional[str] = None, ): patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -67,4 +68,5 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/italy/it_passport_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/italy/it_passport_recognizer.py index 8f2a7fe165..3a4f111741 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/italy/it_passport_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/italy/it_passport_recognizer.py @@ -38,6 +38,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "it", supported_entity: str = "IT_PASSPORT", + name: Optional[str] = None, ): patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -46,4 +47,5 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/italy/it_vat_code.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/italy/it_vat_code.py index acb7ba3287..58820c163f 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/italy/it_vat_code.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/italy/it_vat_code.py @@ -37,6 +37,8 @@ def __init__( supported_language: str = "it", supported_entity: str = "IT_VAT_CODE", replacement_pairs: Optional[List[Tuple[str, str]]] = None, + name: Optional[str] = None, + version: str = "0.0.1", ): self.replacement_pairs = ( replacement_pairs @@ -50,6 +52,8 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, + version=version, ) def validate_result(self, pattern_text: str) -> bool: diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/korea/kr_rrn_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/korea/kr_rrn_recognizer.py index 23e78e8e1b..0589bd1b5f 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/korea/kr_rrn_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/korea/kr_rrn_recognizer.py @@ -56,6 +56,7 @@ def __init__( supported_language: str = "ko", supported_entity: str = "KR_RRN", replacement_pairs: Optional[List[Tuple[str, str]]] = None, + name: Optional[str] = None, ): self.replacement_pairs = replacement_pairs if replacement_pairs else [("-", "")] @@ -66,6 +67,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def validate_result(self, pattern_text: str) -> Union[bool, None]: diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/poland/pl_pesel_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/poland/pl_pesel_recognizer.py index 0f75c5c849..ada430b89c 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/poland/pl_pesel_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/poland/pl_pesel_recognizer.py @@ -31,6 +31,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "pl", supported_entity: str = "PL_PESEL", + name: Optional[str] = None, ): patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -39,6 +40,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def validate_result(self, pattern_text: str) -> bool: # noqa: D102 diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/singapore/sg_fin_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/singapore/sg_fin_recognizer.py index 7f9060774c..9d28ead575 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/singapore/sg_fin_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/singapore/sg_fin_recognizer.py @@ -30,6 +30,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "en", supported_entity: str = "SG_NRIC_FIN", + name: Optional[str] = None, ): patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -38,4 +39,5 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/singapore/sg_uen_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/singapore/sg_uen_recognizer.py index e57d4a31ee..54f36685c6 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/singapore/sg_uen_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/singapore/sg_uen_recognizer.py @@ -83,6 +83,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "en", supported_entity: str = "SG_UEN", + name: Optional[str] = None, ): patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -91,6 +92,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def validate_result(self, pattern_text: str) -> Optional[bool]: diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/spain/es_nie_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/spain/es_nie_recognizer.py index 647528d24d..63ab7978a5 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/spain/es_nie_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/spain/es_nie_recognizer.py @@ -38,6 +38,7 @@ def __init__( supported_language: str = "es", supported_entity: str = "ES_NIE", replacement_pairs: Optional[List[Tuple[str, str]]] = None, + name: Optional[str] = None, ): self.replacement_pairs = ( replacement_pairs if replacement_pairs else [("-", ""), (" ", "")] @@ -49,6 +50,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def validate_result(self, pattern_text: str) -> bool: diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/spain/es_nif_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/spain/es_nif_recognizer.py index f494cf75c9..4141ccceae 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/spain/es_nif_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/spain/es_nif_recognizer.py @@ -33,6 +33,7 @@ def __init__( supported_language: str = "es", supported_entity: str = "ES_NIF", replacement_pairs: Optional[List[Tuple[str, str]]] = None, + name: Optional[str] = None, ): self.replacement_pairs = ( replacement_pairs if replacement_pairs else [("-", ""), (" ", "")] @@ -44,6 +45,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def validate_result(self, pattern_text: str) -> bool: # noqa: D102 diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/thai/th_tnin_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/thai/th_tnin_recognizer.py index 39a46eff75..86ff02028f 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/thai/th_tnin_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/thai/th_tnin_recognizer.py @@ -68,6 +68,7 @@ def __init__( supported_language: str = "th", supported_entity: str = "TH_TNIN", replacement_pairs: Optional[List[Tuple[str, str]]] = None, + name: Optional[str] = None, ): self.replacement_pairs = replacement_pairs if replacement_pairs else [] @@ -78,6 +79,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def validate_result(self, pattern_text: str) -> Union[bool, None]: diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/uk/uk_nhs_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/uk/uk_nhs_recognizer.py index c274e7fc3c..6f875d9f0d 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/uk/uk_nhs_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/uk/uk_nhs_recognizer.py @@ -38,6 +38,7 @@ def __init__( supported_language: str = "en", supported_entity: str = "UK_NHS", replacement_pairs: Optional[List[Tuple[str, str]]] = None, + name: Optional[str] = None, ): self.replacement_pairs = ( replacement_pairs if replacement_pairs else [("-", ""), (" ", "")] @@ -49,6 +50,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def validate_result(self, pattern_text: str) -> bool: diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/uk/uk_nino_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/uk/uk_nino_recognizer.py index c201597f78..e5c5ef00dc 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/uk/uk_nino_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/uk/uk_nino_recognizer.py @@ -29,6 +29,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "en", supported_entity: str = "UK_NINO", + name: Optional[str] = None, ): patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -37,4 +38,5 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/aba_routing_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/aba_routing_recognizer.py index 6c66eb3cc9..daa6c4ccd2 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/aba_routing_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/aba_routing_recognizer.py @@ -47,6 +47,7 @@ def __init__( supported_language: str = "en", supported_entity: str = "ABA_ROUTING_NUMBER", replacement_pairs: Optional[List[Tuple[str, str]]] = None, + name: Optional[str] = None, ): self.replacement_pairs = replacement_pairs or [("-", "")] patterns = patterns if patterns else self.PATTERNS @@ -56,6 +57,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def validate_result(self, pattern_text: str) -> bool: # noqa: D102 diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/medical_license_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/medical_license_recognizer.py index acc0ef5996..757973b5ed 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/medical_license_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/medical_license_recognizer.py @@ -36,6 +36,7 @@ def __init__( supported_language: str = "en", supported_entity: str = "MEDICAL_LICENSE", replacement_pairs: Optional[List[Tuple[str, str]]] = None, + name: Optional[str] = None, ): self.replacement_pairs = ( replacement_pairs if replacement_pairs else [("-", ""), (" ", "")] @@ -47,6 +48,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def validate_result(self, pattern_text: str) -> bool: # noqa: D102 diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/us_bank_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/us_bank_recognizer.py index b64c329820..0b90696ae3 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/us_bank_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/us_bank_recognizer.py @@ -39,6 +39,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "en", supported_entity: str = "US_BANK_NUMBER", + name: Optional[str] = None, ): patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -47,4 +48,5 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/us_driver_license_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/us_driver_license_recognizer.py index 7744e42cfa..52fba6ea07 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/us_driver_license_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/us_driver_license_recognizer.py @@ -55,6 +55,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "en", supported_entity: str = "US_DRIVER_LICENSE", + name: Optional[str] = None, ): patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -63,4 +64,5 @@ def __init__( supported_language=supported_language, patterns=patterns, context=context, + name=name, ) diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/us_itin_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/us_itin_recognizer.py index ea71e61cc0..9b64b49a23 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/us_itin_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/us_itin_recognizer.py @@ -39,6 +39,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "en", supported_entity: str = "US_ITIN", + name: Optional[str] = None, ): patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -47,4 +48,5 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/us_passport_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/us_passport_recognizer.py index 295fe844ee..e2da39e976 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/us_passport_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/us_passport_recognizer.py @@ -26,6 +26,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "en", supported_entity: str = "US_PASSPORT", + name: Optional[str] = None, ): patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -34,4 +35,5 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/us_ssn_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/us_ssn_recognizer.py index 4b40af8093..ec8feb98cb 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/us_ssn_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/country_specific/us/us_ssn_recognizer.py @@ -38,6 +38,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "en", supported_entity: str = "US_SSN", + name: Optional[str] = None, ): patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -46,6 +47,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def invalidate_result(self, pattern_text: str) -> bool: diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/credit_card_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/credit_card_recognizer.py index 5204a2d76f..912bf8d197 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/credit_card_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/credit_card_recognizer.py @@ -45,6 +45,7 @@ def __init__( supported_language: str = "en", supported_entity: str = "CREDIT_CARD", replacement_pairs: Optional[List[Tuple[str, str]]] = None, + name: Optional[str] = None, ): self.replacement_pairs = ( replacement_pairs if replacement_pairs else [("-", ""), (" ", "")] @@ -56,6 +57,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def validate_result(self, pattern_text: str) -> bool: # noqa: D102 diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/crypto_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/crypto_recognizer.py index eeac055f50..b6c3eeece3 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/crypto_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/crypto_recognizer.py @@ -39,6 +39,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "en", supported_entity: str = "CRYPTO", + name: Optional[str] = None, ): patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -47,6 +48,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def validate_result(self, pattern_text: str) -> bool: diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/date_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/date_recognizer.py index 8348ec7d4a..08bc4aa1de 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/date_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/date_recognizer.py @@ -89,6 +89,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "en", supported_entity: str = "DATE_TIME", + name: Optional[str] = None, ): patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -97,4 +98,5 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/email_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/email_recognizer.py index 5bd30d7fb3..a7ecb1bea7 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/email_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/email_recognizer.py @@ -31,6 +31,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "en", supported_entity: str = "EMAIL_ADDRESS", + name: Optional[str] = None, ): patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -39,6 +40,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def validate_result(self, pattern_text: str): # noqa: D102 diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/iban_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/iban_recognizer.py index acdb4b8b9b..9b1c472014 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/iban_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/iban_recognizer.py @@ -74,6 +74,7 @@ def __init__( bos_eos: Tuple[str, str] = (BOS, EOS), regex_flags: int = re.DOTALL | re.MULTILINE, replacement_pairs: Optional[List[Tuple[str, str]]] = None, + name: Optional[str] = None, ): self.replacement_pairs = replacement_pairs or [("-", ""), (" ", "")] self.exact_match = exact_match @@ -86,6 +87,7 @@ def __init__( context=context, supported_language=supported_language, global_regex_flags=regex_flags, + name=name, ) def validate_result(self, pattern_text: str): # noqa: D102 diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/ip_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/ip_recognizer.py index 2fd6bd953b..198e9b288a 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/ip_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/ip_recognizer.py @@ -40,6 +40,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "en", supported_entity: str = "IP_ADDRESS", + name: Optional[str] = None, ): patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -48,6 +49,7 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) def invalidate_result(self, pattern_text: str) -> bool: diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/phone_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/phone_recognizer.py index 3f1cad622d..8430a5eb01 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/phone_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/phone_recognizer.py @@ -34,6 +34,7 @@ def __init__( # For all regions, use phonenumbers.SUPPORTED_REGIONS supported_regions=DEFAULT_SUPPORTED_REGIONS, leniency: Optional[int] = 1, + name: Optional[str] = None, ): context = context if context else self.CONTEXT self.supported_regions = supported_regions @@ -42,6 +43,7 @@ def __init__( supported_entities=self.get_supported_entities(), supported_language=supported_language, context=context, + name=name, ) def load(self) -> None: # noqa: D102 diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/url_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/url_recognizer.py index 389a389a33..b0ef947591 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/url_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/generic/url_recognizer.py @@ -37,6 +37,7 @@ def __init__( context: Optional[List[str]] = None, supported_language: str = "en", supported_entity: str = "URL", + name: Optional[str] = None, ): patterns = patterns if patterns else self.PATTERNS context = context if context else self.CONTEXT @@ -45,4 +46,5 @@ def __init__( patterns=patterns, context=context, supported_language=supported_language, + name=name, ) diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/nlp_engine_recognizers/spacy_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/nlp_engine_recognizers/spacy_recognizer.py index ffac893d64..772bb43f06 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/nlp_engine_recognizers/spacy_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/nlp_engine_recognizers/spacy_recognizer.py @@ -42,6 +42,7 @@ def __init__( default_explanation: Optional[str] = None, check_label_groups: Optional[List[Tuple[Set, Set]]] = None, context: Optional[List[str]] = None, + name: Optional[str] = None, ): """Initialize the SpaCy recognizer. @@ -69,6 +70,7 @@ def __init__( supported_entities=supported_entities, supported_language=supported_language, context=context, + name=name, ) def load(self) -> None: # noqa: D102 diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/nlp_engine_recognizers/stanza_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/nlp_engine_recognizers/stanza_recognizer.py index 8b15e668b5..889df4541c 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/nlp_engine_recognizers/stanza_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/nlp_engine_recognizers/stanza_recognizer.py @@ -1,3 +1,5 @@ +from typing import Optional + from presidio_analyzer.predefined_recognizers.nlp_engine_recognizers.spacy_recognizer import ( # noqa: E501 SpacyRecognizer, ) @@ -12,6 +14,6 @@ class StanzaRecognizer(SpacyRecognizer): Stanza's interface with spaCy's """ - def __init__(self, **kwargs): + def __init__(self, name: Optional[str] = None, **kwargs): self.DEFAULT_EXPLANATION = self.DEFAULT_EXPLANATION.replace("Spacy", "Stanza") - super().__init__(**kwargs) + super().__init__(name=name, **kwargs) diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/nlp_engine_recognizers/transformers_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/nlp_engine_recognizers/transformers_recognizer.py index 2cd38e5f88..643d7b4ad6 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/nlp_engine_recognizers/transformers_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/nlp_engine_recognizers/transformers_recognizer.py @@ -1,4 +1,5 @@ import logging +from typing import Optional from presidio_analyzer.predefined_recognizers.nlp_engine_recognizers.spacy_recognizer import ( # noqa: E501 SpacyRecognizer, @@ -29,8 +30,8 @@ class TransformersRecognizer(SpacyRecognizer): "PHONE_NUMBER", ] - def __init__(self, **kwargs): + def __init__(self, name: Optional[str] = None, **kwargs): self.DEFAULT_EXPLANATION = self.DEFAULT_EXPLANATION.replace( "Spacy", "Transformers" ) - super().__init__(**kwargs) + super().__init__(name=name, **kwargs) diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/ahds_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/ahds_recognizer.py index 3eae5dd400..88477610aa 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/ahds_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/ahds_recognizer.py @@ -34,7 +34,8 @@ def __init__( self, supported_entities: Optional[List[str]] = None, supported_language: str = "en", - client: Optional[DeidentificationClient] = None + client: Optional[DeidentificationClient] = None, + name: Optional[str] = None, ): """ Wrap PHI detection using Azure Health Data Services de-identification. @@ -46,7 +47,7 @@ def __init__( super().__init__( supported_entities=supported_entities, supported_language=supported_language, - name="Azure Health Data Services Deidentification", + name=name if name else "Azure Health Data Services Deidentification", version="1.0.0", ) diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/azure_ai_language.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/azure_ai_language.py index c25b782d5d..1f87020cda 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/azure_ai_language.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/azure_ai_language.py @@ -24,7 +24,7 @@ def __init__( supported_language: str = "en", ta_client: Optional["TextAnalyticsClient"] = None, azure_ai_key: Optional[str] = None, - azure_ai_endpoint: Optional[str] = None + azure_ai_endpoint: Optional[str] = None, ): """ Wrap the PII detection in Azure AI Language. diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/azure_openai_langextract_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/azure_openai_langextract_recognizer.py index ff449ecf0f..223b8172da 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/azure_openai_langextract_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/azure_openai_langextract_recognizer.py @@ -59,6 +59,7 @@ def __init__( api_key: Optional[str] = None, api_version: Optional[str] = None, supported_language: str = "en", + name: str = "Azure OpenAI LangExtract PII", ): """ Initialize Azure OpenAI LangExtract recognizer for PII/PHI detection. @@ -111,14 +112,14 @@ def __init__( # Initialize parent class (loads config, sets self.model_id from config) super().__init__( config_path=actual_config_path, - name="Azure OpenAI LangExtract PII", + name=name, supported_language=supported_language, extract_params={ "extract": { "fence_output": True, "use_schema_constraints": False, }, - } + }, ) # Override model_id if provided as parameter (deployment name) diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/ollama_langextract_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/ollama_langextract_recognizer.py index b7a02684bf..d1d6317edf 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/ollama_langextract_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/third_party/ollama_langextract_recognizer.py @@ -20,6 +20,7 @@ def __init__( config_path: Optional[str] = None, supported_language: str = "en", context: Optional[list] = None, + name: str = "Ollama LangExtract PII", ): """Initialize Ollama LangExtract recognizer.""" actual_config_path = ( @@ -28,7 +29,7 @@ def __init__( super().__init__( config_path=actual_config_path, - name="Ollama LangExtract PII", + name=name, supported_language=supported_language, extract_params={ "extract": { @@ -40,7 +41,7 @@ def __init__( "timeout": 240, "num_ctx": 8192, } - } + }, ) model_config = self.config.get("model", {}) diff --git a/presidio-analyzer/presidio_analyzer/recognizer_registry/recognizers_loader_utils.py b/presidio-analyzer/presidio_analyzer/recognizer_registry/recognizers_loader_utils.py index e73f64398f..e2d5c2eadb 100644 --- a/presidio-analyzer/presidio_analyzer/recognizer_registry/recognizers_loader_utils.py +++ b/presidio-analyzer/presidio_analyzer/recognizer_registry/recognizers_loader_utils.py @@ -117,12 +117,23 @@ def _get_recognizer_languages( @staticmethod def get_recognizer_name(recognizer_conf: Union[Dict[str, Any], str]) -> str: - """Get the name of a recognizer in the configuration. + """Get the class name for recognizer instantiation. + + Uses 'class_name' if present, otherwise 'name'. + + Logic: + - If only 'name' exists: Use 'name' as both class name (for instantiation) + and instance name (passed to __init__) + - If 'class_name' exists: Use 'class_name' for instantiation and 'name' + as the instance name (passed to __init__) :param recognizer_conf: The recognizer configuration. """ if isinstance(recognizer_conf, str): return recognizer_conf + class_name = recognizer_conf.get("class_name") + if class_name: + return class_name return recognizer_conf["name"] @staticmethod @@ -296,12 +307,10 @@ def get( recognizer_instances = [] predefined, custom = RecognizerListLoader._split_recognizers(recognizers) - predefined_to_exclude = {"enabled", "type", "supported_languages", "name"} - - # For custom recognizers, we keep 'supported_languages' - # and don't exclude 'supported_entity' - # because PatternRecognizer needs it - custom_to_exclude = {"enabled", "type"} + predefined_to_exclude = { + "enabled", "type", "supported_languages", "class_name" + } + custom_to_exclude = {"enabled", "type", "class_name"} for recognizer_conf in predefined: for language_conf in RecognizerListLoader._get_recognizer_languages( recognizer_conf=recognizer_conf, supported_languages=supported_languages @@ -318,8 +327,6 @@ def get( recognizer_name=recognizer_name ) - # Prepare kwargs, converting supported_entities - # to supported_entity if needed kwargs = RecognizerListLoader._prepare_recognizer_kwargs( new_conf, language_conf, recognizer_cls )