microsoft · RonShakutai · Mar 9, 2026 · Mar 9, 2026 · Mar 9, 2026
diff --git a/evaluation/ai-assistant/.gitignore b/evaluation/ai-assistant/.gitignore
@@ -18,6 +18,7 @@ Thumbs.db
 backend/.venv/
 backend/__pycache__/
 backend/**/__pycache__/
+
 *.pyc
 *.pyo
 

diff --git a/evaluation/ai-assistant/backend/datasets.json b/evaluation/ai-assistant/backend/datasets.json
@@ -0,0 +1,34 @@
+[
+  {
+    "id": "example-dataset",
+    "filename": "example_pii_dataset.csv",
+    "name": "Example Dataset",
+    "description": "10 synthetic PII records spanning healthcare, finance, HR, and customer support scenarios.",
+    "path": "data/example_pii_dataset.csv",
+    "format": "csv",
+    "record_count": 10,
+    "has_entities": true,
+    "columns": [
+      "text",
+      "entities"
+    ],
+    "text_column": "text",
+    "entities_column": "entities"
+  },
+  {
+    "id": "upload-6ff31019",
+    "filename": "sample_medical_records.csv",
+    "name": "manual-dataset",
+    "description": "my custom entities",
+    "path": "/Users/ronshakutai/projects_folder/presidio/evaluation/ai-assistant/data/sample_medical_records.csv",
+    "format": "csv",
+    "record_count": 10,
+    "has_entities": true,
+    "columns": [
+      "text",
+      "entities"
+    ],
+    "text_column": "text",
+    "entities_column": "entities"
+  }
+]
diff --git a/evaluation/ai-assistant/backend/llm_service.py b/evaluation/ai-assistant/backend/llm_service.py
@@ -0,0 +1,97 @@
+"""LLM Judge service using Azure OpenAI via LangExtract."""
+
+from __future__ import annotations
+
+import logging
+from typing import Optional
+
+from models import Entity
+
+logger = logging.getLogger(__name__)
+
+# Shared recognizer: None until configure() succeeds; disconnect() clears it.
+_recognizer = None
+
+
+class LLMServiceError(Exception):
+    """Raised when the LLM service is not configured or cannot be initialised."""
+
+
+def configure(
+    azure_endpoint: str,
+    api_key: Optional[str] = None,
+    deployment_name: str = "gpt-4o",
+    api_version: str = "2024-02-15-preview",
+) -> dict:
+    """Create the Azure OpenAI LangExtract recognizer and store it module-wide.
+
+    :param azure_endpoint: Azure OpenAI endpoint URL.
+    :param api_key: API key (or None for managed identity).
+    :param deployment_name: Azure deployment name, passed on as ``model_id``.
+    :param api_version: Azure OpenAI API version.
+    :returns: Dict with ``status`` set to "configured" and the ``deployment``.
+    :raises LLMServiceError: If the import or the recognizer construction fails.
+    """
+    global _recognizer
+
+    # A missing package is reported as LLMServiceError rather than ImportError.
+    try:
+        from presidio_analyzer.predefined_recognizers.third_party.azure_openai_langextract_recognizer import (  # noqa: E501
+            AzureOpenAILangExtractRecognizer,
+        )
+    except ImportError as exc:
+        raise LLMServiceError(
+            "langextract or presidio-analyzer is not installed. "
+            "Run: pip install langextract presidio-analyzer"
+        ) from exc
+
+    try:
+        instance = AzureOpenAILangExtractRecognizer(
+            model_id=deployment_name,
+            azure_endpoint=azure_endpoint,
+            api_key=api_key,
+            api_version=api_version,
+        )
+    except Exception as exc:
+        _recognizer = None
+        raise LLMServiceError(f"Failed to initialise recognizer: {exc}") from exc
+    _recognizer = instance
+
+    log_args = (azure_endpoint, deployment_name)
+    logger.info("LLM Judge configured: endpoint=%s deployment=%s", *log_args)
+    return {"status": "configured", "deployment": deployment_name}
+
+
+def is_configured() -> bool:
+    """Return True while a recognizer from a successful configure() is held."""
+    return _recognizer is not None
+
+
+def disconnect() -> None:
+    """Drop the held recognizer so that a different model can be configured."""
+    global _recognizer
+    _recognizer = None
+    logger.info("LLM Judge disconnected")
+
+
+def analyze_text(text: str) -> list[Entity]:
+    """Run the configured LLM recognizer over ``text``.
+
+    :returns: One Entity per recognizer result, in the order returned.
+    :raises LLMServiceError: If no recognizer has been configured.
+    """
+    if _recognizer is None:
+        raise LLMServiceError(
+            "LLM service not configured. Call /api/llm/configure first."
+        )
+
+    return [
+        Entity(
+            text=text[hit.start:hit.end],
+            entity_type=hit.entity_type,
+            start=hit.start,
+            end=hit.end,
+            score=round(hit.score, 4),
+        )
+        for hit in _recognizer.analyze(text=text, entities=None)
+    ]
diff --git a/evaluation/ai-assistant/backend/main.py b/evaluation/ai-assistant/backend/main.py
@@ -1,6 +1,13 @@
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
-from routers import analysis, datasets, decision, evaluation, review, sampling, upload
+from routers import (
+    decision,
+    evaluation,
+    llm,
+    review,
+    sampling,
+    upload,
+)
 
 app = FastAPI(title="Presidio Evaluation Flow API", version="0.1.0")
 
@@ -11,13 +18,12 @@
     allow_headers=["*"],
 )
 
-app.include_router(datasets.router)
 app.include_router(upload.router)
 app.include_router(sampling.router)
-app.include_router(analysis.router)
 app.include_router(review.router)
 app.include_router(evaluation.router)
 app.include_router(decision.router)
+app.include_router(llm.router)
 
 
 @app.get("/api/health")
-Original file line number
+Diff line change
@@ Expand Up / @@ -18,6 +18,7 @@ Thumbs.db @@
     backend/.venv/
     backend/__pycache__/
     backend/**/__pycache__/
     *.pyc
     *.pyo
@@ Expand Down @@