Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions evaluation/ai-assistant/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Thumbs.db
backend/.venv/
backend/__pycache__/
backend/**/__pycache__/

*.pyc
*.pyo

Expand Down
34 changes: 34 additions & 0 deletions evaluation/ai-assistant/backend/datasets.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
[
{
"id": "example-dataset",
"filename": "example_pii_dataset.csv",
"name": "Example Dataset",
"description": "10 synthetic PII records spanning healthcare, finance, HR, and customer support scenarios.",
"path": "data/example_pii_dataset.csv",
"format": "csv",
"record_count": 10,
"has_entities": true,
"columns": [
"text",
"entities"
],
"text_column": "text",
"entities_column": "entities"
},
{
"id": "upload-6ff31019",
"filename": "sample_medical_records.csv",
"name": "manual-dataset",
"description": "my custom entities",
"path": "/Users/ronshakutai/projects_folder/presidio/evaluation/ai-assistant/data/sample_medical_records.csv",
"format": "csv",
"record_count": 10,
"has_entities": true,
"columns": [
"text",
"entities"
],
"text_column": "text",
"entities_column": "entities"
}
]
97 changes: 97 additions & 0 deletions evaluation/ai-assistant/backend/llm_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""LLM Judge service using Azure OpenAI via LangExtract."""

from __future__ import annotations

import logging
from typing import Optional

from models import Entity

logger = logging.getLogger(__name__)

# Lazy-loaded recognizer singleton
_recognizer = None


class LLMServiceError(Exception):
    """Signal a failure inside the LLM Judge service layer.

    Used by this module for a missing dependency, a recognizer that could
    not be constructed, and analysis requested before configuration.
    """


def configure(
    azure_endpoint: str,
    api_key: Optional[str] = None,
    deployment_name: str = "gpt-4o",
    api_version: str = "2024-02-15-preview",
) -> dict:
    """Build the Azure OpenAI LangExtract recognizer and keep it module-wide.

    On success the new recognizer replaces whatever was stored in the
    module-level ``_recognizer``. If construction fails, ``_recognizer`` is
    reset to ``None`` before the error is re-raised.

    :param azure_endpoint: Azure OpenAI endpoint URL.
    :param api_key: API key (or None for managed identity).
    :param deployment_name: Azure deployment / model name; passed to the
        recognizer as ``model_id``.
    :param api_version: Azure OpenAI API version.
    :returns: Status dict with the keys ``status`` and ``deployment``.
    :raises LLMServiceError: If the recognizer class cannot be imported or
        the recognizer cannot be constructed.
    """
    global _recognizer

    # Imported here rather than at module level so that a missing optional
    # dependency surfaces as an LLMServiceError when configure() is called.
    try:
        from presidio_analyzer.predefined_recognizers.third_party.azure_openai_langextract_recognizer import (  # noqa: E501
            AzureOpenAILangExtractRecognizer,
        )
    except ImportError as exc:
        raise LLMServiceError(
            "langextract or presidio-analyzer is not installed. "
            "Run: pip install langextract presidio-analyzer"
        ) from exc

    recognizer_kwargs = {
        "model_id": deployment_name,
        "azure_endpoint": azure_endpoint,
        "api_key": api_key,
        "api_version": api_version,
    }
    try:
        built = AzureOpenAILangExtractRecognizer(**recognizer_kwargs)
    except Exception as exc:
        # Do not leave a previously configured recognizer in place after a
        # failed (re)configuration attempt.
        _recognizer = None
        raise LLMServiceError(f"Failed to initialise recognizer: {exc}") from exc
    _recognizer = built

    status = {"status": "configured", "deployment": deployment_name}
    logger.info(
        "LLM Judge configured: endpoint=%s deployment=%s",
        azure_endpoint,
        deployment_name,
    )
    return status


def is_configured() -> bool:
    """Report whether this module currently holds a recognizer."""
    recognizer_missing = _recognizer is None
    return not recognizer_missing


def disconnect() -> None:
    """Drop the stored recognizer so that another model can be configured."""
    global _recognizer

    # Clearing the module-level reference is all that "disconnecting" does
    # here; afterwards is_configured() reports False.
    _recognizer = None
    logger.info("LLM Judge disconnected")


def analyze_text(text: str) -> list[Entity]:
    """Analyse one text with the configured recognizer.

    :param text: The text to analyse.
    :returns: One ``Entity`` per recognizer result, carrying the matched
        substring of ``text``, the entity type, the start/end offsets and
        the score rounded to four decimal places.
    :raises LLMServiceError: If no recognizer has been configured.
    """
    if _recognizer is None:
        raise LLMServiceError(
            "LLM service not configured. Call /api/llm/configure first."
        )

    findings = _recognizer.analyze(text=text, entities=None)

    # The matched substring is recovered from the input by slicing with the
    # offsets reported on each result.
    return [
        Entity(
            text=text[hit.start:hit.end],
            entity_type=hit.entity_type,
            start=hit.start,
            end=hit.end,
            score=round(hit.score, 4),
        )
        for hit in findings
    ]
12 changes: 9 additions & 3 deletions evaluation/ai-assistant/backend/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from routers import analysis, datasets, decision, evaluation, review, sampling, upload
from routers import (
decision,
evaluation,
llm,
review,
sampling,
upload,
)

app = FastAPI(title="Presidio Evaluation Flow API", version="0.1.0")

Expand All @@ -11,13 +18,12 @@
allow_headers=["*"],
)

app.include_router(datasets.router)
app.include_router(upload.router)
app.include_router(sampling.router)
app.include_router(analysis.router)
app.include_router(review.router)
app.include_router(evaluation.router)
app.include_router(decision.router)
app.include_router(llm.router)


@app.get("/api/health")
Expand Down
Loading