diff --git a/evaluation/ai-assistant/.gitignore b/evaluation/ai-assistant/.gitignore index 82aae09a9..8ee1b3847 100644 --- a/evaluation/ai-assistant/.gitignore +++ b/evaluation/ai-assistant/.gitignore @@ -18,6 +18,7 @@ Thumbs.db backend/.venv/ backend/__pycache__/ backend/**/__pycache__/ + *.pyc *.pyo diff --git a/evaluation/ai-assistant/backend/datasets.json b/evaluation/ai-assistant/backend/datasets.json new file mode 100644 index 000000000..220be7885 --- /dev/null +++ b/evaluation/ai-assistant/backend/datasets.json @@ -0,0 +1,34 @@ +[ + { + "id": "example-dataset", + "filename": "example_pii_dataset.csv", + "name": "Example Dataset", + "description": "10 synthetic PII records spanning healthcare, finance, HR, and customer support scenarios.", + "path": "data/example_pii_dataset.csv", + "format": "csv", + "record_count": 10, + "has_entities": true, + "columns": [ + "text", + "entities" + ], + "text_column": "text", + "entities_column": "entities" + }, + { + "id": "upload-6ff31019", + "filename": "sample_medical_records.csv", + "name": "manual-dataset", + "description": "my custom entities", + "path": "data/sample_medical_records.csv", + "format": "csv", + "record_count": 10, + "has_entities": true, + "columns": [ + "text", + "entities" + ], + "text_column": "text", + "entities_column": "entities" + } +] \ No newline at end of file diff --git a/evaluation/ai-assistant/backend/llm_service.py b/evaluation/ai-assistant/backend/llm_service.py new file mode 100644 index 000000000..ec05981cd --- /dev/null +++ b/evaluation/ai-assistant/backend/llm_service.py @@ -0,0 +1,97 @@ +"""LLM Judge service using Azure OpenAI via LangExtract.""" + +from __future__ import annotations + +import logging +from typing import Optional + +from models import Entity + +logger = logging.getLogger(__name__) + +# Lazy-loaded recognizer singleton +_recognizer = None + + +class LLMServiceError(Exception): + """Raised when LLM
service encounters an error.""" + + +def configure( + azure_endpoint: str, + api_key: Optional[str] = None, + deployment_name: str = "gpt-4o", + api_version: str = "2024-02-15-preview", +) -> dict: + """Initialise the Azure OpenAI LangExtract recognizer. + + :param azure_endpoint: Azure OpenAI endpoint URL. + :param api_key: API key (or None for managed identity). + :param deployment_name: Azure deployment / model name. + :param api_version: Azure OpenAI API version. + :returns: Status dict. + """ + global _recognizer + + try: + from presidio_analyzer.predefined_recognizers.third_party.azure_openai_langextract_recognizer import ( # noqa: E501 + AzureOpenAILangExtractRecognizer, + ) + except ImportError as exc: + raise LLMServiceError( + "langextract or presidio-analyzer is not installed. " + "Run: pip install langextract presidio-analyzer" + ) from exc + + try: + _recognizer = AzureOpenAILangExtractRecognizer( + model_id=deployment_name, + azure_endpoint=azure_endpoint, + api_key=api_key, + api_version=api_version, + ) + except Exception as exc: + _recognizer = None + raise LLMServiceError(f"Failed to initialise recognizer: {exc}") from exc + + logger.info( + "LLM Judge configured: endpoint=%s deployment=%s", + azure_endpoint, + deployment_name, + ) + return {"status": "configured", "deployment": deployment_name} + + +def is_configured() -> bool: + """Return True if the recognizer has been initialised.""" + return _recognizer is not None + + +def disconnect() -> None: + """Reset the recognizer so a new model can be configured.""" + global _recognizer + _recognizer = None + logger.info("LLM Judge disconnected") + + +def analyze_text(text: str) -> list[Entity]: + """Run the LLM recognizer on a single text and return Entity objects.""" + if _recognizer is None: + raise LLMServiceError( + "LLM service not configured. Call /api/llm/configure first." 
+ ) + + results = _recognizer.analyze(text=text, entities=None) + + entities: list[Entity] = [] + for r in results: + entities.append( + Entity( + text=text[r.start:r.end], + entity_type=r.entity_type, + start=r.start, + end=r.end, + score=round(r.score, 4), + ) + ) + return entities diff --git a/evaluation/ai-assistant/backend/main.py b/evaluation/ai-assistant/backend/main.py index 191cb0d2e..1b3450fc0 100644 --- a/evaluation/ai-assistant/backend/main.py +++ b/evaluation/ai-assistant/backend/main.py @@ -1,6 +1,13 @@ from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware -from routers import analysis, datasets, decision, evaluation, review, sampling, upload +from routers import ( + decision, + evaluation, + llm, + review, + sampling, + upload, +) app = FastAPI(title="Presidio Evaluation Flow API", version="0.1.0") @@ -11,13 +18,12 @@ allow_headers=["*"], ) -app.include_router(datasets.router) app.include_router(upload.router) app.include_router(sampling.router) -app.include_router(analysis.router) app.include_router(review.router) app.include_router(evaluation.router) app.include_router(decision.router) +app.include_router(llm.router) @app.get("/api/health") diff --git a/evaluation/ai-assistant/backend/mock_data.py b/evaluation/ai-assistant/backend/mock_data.py index 2c327428f..d5affc7e0 100644 --- a/evaluation/ai-assistant/backend/mock_data.py +++ b/evaluation/ai-assistant/backend/mock_data.py @@ -1,279 +1,16 @@ -"""Mock data mirroring the frontend for demo / development purposes.""" +"""Mock data for evaluation / decision stages only.""" from datetime import datetime from models import ( - Dataset, - DatasetType, Entity, EntityMiss, EvaluationMetrics, EvaluationRun, MissType, - Record, RiskLevel, ) -DATASETS: list[Dataset] = [ - Dataset( - id="ds-001", - name="Patient Records 2025", - type=DatasetType.customer, - record_count=15000, - description="Electronic health records from Q4 2025", - ), - Dataset( - id="ds-002", - name="Customer 
Support Tickets", - type=DatasetType.customer, - record_count=8500, - description="Support conversations with PII", - ), - Dataset( - id="ds-003", - name="Internal HR Data", - type=DatasetType.internal, - record_count=2300, - description="Employee records and performance reviews", - ), - Dataset( - id="ds-004", - name="Financial Transaction Logs", - type=DatasetType.customer, - record_count=42000, - description="Payment and billing information", - ), -] - -RECORDS: list[Record] = [ - Record( - id="rec-001", - text=( - "Patient John Smith, DOB 03/15/1985, was admitted on 2025-01-10. " - "Contact: john.smith@email.com, Phone: 555-0123. SSN: 123-45-6789." - ), - presidio_entities=[ - Entity( - text="John Smith", entity_type="PERSON", start=8, end=18, score=0.95 - ), - Entity( - text="03/15/1985", - entity_type="DATE_OF_BIRTH", - start=24, - end=34, - score=0.92, - ), - Entity( - text="john.smith@email.com", - entity_type="EMAIL", - start=77, - end=97, - score=0.98, - ), - Entity( - text="555-0123", - entity_type="PHONE_NUMBER", - start=106, - end=114, - score=0.89, - ), - Entity( - text="123-45-6789", entity_type="US_SSN", start=121, end=132, score=0.99 - ), - ], - llm_entities=[ - Entity( - text="John Smith", entity_type="PERSON", start=8, end=18, score=0.96 - ), - Entity( - text="03/15/1985", - entity_type="DATE_OF_BIRTH", - start=24, - end=34, - score=0.94, - ), - Entity(text="2025-01-10", entity_type="DATE", start=52, end=62, score=0.88), - Entity( - text="john.smith@email.com", - entity_type="EMAIL", - start=77, - end=97, - score=0.97, - ), - Entity( - text="555-0123", - entity_type="PHONE_NUMBER", - start=106, - end=114, - score=0.91, - ), - Entity( - text="123-45-6789", entity_type="US_SSN", start=121, end=132, score=0.98 - ), - ], - ), - Record( - id="rec-002", - text=( - "Dr. Sarah Johnson reviewed the case. " - "Medical Record #MR-445521. Insurance Policy: POL-8821-USA." 
- ), - presidio_entities=[ - Entity( - text="Sarah Johnson", entity_type="PERSON", start=4, end=17, score=0.93 - ), - Entity( - text="MR-445521", - entity_type="MEDICAL_RECORD", - start=55, - end=64, - score=0.87, - ), - ], - llm_entities=[ - Entity( - text="Dr. Sarah Johnson", - entity_type="PERSON", - start=0, - end=17, - score=0.95, - ), - Entity( - text="MR-445521", - entity_type="MEDICAL_RECORD", - start=55, - end=64, - score=0.89, - ), - Entity( - text="POL-8821-USA", - entity_type="INSURANCE_POLICY", - start=84, - end=96, - score=0.82, - ), - ], - ), - Record( - id="rec-003", - text=( - "Employee ID: EMP-8821, Jane Doe, started 2023-06-01. " - "Salary: $85,000. Emergency contact: Mike Doe at 555-9876." - ), - presidio_entities=[ - Entity( - text="EMP-8821", entity_type="EMPLOYEE_ID", start=13, end=21, score=0.91 - ), - Entity(text="Jane Doe", entity_type="PERSON", start=23, end=31, score=0.94), - Entity(text="2023-06-01", entity_type="DATE", start=41, end=51, score=0.96), - Entity(text="Mike Doe", entity_type="PERSON", start=89, end=97, score=0.92), - Entity( - text="555-9876", - entity_type="PHONE_NUMBER", - start=101, - end=109, - score=0.88, - ), - ], - llm_entities=[ - Entity( - text="EMP-8821", entity_type="EMPLOYEE_ID", start=13, end=21, score=0.90 - ), - Entity(text="Jane Doe", entity_type="PERSON", start=23, end=31, score=0.96), - Entity(text="2023-06-01", entity_type="DATE", start=41, end=51, score=0.94), - Entity(text="$85,000", entity_type="SALARY", start=61, end=68, score=0.79), - Entity(text="Mike Doe", entity_type="PERSON", start=89, end=97, score=0.93), - Entity( - text="555-9876", - entity_type="PHONE_NUMBER", - start=101, - end=109, - score=0.90, - ), - ], - ), - Record( - id="rec-004", - text=( - "Credit card ending in 4532 was used for transaction. " - "Customer: alice.wong@company.com. 
IP: 192.168.1.100" - ), - presidio_entities=[ - Entity( - text="4532", entity_type="CREDIT_CARD", start=22, end=26, score=0.65 - ), - Entity( - text="alice.wong@company.com", - entity_type="EMAIL", - start=64, - end=86, - score=0.97, - ), - Entity( - text="192.168.1.100", - entity_type="IP_ADDRESS", - start=92, - end=105, - score=0.99, - ), - ], - llm_entities=[ - Entity( - text="alice.wong@company.com", - entity_type="EMAIL", - start=64, - end=86, - score=0.98, - ), - Entity( - text="192.168.1.100", - entity_type="IP_ADDRESS", - start=92, - end=105, - score=0.97, - ), - ], - ), - Record( - id="rec-005", - text=( - "Prescription for Robert Chen: Medication ABC-123, dosage 50mg. " - "Doctor notes indicate history of diabetes." - ), - presidio_entities=[ - Entity( - text="Robert Chen", entity_type="PERSON", start=17, end=28, score=0.94 - ), - Entity( - text="ABC-123", - entity_type="MEDICATION_CODE", - start=41, - end=48, - score=0.71, - ), - ], - llm_entities=[ - Entity( - text="Robert Chen", entity_type="PERSON", start=17, end=28, score=0.95 - ), - Entity( - text="ABC-123", - entity_type="MEDICATION_CODE", - start=41, - end=48, - score=0.73, - ), - Entity( - text="diabetes", - entity_type="MEDICAL_CONDITION", - start=97, - end=105, - score=0.86, - ), - ], - ), -] - EVALUATION_RUNS: list[EvaluationRun] = [ EvaluationRun( id="run-001", diff --git a/evaluation/ai-assistant/backend/models.py b/evaluation/ai-assistant/backend/models.py index cd8f2a140..f2074c208 100644 --- a/evaluation/ai-assistant/backend/models.py +++ b/evaluation/ai-assistant/backend/models.py @@ -112,6 +112,8 @@ class DatasetLoadRequest(BaseModel): format: str # "csv" | "json" text_column: str = "text" entities_column: str | None = None + name: str | None = None # user-friendly display name + description: str | None = None # optional description class UploadedDataset(BaseModel): @@ -119,10 +121,21 @@ class UploadedDataset(BaseModel): id: str filename: str + name: str # user-friendly display name + 
description: str = "" # optional user-provided description + path: str # absolute file path format: str # "csv" | "json" record_count: int has_entities: bool columns: list[str] + text_column: str = "text" + entities_column: str | None = None + + +class DatasetRenameRequest(BaseModel): + """Request to rename a saved dataset.""" + + name: str class SetupConfig(BaseModel): @@ -182,3 +195,9 @@ class DecisionRequest(BaseModel): decision: DecisionType notes: str = "" selected_improvements: list[str] = [] + + +class LLMConfig(BaseModel): + """LLM Judge configuration — only deployment is chosen in the UI.""" + + deployment_name: str = "gpt-4o" diff --git a/evaluation/ai-assistant/backend/pyproject.toml b/evaluation/ai-assistant/backend/pyproject.toml index f0bb6035d..585b090be 100644 --- a/evaluation/ai-assistant/backend/pyproject.toml +++ b/evaluation/ai-assistant/backend/pyproject.toml @@ -5,13 +5,17 @@ description = "Backend API for Presidio Evaluation Flow" package-mode = false [tool.poetry.dependencies] -python = "^3.9" +python = ">=3.10,<3.14" fastapi = ">=0.115.0" uvicorn = { version = ">=0.32.0", extras = ["standard"] } pydantic = ">=2.0.0" python-multipart = ">=0.0.9" pandas = ">=2.0.0" scikit-learn = ">=1.3.0" +langextract = ">=0.1.0" +openai = ">=1.0.0" +presidio-analyzer = { path = "../../../presidio-analyzer", develop = true } +azure-identity = ">=1.15.0" [build-system] requires = ["poetry-core"] diff --git a/evaluation/ai-assistant/backend/routers/EntityComparison.tsx b/evaluation/ai-assistant/backend/routers/EntityComparison.tsx new file mode 100644 index 000000000..7e1fa3f58 --- /dev/null +++ b/evaluation/ai-assistant/backend/routers/EntityComparison.tsx @@ -0,0 +1,372 @@ +import { useState } from 'react'; +import { Card } from './ui/card'; +import { Button } from './ui/button'; +import { Badge } from './ui/badge'; +import { Input } from './ui/input'; +import { Label } from './ui/label'; +import { Select, SelectContent, SelectItem, SelectTrigger, 
SelectValue } from './ui/select'; +import { Collapsible, CollapsibleContent, CollapsibleTrigger } from './ui/collapsible'; +import { CheckCircle, XCircle, Edit, AlertTriangle, Check, X, ChevronDown, FileText } from 'lucide-react'; +import type { Entity } from '../types'; + +interface EntityComparisonProps { + recordId: string; + recordText: string; + presidioEntities: Entity[]; + llmEntities: Entity[]; + datasetEntities?: Entity[]; + onConfirm: (recordId: string, entity: Entity, source: 'presidio' | 'llm' | 'manual' | 'dataset') => void; + onReject: (recordId: string, entity: Entity, source: 'presidio' | 'llm' | 'dataset') => void; + onAddManual: (recordId: string, entity: Entity) => void; +} + +type EntityStatus = 'match' | 'conflict' | 'presidio-only' | 'llm-only' | 'predefined-only' + | 'presidio+predefined' | 'presidio+llm' | 'predefined+llm' | 'pending'; + +interface AnnotatedEntity extends Entity { + status: EntityStatus; + sources: ('presidio' | 'llm' | 'predefined')[]; + confirmed?: boolean; +} + +export function EntityComparison({ + recordId, + recordText, + presidioEntities = [], + llmEntities = [], + datasetEntities = [], + onConfirm, + onReject, + onAddManual, +}: EntityComparisonProps) { + const [showAddManual, setShowAddManual] = useState(false); + const [manualEntity, setManualEntity] = useState({ text: '', entity_type: '', start: 0, end: 0 }); + const [confirmedEntities, setConfirmedEntities] = useState>(new Set()); + const [rejectedEntities, setRejectedEntities] = useState>(new Set()); + const [expandedContexts, setExpandedContexts] = useState>(new Set()); + + // Combine and classify entities from all three sources + const annotatedEntities: AnnotatedEntity[] = []; + + // Two spans overlap if one starts before the other ends + const spansOverlap = (a: Entity, b: Entity) => + a.start < b.end && b.start < a.end; + + // Build a unified list: for each unique span, track which sources detected it + interface SpanEntry { entity: Entity; sources: 
Set<'presidio' | 'llm' | 'predefined'>; types: Map } + const spans: SpanEntry[] = []; + + const addToSpans = (entity: Entity, source: 'presidio' | 'llm' | 'predefined') => { + const existing = spans.find(s => spansOverlap(s.entity, entity)); + if (existing) { + existing.sources.add(source); + existing.types.set(source, entity.entity_type); + // Prefer the entity with more text or higher score + if (entity.text.length > existing.entity.text.length) { + existing.entity = { ...entity }; + } + } else { + const types = new Map(); + types.set(source, entity.entity_type); + spans.push({ entity: { ...entity }, sources: new Set([source]), types }); + } + }; + + presidioEntities.forEach(e => addToSpans(e, 'presidio')); + datasetEntities.forEach(e => addToSpans(e, 'predefined')); + llmEntities.forEach(e => addToSpans(e, 'llm')); + + spans.forEach(({ entity, sources, types }) => { + const sourceList = Array.from(sources) as ('presidio' | 'llm' | 'predefined')[]; + const uniqueTypes = new Set(types.values()); + const allAgree = uniqueTypes.size === 1; + + let status: EntityStatus; + if (sourceList.length >= 2 && allAgree) { + status = 'match'; + } else if (sourceList.length >= 2 && !allAgree) { + status = 'conflict'; + } else if (sourceList.length === 1) { + const s = sourceList[0]; + status = s === 'presidio' ? 'presidio-only' : s === 'llm' ? 
'llm-only' : 'predefined-only'; + } else { + status = 'pending'; + } + + // For two-source non-match conflicts, use specific labels + if (sourceList.length === 2 && status !== 'match') { + const key = sourceList.sort().join('+'); + if (key === 'predefined+presidio') status = 'presidio+predefined'; + else if (key === 'llm+presidio') status = 'presidio+llm'; + else if (key === 'llm+predefined') status = 'predefined+llm'; + } + + annotatedEntities.push({ ...entity, status, sources: sourceList }); + }); + + const getEntityKey = (entity: Entity) => `${entity.text}-${entity.start}-${entity.end}`; + + const getContextForEntity = (entity: Entity) => { + const CONTEXT_CHARS = 150; + // Use indexOf for robust highlighting regardless of position accuracy + const idx = recordText.indexOf(entity.text); + const entityStart = idx >= 0 ? idx : entity.start; + const entityEnd = idx >= 0 ? idx + entity.text.length : entity.end; + + const start = Math.max(0, entityStart - CONTEXT_CHARS); + const end = Math.min(recordText.length, entityEnd + CONTEXT_CHARS); + + const before = recordText.substring(start, entityStart); + const entityText = recordText.substring(entityStart, entityEnd); + const after = recordText.substring(entityEnd, end); + + return { + before: (start > 0 ? '...' : '') + before, + entity: entityText, + after: after + (end < recordText.length ? '...' : ''), + }; + }; + + const toggleContext = (key: string) => { + setExpandedContexts(prev => { + const newSet = new Set(prev); + if (newSet.has(key)) { + newSet.delete(key); + } else { + newSet.add(key); + } + return newSet; + }); + }; + + const handleConfirmEntity = (entity: AnnotatedEntity) => { + const key = getEntityKey(entity); + setConfirmedEntities(new Set([...confirmedEntities, key])); + setRejectedEntities(prev => { + const newSet = new Set(prev); + newSet.delete(key); + return newSet; + }); + onConfirm(recordId, entity, entity.sources[0] === 'predefined' ? 
'dataset' : entity.sources[0]); + }; + + const handleRejectEntity = (entity: AnnotatedEntity) => { + const key = getEntityKey(entity); + setRejectedEntities(new Set([...rejectedEntities, key])); + setConfirmedEntities(prev => { + const newSet = new Set(prev); + newSet.delete(key); + return newSet; + }); + onReject(recordId, entity, entity.sources[0] === 'predefined' ? 'dataset' : entity.sources[0]); + }; + + const handleAddManualEntity = () => { + if (manualEntity.text && manualEntity.entity_type) { + onAddManual(recordId, manualEntity); + setManualEntity({ text: '', entity_type: '', start: 0, end: 0 }); + setShowAddManual(false); + } + }; + + const getStatusBadge = (status: EntityStatus, confirmed?: boolean, rejected?: boolean) => { + if (confirmed) { + return Confirmed; + } + if (rejected) { + return Rejected; + } + + switch (status) { + case 'match': + return ✓ Match; + case 'conflict': + return ⚠ Conflict; + case 'presidio-only': + return Presidio; + case 'llm-only': + return LLM Judge; + case 'predefined-only': + return Predefined; + case 'presidio+predefined': + return Presidio + Predefined; + case 'presidio+llm': + return Presidio + LLM Judge; + case 'predefined+llm': + return Predefined + LLM Judge; + default: + return Pending; + } + }; + + return ( + +
+ {/* Record Text */} +
+ +
+ {recordText} +
+
+ + {/* Entities List */} +
+
+ + +
+ + {/* Manual Add Form */} + {showAddManual && ( +
+
+
+ + setManualEntity({ ...manualEntity, text: e.target.value })} + placeholder="Enter entity text..." + /> +
+
+ + +
+
+
+ + +
+
+ )} + + {/* Entity Cards */} +
+ {annotatedEntities.map((entity, index) => { + const key = getEntityKey(entity); + const isConfirmed = confirmedEntities.has(key); + const isRejected = rejectedEntities.has(key); + + return ( +
+
+
+
+ {entity.text} + {entity.entity_type} + {getStatusBadge(entity.status, isConfirmed, isRejected)} +
+ +
+ Position: {entity.start}-{entity.end} + {entity.score && Confidence: {(entity.score * 100).toFixed(0)}%} + {entity.status === 'conflict' && ( +
+ + Type mismatch between Presidio and LLM +
+ )} +
+
+ + {!isConfirmed && !isRejected && ( +
+ + +
+ )} +
+ + {/* Context Collapsible */} + toggleContext(key)}> + + + + +
+
+ + Surrounding Context: +
+
+ {getContextForEntity(entity).before} + {getContextForEntity(entity).entity} + {getContextForEntity(entity).after} +
+
+
+
+
+ ); + })} +
+
+ + {/* Summary */} +
+
+
+ {confirmedEntities.size} Confirmed +
+
+
+ {rejectedEntities.size} Rejected +
+
+
+ {annotatedEntities.length - confirmedEntities.size - rejectedEntities.size} Pending +
+
+
+ + ); +} diff --git a/evaluation/ai-assistant/backend/routers/analysis.py b/evaluation/ai-assistant/backend/routers/analysis.py deleted file mode 100644 index 9b6c6563e..000000000 --- a/evaluation/ai-assistant/backend/routers/analysis.py +++ /dev/null @@ -1,86 +0,0 @@ -import asyncio - -from fastapi import APIRouter -from mock_data import RECORDS -from models import AnalysisStatus, Record - -router = APIRouter(prefix="/api/analysis", tags=["analysis"]) - -# In-memory state for the current analysis run -_state: dict = { - "presidio_progress": 0, - "llm_progress": 0, - "running": False, -} - - -@router.post("/start") -async def start_analysis(): - """Kick off parallel Presidio + LLM analysis (simulated).""" - _state["presidio_progress"] = 0 - _state["llm_progress"] = 0 - _state["running"] = True - - async def _simulate(): - while _state["presidio_progress"] < 100 or _state["llm_progress"] < 100: - if _state["presidio_progress"] < 100: - _state["presidio_progress"] = min(100, _state["presidio_progress"] + 4) - if _state["llm_progress"] < 100: - _state["llm_progress"] = min(100, _state["llm_progress"] + 3) - await asyncio.sleep(0.2) - _state["running"] = False - - asyncio.create_task(_simulate()) - return {"status": "started"} - - -@router.get("/status", response_model=AnalysisStatus) -async def get_analysis_status(): - """Return current analysis progress.""" - presidio_done = _state["presidio_progress"] >= 100 - llm_done = _state["llm_progress"] >= 100 - - result = AnalysisStatus( - presidio_progress=_state["presidio_progress"], - llm_progress=_state["llm_progress"], - presidio_complete=presidio_done, - llm_complete=llm_done, - ) - - if presidio_done: - result.presidio_stats = { - "records": 500, - "entities": 1247, - "types": 12, - "avg_confidence": 91, - } - if llm_done: - result.llm_stats = { - "records": 500, - "entities": 1312, - "additional": 65, - "avg_confidence": 87, - } - if presidio_done and llm_done: - result.comparison = { - "matches": 1182, - 
"conflicts": 47, - "llm_only": 65, - "presidio_only": 18, - } - return result - - -@router.get("/records", response_model=list[Record]) -async def get_records(): - """Return all records with their detected entities.""" - return RECORDS - - -@router.get("/records/{record_id}", response_model=Record) -async def get_record(record_id: str): - """Return a single record by ID.""" - for rec in RECORDS: - if rec.id == record_id: - return rec - return {"error": "not found"} diff --git a/evaluation/ai-assistant/backend/routers/datasets.py b/evaluation/ai-assistant/backend/routers/datasets.py deleted file mode 100644 index a5645f97e..000000000 --- a/evaluation/ai-assistant/backend/routers/datasets.py +++ /dev/null @@ -1,20 +0,0 @@ -from fastapi import APIRouter, HTTPException -from mock_data import DATASETS -from models import Dataset - -router = APIRouter(prefix="/api/datasets", tags=["datasets"]) - - -@router.get("", response_model=list[Dataset]) -async def list_datasets(): - """List all available datasets.""" - return DATASETS - - -@router.get("/{dataset_id}", response_model=Dataset) -async def get_dataset(dataset_id: str): - """Get a dataset by ID.""" - for ds in DATASETS: - if ds.id == dataset_id: - return ds - raise HTTPException(status_code=404, detail="Dataset not found") diff --git a/evaluation/ai-assistant/backend/routers/llm.py b/evaluation/ai-assistant/backend/routers/llm.py new file mode 100644 index 000000000..21e79c8a3 --- /dev/null +++ b/evaluation/ai-assistant/backend/routers/llm.py @@ -0,0 +1,193 @@ +"""LLM Judge router — configure and run LLM-based PII detection.""" + +from __future__ import annotations + +import asyncio +import logging + +import llm_service +from fastapi import APIRouter, HTTPException +from models import LLMConfig +from settings import MODEL_CHOICES, load_from_env + +from routers import sampling as sampling_router + +router = APIRouter(prefix="/api/llm", tags=["llm"]) +logger = logging.getLogger(__name__) + +# Currently selected 
deployment (persisted across page reloads while server lives) +_selected_deployment: str | None = None + +# In-memory state for the current LLM analysis run +_state: dict = { + "progress": 0, + "total": 0, + "running": False, + "error": None, + "results": {}, # record_id -> list[Entity dict] +} + + +def _env_is_ready() -> bool: + """Check if .env has the required Azure endpoint.""" + env = load_from_env() + return bool(env.azure_endpoint) + + +# ── Model catalogue ────────────────────────────────────── + + +@router.get("/models") +async def list_models(): + """Return available model choices for the UI dropdown.""" + return MODEL_CHOICES + + +# ── Settings ───────────────────────────────────────────── + + +@router.get("/settings") +async def get_settings(): + """Return current LLM Judge configuration (no secrets).""" + env = load_from_env() + return { + "env_ready": bool(env.azure_endpoint), + "has_endpoint": bool(env.azure_endpoint), + "has_api_key": bool(env.azure_api_key), + "auth_method": "api_key" if env.azure_api_key else "default_credential", + "deployment_name": _selected_deployment or env.deployment_name, + "configured": llm_service.is_configured(), + } + + +@router.post("/configure") +async def configure_llm(config: LLMConfig): + """Configure the LLM recognizer using .env credentials + chosen deployment.""" + global _selected_deployment + + env = load_from_env() + if not env.azure_endpoint: + raise HTTPException( + status_code=400, + detail=( + "PRESIDIO_EVAL_AZURE_ENDPOINT must be set in backend/.env " + "before configuring the LLM Judge." 
+ ), + ) + + # Validate the chosen deployment is in our allowed list + allowed_ids = {m["id"] for m in MODEL_CHOICES} + deployment = config.deployment_name + if deployment not in allowed_ids: + raise HTTPException( + status_code=400, + detail=f"Deployment '{deployment}' is not in the allowed list.", + ) + + try: + result = llm_service.configure( + azure_endpoint=env.azure_endpoint, + api_key=env.azure_api_key, # None → DefaultAzureCredential + deployment_name=deployment, + api_version=env.api_version, + ) + except llm_service.LLMServiceError as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc + + _selected_deployment = deployment + return result + + +@router.post("/disconnect") +async def disconnect_llm(): + """Disconnect the LLM recognizer and reset analysis state.""" + global _selected_deployment + llm_service.disconnect() + _selected_deployment = None + _state["progress"] = 0 + _state["total"] = 0 + _state["running"] = False + _state["error"] = None + _state["results"] = {} + return {"status": "disconnected"} + + +# ── Status / analysis ──────────────────────────────────── + + +@router.get("/status") +async def get_llm_status(): + """Return LLM configuration and analysis status.""" + return { + "configured": llm_service.is_configured(), + "running": _state["running"], + "progress": _state["progress"], + "total": _state["total"], + "error": _state["error"], + } + + +@router.post("/analyze") +async def start_llm_analysis(): + """Run LLM analysis on all sampled records.""" + if not llm_service.is_configured(): + raise HTTPException( + status_code=400, + detail="LLM not configured. POST /api/llm/configure first.", + ) + + if _state["running"]: + raise HTTPException(status_code=409, detail="Analysis already running.") + + records = sampling_router.sampled_records + if not records: + raise HTTPException( + status_code=400, + detail="No sampled records. 
Run sampling first.", + ) + + _state["progress"] = 0 + _state["total"] = len(records) + _state["running"] = True + _state["error"] = None + _state["results"] = {} + + _state["task"] = asyncio.create_task(_run_analysis()) # keep ref; avoids task GC + return {"status": "started", "total": _state["total"]} + + +async def _run_analysis(): + """Background task: analyse each sampled record via the LLM.""" + loop = asyncio.get_running_loop() # we are inside a coroutine + records = sampling_router.sampled_records + try: + for record in records: + try: + entities = await loop.run_in_executor( + None, llm_service.analyze_text, record.text + ) + _state["results"][record.id] = [e.model_dump() for e in entities] + except Exception: + logger.exception("LLM analysis failed for record %s", record.id) + _state["results"][record.id] = [] + _state["progress"] += 1 + except Exception as exc: + logger.exception("LLM analysis task failed") + _state["error"] = str(exc) + finally: + _state["running"] = False + + +@router.get("/results") +async def get_llm_results(): + """Return LLM entities for all analysed records.""" + return _state["results"] + + +@router.get("/results/{record_id}") +async def get_llm_record_results(record_id: str): + """Return LLM entities for a specific record.""" + entities = _state["results"].get(record_id) + if entities is None: + raise HTTPException(status_code=404, detail="Record not found in results.") + return entities diff --git a/evaluation/ai-assistant/backend/routers/review.py b/evaluation/ai-assistant/backend/routers/review.py index 5e561093c..aaa6831a9 100644 --- a/evaluation/ai-assistant/backend/routers/review.py +++ b/evaluation/ai-assistant/backend/routers/review.py @@ -1,6 +1,6 @@ from fastapi import APIRouter -from mock_data import RECORDS from models import Entity, EntityAction, Record +from routers import sampling as sampling_router router = APIRouter(prefix="/api/review", tags=["review"]) @@ -12,7 +12,7 @@ @router.get("/records", response_model=list[Record]) async def get_review_records(): """List records for human
review.""" - return RECORDS + return sampling_router.sampled_records # looked up per call (list may be rebound) @router.post("/records/{record_id}/confirm") @@ -23,6 +23,11 @@ async def confirm_entity(record_id: str, action: EntityAction): return {"status": "confirmed", "record_id": record_id} +def _spans_overlap(a: Entity, b: Entity) -> bool: + """Return True if two entity spans overlap.""" + return a.start < b.end and b.start < a.end + + @router.post("/records/{record_id}/reject") async def reject_entity(record_id: str, action: EntityAction): """Reject an entity and remove it from the golden set.""" @@ -30,11 +35,7 @@ async def reject_entity(record_id: str, action: EntityAction): _golden_set[record_id] = [ e for e in entities - if not ( - e.text == action.entity.text - and e.start == action.entity.start - and e.end == action.entity.end - ) + if not _spans_overlap(e, action.entity) ] _reviewed.add(record_id) return {"status": "rejected", "record_id": record_id} @@ -52,7 +53,7 @@ async def add_manual_entity(record_id: str, action: EntityAction): @router.get("/progress") async def get_review_progress(): """Return review completion progress.""" - total = len(RECORDS) + total = len(sampling_router.sampled_records) reviewed = len(_reviewed) return { "total": total, diff --git a/evaluation/ai-assistant/backend/routers/upload.py b/evaluation/ai-assistant/backend/routers/upload.py index 2257be9dc..3bae9aad4 100644 --- a/evaluation/ai-assistant/backend/routers/upload.py +++ b/evaluation/ai-assistant/backend/routers/upload.py @@ -9,7 +9,13 @@ import uuid from fastapi import APIRouter, HTTPException -from models import DatasetLoadRequest, Entity, Record, UploadedDataset +from models import ( + DatasetLoadRequest, + DatasetRenameRequest, + Entity, + Record, + UploadedDataset, +) router = APIRouter(prefix="/api/datasets", tags=["datasets"]) @@ -19,6 +25,47 @@ MAX_FILE_SIZE = 50 * 1024 * 1024 # 50 MB +# Persistence file for saved datasets (next to this file → backend/datasets.json) +_DATASETS_FILE =
os.path.join(os.path.dirname(os.path.dirname(__file__)), "datasets.json") + + +def _save_registry() -> None: + """Persist the dataset registry to disk.""" + data = [ds.model_dump() for ds in _uploaded.values()] + with open(_DATASETS_FILE, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2) + + +def _load_registry() -> None: + """Load previously saved datasets from disk on startup.""" + if not os.path.isfile(_DATASETS_FILE): + return + try: + with open(_DATASETS_FILE, encoding="utf-8") as f: + data = json.load(f) + for item in data: + ds = UploadedDataset(**item) + _uploaded[ds.id] = ds + except Exception: + pass # ignore corrupt file + + +# Project root (evaluation/ai-assistant/) — used to resolve relative paths +_PROJECT_ROOT = os.path.normpath( + os.path.join(os.path.dirname(__file__), "..", "..") +) + + +def _resolve_path(path: str) -> str: + """Resolve a path; relative paths are resolved against the project root.""" + if os.path.isabs(path): + return path + return os.path.normpath(os.path.join(_PROJECT_ROOT, path)) + + +# Load on import so saved datasets are available immediately +_load_registry() + def _parse_entities(raw: str | list | None) -> list[Entity]: """Parse entities from a JSON string or list.""" @@ -141,6 +188,12 @@ def _parse_json( return records, sorted(columns), has_entities +@router.get("/saved") +async def list_saved_datasets(): + """Return all saved datasets (for the dropdown).""" + return list(_uploaded.values()) + + @router.post("/load") async def load_dataset(req: DatasetLoadRequest): """Load a CSV or JSON file from a local absolute path.""" @@ -180,33 +233,86 @@ async def load_dataset(req: DatasetLoadRequest): dataset_id = f"upload-{uuid.uuid4().hex[:8]}" filename = os.path.basename(file_path) + display_name = req.name.strip() if req.name and req.name.strip() else filename + description = req.description.strip() if req.description else "" dataset = UploadedDataset( id=dataset_id, filename=filename, + name=display_name, + 
description=description, + path=file_path, format=req.format, record_count=len(records), has_entities=has_entities, columns=columns, + text_column=req.text_column, + entities_column=req.entities_column, ) _uploaded[dataset_id] = dataset _records[dataset_id] = records + _save_registry() return dataset +@router.patch("/{dataset_id}/rename") +async def rename_dataset(dataset_id: str, req: DatasetRenameRequest): + """Rename a saved dataset.""" + if dataset_id not in _uploaded: + raise HTTPException(status_code=404, detail="Dataset not found") + new_name = req.name.strip() + if not new_name: + raise HTTPException(status_code=400, detail="Name cannot be empty") + _uploaded[dataset_id] = _uploaded[dataset_id].model_copy( + update={"name": new_name} + ) + _save_registry() + return _uploaded[dataset_id] + + +@router.delete("/{dataset_id}") +async def delete_dataset(dataset_id: str): + """Remove a saved dataset from the registry.""" + if dataset_id not in _uploaded: + raise HTTPException(status_code=404, detail="Dataset not found") + del _uploaded[dataset_id] + _records.pop(dataset_id, None) + _save_registry() + return {"ok": True} + + +def _ensure_records_loaded(dataset_id: str) -> list[Record]: + """Reload records from the original file if not in memory.""" + if dataset_id in _records: + return _records[dataset_id] + ds = _uploaded.get(dataset_id) + if ds is None: + raise HTTPException(status_code=404, detail="Dataset not found") + resolved = _resolve_path(ds.path) + if not os.path.isfile(resolved): + raise HTTPException( + status_code=404, + detail=f"Source file no longer exists: {ds.path}", + ) + with open(resolved, encoding="utf-8") as f: + content = f.read() + if ds.format == "csv": + records, _, _ = _parse_csv(content, ds.text_column, ds.entities_column) + else: + records, _, _ = _parse_json(content, ds.text_column, ds.entities_column) + _records[dataset_id] = records + return records + + @router.get("/{dataset_id}/records") async def get_dataset_records(dataset_id: 
str): """Return parsed records for a loaded dataset.""" - if dataset_id not in _records: - raise HTTPException(status_code=404, detail="Dataset not found") - return _records[dataset_id] + return _ensure_records_loaded(dataset_id) @router.get("/{dataset_id}/preview") async def preview_dataset(dataset_id: str, limit: int = 5): """Return a small preview of the loaded dataset.""" - if dataset_id not in _records: - raise HTTPException(status_code=404, detail="Dataset not found") - records = _records[dataset_id][:limit] - return records + records = _ensure_records_loaded(dataset_id) + return records[:limit] diff --git a/evaluation/ai-assistant/backend/settings.py b/evaluation/ai-assistant/backend/settings.py new file mode 100644 index 000000000..585a093e4 --- /dev/null +++ b/evaluation/ai-assistant/backend/settings.py @@ -0,0 +1,66 @@ +"""Centralised settings loaded from .env with PRESIDIO_EVAL_ prefix.""" + +from __future__ import annotations + +import os +from pathlib import Path +from typing import Optional + +from pydantic import BaseModel + +_ENV_FILE = Path(__file__).resolve().parent / ".env" + +# ── Available model choices shown in the UI dropdown ── +MODEL_CHOICES: list[dict[str, str]] = [ + {"id": "gpt-5.1", "label": "GPT-5.1", "provider": "Azure OpenAI"}, + { + "id": "gpt-5.2-chat", + "label": "GPT-5.2 Chat", + "provider": "Azure OpenAI", + }, + {"id": "gpt-5.4", "label": "GPT-5.4", "provider": "Azure OpenAI"}, +] + + +class EvalSettings(BaseModel): + """Runtime settings, sourced from env vars or interactive input.""" + + azure_endpoint: str = "" + azure_api_key: Optional[str] = None + deployment_name: str = "gpt-5.4" + api_version: str = "2024-02-15-preview" + + +def _load_dotenv() -> None: + """Read .env into os.environ (simple key=value parser, no dependency).""" + if not _ENV_FILE.is_file(): + return + with open(_ENV_FILE, encoding="utf-8") as fh: + for line in fh: + line = line.strip() + if not line or line.startswith("#"): + continue + if "=" not in line: + continue + key,
_, value = line.partition("=") + key = key.strip() + value = value.strip() + # Only set if not already in os.environ (explicit env wins) + if key not in os.environ: + os.environ[key] = value + + +def load_from_env() -> EvalSettings: + """Build settings from PRESIDIO_EVAL_* environment variables.""" + _load_dotenv() + api_key = os.environ.get("PRESIDIO_EVAL_AZURE_API_KEY") or None + return EvalSettings( + azure_endpoint=os.environ.get("PRESIDIO_EVAL_AZURE_ENDPOINT", ""), + azure_api_key=api_key, + deployment_name=os.environ.get( + "PRESIDIO_EVAL_DEPLOYMENT_NAME", "gpt-4o" + ), + api_version=os.environ.get( + "PRESIDIO_EVAL_API_VERSION", "2024-02-15-preview" + ), + ) diff --git a/evaluation/ai-assistant/data/example_pii_dataset.csv b/evaluation/ai-assistant/data/example_pii_dataset.csv new file mode 100644 index 000000000..75fd088ac --- /dev/null +++ b/evaluation/ai-assistant/data/example_pii_dataset.csv @@ -0,0 +1,11 @@ +"text","entities" +"Dear Mr. James Wilson, your appointment at Springfield Clinic is confirmed for 04/12/2025. Please bring your insurance card (ID: INS-77234) and a photo ID. For questions, call 312-555-0198 or email james.wilson@outlook.com.","[{""text"": ""James Wilson"", ""entity_type"": ""PERSON"", ""start"": 9, ""end"": 21, ""score"": 1.0}, {""text"": ""04/12/2025"", ""entity_type"": ""DATE"", ""start"": 79, ""end"": 89, ""score"": 1.0}, {""text"": ""INS-77234"", ""entity_type"": ""ID"", ""start"": 129, ""end"": 138, ""score"": 1.0}, {""text"": ""312-555-0198"", ""entity_type"": ""PHONE_NUMBER"", ""start"": 176, ""end"": 188, ""score"": 1.0}, {""text"": ""james.wilson@outlook.com"", ""entity_type"": ""EMAIL_ADDRESS"", ""start"": 198, ""end"": 222, ""score"": 1.0}]" +"The account holder, Maria Garcia (DOB: 11/23/1990), reported unauthorized charges on her Visa card ending in 4829. Her address on file is 742 Elm Street, Austin, TX 78701. 
Case reference: CR-2025-08813.","[{""text"": ""Maria Garcia"", ""entity_type"": ""PERSON"", ""start"": 20, ""end"": 32, ""score"": 1.0}, {""text"": ""11/23/1990"", ""entity_type"": ""DATE_OF_BIRTH"", ""start"": 39, ""end"": 49, ""score"": 1.0}, {""text"": ""4829"", ""entity_type"": ""CREDIT_CARD"", ""start"": 109, ""end"": 113, ""score"": 1.0}, {""text"": ""742 Elm Street, Austin, TX 78701"", ""entity_type"": ""ADDRESS"", ""start"": 138, ""end"": 170, ""score"": 1.0}]" +"Hi, this is a message for David Chen at extension 4021. Your lab results from Quest Diagnostics (order #QD-998471) are ready. Please contact Dr. Patel at 617-555-0342 to discuss. Your patient ID is PT-228109.","[{""text"": ""David Chen"", ""entity_type"": ""PERSON"", ""start"": 26, ""end"": 36, ""score"": 1.0}, {""text"": ""QD-998471"", ""entity_type"": ""ID"", ""start"": 104, ""end"": 113, ""score"": 1.0}, {""text"": ""Dr. Patel"", ""entity_type"": ""PERSON"", ""start"": 141, ""end"": 150, ""score"": 1.0}, {""text"": ""617-555-0342"", ""entity_type"": ""PHONE_NUMBER"", ""start"": 154, ""end"": 166, ""score"": 1.0}, {""text"": ""PT-228109"", ""entity_type"": ""ID"", ""start"": 198, ""end"": 207, ""score"": 1.0}]" +"Employee review for Sarah O'Brien (Employee #EMP-30042). Performance rating: Exceeds Expectations. Current salary: $94,500. Manager: Tom Richards. 
Next review date: 09/15/2025.","[{""text"": ""Sarah O'Brien"", ""entity_type"": ""PERSON"", ""start"": 20, ""end"": 33, ""score"": 1.0}, {""text"": ""EMP-30042"", ""entity_type"": ""ID"", ""start"": 45, ""end"": 54, ""score"": 1.0}, {""text"": ""$94,500"", ""entity_type"": ""FINANCIAL"", ""start"": 115, ""end"": 122, ""score"": 1.0}, {""text"": ""Tom Richards"", ""entity_type"": ""PERSON"", ""start"": 133, ""end"": 145, ""score"": 1.0}, {""text"": ""09/15/2025"", ""entity_type"": ""DATE"", ""start"": 165, ""end"": 175, ""score"": 1.0}]" +"Tenant lease agreement: Robert Kim, SSN 531-72-8846, is approved for unit 4B at 1500 Oak Avenue, Portland, OR 97205. Monthly rent: $1,850. Lease start: 03/01/2025. Emergency contact: Linda Kim, 503-555-0147.","[{""text"": ""Robert Kim"", ""entity_type"": ""PERSON"", ""start"": 24, ""end"": 34, ""score"": 1.0}, {""text"": ""531-72-8846"", ""entity_type"": ""US_SSN"", ""start"": 40, ""end"": 51, ""score"": 1.0}, {""text"": ""1500 Oak Avenue, Portland, OR 97205"", ""entity_type"": ""ADDRESS"", ""start"": 80, ""end"": 115, ""score"": 1.0}, {""text"": ""$1,850"", ""entity_type"": ""FINANCIAL"", ""start"": 131, ""end"": 137, ""score"": 1.0}, {""text"": ""Linda Kim"", ""entity_type"": ""PERSON"", ""start"": 183, ""end"": 192, ""score"": 1.0}, {""text"": ""503-555-0147"", ""entity_type"": ""PHONE_NUMBER"", ""start"": 194, ""end"": 206, ""score"": 1.0}]" +"Shipping confirmation for order #ORD-44218. Recipient: Aisha Patel, 2300 Birch Lane, Denver, CO 80202. Tracking number: 1Z999AA10123456784. Delivery expected by 02/28/2025. 
Contact: aisha.p@gmail.com.","[{""text"": ""Aisha Patel"", ""entity_type"": ""PERSON"", ""start"": 55, ""end"": 66, ""score"": 1.0}, {""text"": ""2300 Birch Lane, Denver, CO 80202"", ""entity_type"": ""ADDRESS"", ""start"": 68, ""end"": 101, ""score"": 1.0}, {""text"": ""1Z999AA10123456784"", ""entity_type"": ""ID"", ""start"": 120, ""end"": 138, ""score"": 1.0}, {""text"": ""02/28/2025"", ""entity_type"": ""DATE"", ""start"": 161, ""end"": 171, ""score"": 1.0}, {""text"": ""aisha.p@gmail.com"", ""entity_type"": ""EMAIL_ADDRESS"", ""start"": 182, ""end"": 199, ""score"": 1.0}]" +"Insurance claim filed by John Martinez (Policy #POL-55931-CA). Incident date: 01/15/2025. Vehicle: 2022 Honda Civic, VIN 1HGBH41JXMN109186. Damage estimate: $4,200. Adjuster assigned: Karen White, karen.white@insco.com.","[{""text"": ""John Martinez"", ""entity_type"": ""PERSON"", ""start"": 25, ""end"": 38, ""score"": 1.0}, {""text"": ""POL-55931-CA"", ""entity_type"": ""ID"", ""start"": 48, ""end"": 60, ""score"": 1.0}, {""text"": ""01/15/2025"", ""entity_type"": ""DATE"", ""start"": 78, ""end"": 88, ""score"": 1.0}, {""text"": ""1HGBH41JXMN109186"", ""entity_type"": ""ID"", ""start"": 121, ""end"": 138, ""score"": 1.0}, {""text"": ""$4,200"", ""entity_type"": ""FINANCIAL"", ""start"": 157, ""end"": 163, ""score"": 1.0}, {""text"": ""Karen White"", ""entity_type"": ""PERSON"", ""start"": 184, ""end"": 195, ""score"": 1.0}, {""text"": ""karen.white@insco.com"", ""entity_type"": ""EMAIL_ADDRESS"", ""start"": 197, ""end"": 218, ""score"": 1.0}]" +"Meeting notes: Discussed project timeline with Emily Zhao (emily.zhao@techcorp.io). Budget approved: $250,000. Next milestone due 06/30/2025. Stakeholder contact: VP of Engineering, ext. 
7784, building 3A.","[{""text"": ""Emily Zhao"", ""entity_type"": ""PERSON"", ""start"": 47, ""end"": 57, ""score"": 1.0}, {""text"": ""emily.zhao@techcorp.io"", ""entity_type"": ""EMAIL_ADDRESS"", ""start"": 59, ""end"": 81, ""score"": 1.0}, {""text"": ""$250,000"", ""entity_type"": ""FINANCIAL"", ""start"": 101, ""end"": 109, ""score"": 1.0}, {""text"": ""06/30/2025"", ""entity_type"": ""DATE"", ""start"": 130, ""end"": 140, ""score"": 1.0}]" +"Support ticket #TK-81923: Customer Peter Novak (peter.novak@yahoo.de) reports login issues. Account created 08/10/2022. Last known IP: 84.175.22.61. Browser: Chrome 120. Phone on file: +49-170-555-8821.","[{""text"": ""Peter Novak"", ""entity_type"": ""PERSON"", ""start"": 35, ""end"": 46, ""score"": 1.0}, {""text"": ""peter.novak@yahoo.de"", ""entity_type"": ""EMAIL_ADDRESS"", ""start"": 48, ""end"": 68, ""score"": 1.0}, {""text"": ""08/10/2022"", ""entity_type"": ""DATE"", ""start"": 108, ""end"": 118, ""score"": 1.0}, {""text"": ""84.175.22.61"", ""entity_type"": ""IP_ADDRESS"", ""start"": 135, ""end"": 147, ""score"": 1.0}, {""text"": ""+49-170-555-8821"", ""entity_type"": ""PHONE_NUMBER"", ""start"": 185, ""end"": 201, ""score"": 1.0}]" +"Discharge summary for patient Lisa Thompson (MRN: MR-667210). Admitted: 12/20/2024, discharged: 12/27/2024. Primary diagnosis: pneumonia. Attending physician: Dr. Angela Morrison. Follow-up with pulmonology at 555-0299. Pharmacy: CVS #4481.","[{""text"": ""Lisa Thompson"", ""entity_type"": ""PERSON"", ""start"": 30, ""end"": 43, ""score"": 1.0}, {""text"": ""MR-667210"", ""entity_type"": ""ID"", ""start"": 50, ""end"": 59, ""score"": 1.0}, {""text"": ""12/20/2024"", ""entity_type"": ""DATE"", ""start"": 72, ""end"": 82, ""score"": 1.0}, {""text"": ""12/27/2024"", ""entity_type"": ""DATE"", ""start"": 96, ""end"": 106, ""score"": 1.0}, {""text"": ""Dr. 
Angela Morrison"", ""entity_type"": ""PERSON"", ""start"": 159, ""end"": 178, ""score"": 1.0}, {""text"": ""555-0299"", ""entity_type"": ""PHONE_NUMBER"", ""start"": 210, ""end"": 218, ""score"": 1.0}]" diff --git a/evaluation/ai-assistant/src/app/components/EntityComparison.tsx b/evaluation/ai-assistant/src/app/components/EntityComparison.tsx index 28a9ff594..49f485c47 100644 --- a/evaluation/ai-assistant/src/app/components/EntityComparison.tsx +++ b/evaluation/ai-assistant/src/app/components/EntityComparison.tsx @@ -6,7 +6,7 @@ import { Input } from './ui/input'; import { Label } from './ui/label'; import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from './ui/select'; import { Collapsible, CollapsibleContent, CollapsibleTrigger } from './ui/collapsible'; -import { CheckCircle, XCircle, Edit, AlertTriangle, Check, X, ChevronDown, FileText } from 'lucide-react'; +import { CheckCircle, XCircle, Edit, Check, X, ChevronDown, FileText } from 'lucide-react'; import type { Entity } from '../types'; interface EntityComparisonProps { @@ -20,11 +20,11 @@ interface EntityComparisonProps { onAddManual: (recordId: string, entity: Entity) => void; } -type EntityStatus = 'match' | 'conflict' | 'presidio-only' | 'llm-only' | 'dataset-only' | 'pending'; +type EntityStatus = 'match' | 'presidio-only' | 'llm-only' | 'predefined-only' | 'pending'; interface AnnotatedEntity extends Entity { status: EntityStatus; - source: 'presidio' | 'llm' | 'both' | 'dataset'; + sources: ('presidio' | 'llm' | 'predefined')[]; confirmed?: boolean; } @@ -44,54 +44,77 @@ export function EntityComparison({ const [rejectedEntities, setRejectedEntities] = useState>(new Set()); const [expandedContexts, setExpandedContexts] = useState>(new Set()); - // Combine and classify entities + // Combine and classify entities from all three sources const annotatedEntities: AnnotatedEntity[] = []; - presidioEntities.forEach(pe => { - const matchingLlm = llmEntities.find( - le => le.text === 
pe.text && le.start === pe.start && le.end === pe.end - ); - - if (matchingLlm) { - if (matchingLlm.entity_type === pe.entity_type) { - annotatedEntities.push({ ...pe, status: 'match', source: 'both' }); - } else { - annotatedEntities.push({ ...pe, status: 'conflict', source: 'both' }); + // Two spans overlap if each one starts before the other ends + const spansOverlap = (a: Entity, b: Entity) => + a.start < b.end && b.start < a.end; + + // Build a unified list: for each unique span, track which sources detected it + interface SpanEntry { entity: Entity; sources: Set<'presidio' | 'llm' | 'predefined'>; types: Map } + const spans: SpanEntry[] = []; + + const addToSpans = (entity: Entity, source: 'presidio' | 'llm' | 'predefined') => { + const existing = spans.find(s => spansOverlap(s.entity, entity)); + if (existing) { + existing.sources.add(source); + existing.types.set(source, entity.entity_type); + // Keep the entity with the longer text as the representative span + if (entity.text.length > existing.entity.text.length) { + existing.entity = { ...entity }; } } else { - annotatedEntities.push({ ...pe, status: 'presidio-only', source: 'presidio' }); + const types = new Map(); + types.set(source, entity.entity_type); + spans.push({ entity: { ...entity }, sources: new Set([source]), types }); } - }); + }; - llmEntities.forEach(le => { - const alreadyAdded = annotatedEntities.some( - ae => ae.text === le.text && ae.start === le.start && ae.end === le.end - ); - if (!alreadyAdded) { - annotatedEntities.push({ ...le, status: 'llm-only', source: 'llm' }); - } - }); + presidioEntities.forEach(e => addToSpans(e, 'presidio')); + datasetEntities.forEach(e => addToSpans(e, 'predefined')); + llmEntities.forEach(e => addToSpans(e, 'llm')); - datasetEntities.forEach(de => { - const alreadyAdded = annotatedEntities.some( - ae => ae.text === de.text && ae.start === de.start && ae.end === de.end - ); - if (!alreadyAdded) { - annotatedEntities.push({ ...de, status: 'dataset-only', source: 'dataset' });
+ spans.forEach(({ entity, sources, types }) => { + const sourceList = Array.from(sources) as ('presidio' | 'llm' | 'predefined')[]; + const uniqueTypes = new Set(types.values()); + const allAgree = uniqueTypes.size === 1; + + if (sourceList.length >= 2 && allAgree) { + // All active sources agree on type → single "Match" card + annotatedEntities.push({ ...entity, status: 'match', sources: sourceList }); + } else if (sourceList.length >= 2 && !allAgree) { + // Sources disagree on type → separate card per source so user can confirm/reject each + for (const src of sourceList) { + const srcType = types.get(src) || entity.entity_type; + const status: EntityStatus = src === 'presidio' ? 'presidio-only' : src === 'llm' ? 'llm-only' : 'predefined-only'; + annotatedEntities.push({ ...entity, entity_type: srcType, status, sources: [src] }); + } + } else if (sourceList.length === 1) { + const s = sourceList[0]; + const status: EntityStatus = s === 'presidio' ? 'presidio-only' : s === 'llm' ? 'llm-only' : 'predefined-only'; + annotatedEntities.push({ ...entity, status, sources: sourceList }); + } else { + annotatedEntities.push({ ...entity, status: 'pending', sources: sourceList }); } }); - const getEntityKey = (entity: Entity) => `${entity.text}-${entity.start}-${entity.end}`; + const getEntityKey = (entity: AnnotatedEntity) => `${entity.text}-${entity.start}-${entity.end}-${entity.sources.join(',')}`; const getContextForEntity = (entity: Entity) => { const CONTEXT_CHARS = 150; - const start = Math.max(0, entity.start - CONTEXT_CHARS); - const end = Math.min(recordText.length, entity.end + CONTEXT_CHARS); - - const before = recordText.substring(start, entity.start); - const entityText = recordText.substring(entity.start, entity.end); - const after = recordText.substring(entity.end, end); - + // Trust the reported offsets when they match the text; otherwise fall back to the first occurrence + const idx = recordText.startsWith(entity.text, entity.start) ? entity.start : recordText.indexOf(entity.text); + const entityStart = idx >= 0 ?
idx : entity.start; + const entityEnd = idx >= 0 ? idx + entity.text.length : entity.end; + + const start = Math.max(0, entityStart - CONTEXT_CHARS); + const end = Math.min(recordText.length, entityEnd + CONTEXT_CHARS); + + const before = recordText.substring(start, entityStart); + const entityText = recordText.substring(entityStart, entityEnd); + const after = recordText.substring(entityEnd, end); + return { before: (start > 0 ? '...' : '') + before, entity: entityText, @@ -119,7 +142,7 @@ export function EntityComparison({ newSet.delete(key); return newSet; }); - onConfirm(recordId, entity, entity.source === 'both' ? 'presidio' : entity.source); + onConfirm(recordId, entity, entity.sources[0] === 'predefined' ? 'dataset' : entity.sources[0]); }; const handleRejectEntity = (entity: AnnotatedEntity) => { @@ -130,9 +153,7 @@ export function EntityComparison({ newSet.delete(key); return newSet; }); - if (entity.source !== 'both') { - onReject(recordId, entity, entity.source); - } + onReject(recordId, entity, entity.sources[0] === 'predefined' ? 'dataset' : entity.sources[0]); }; const handleAddManualEntity = () => { @@ -154,14 +175,12 @@ export function EntityComparison({ switch (status) { case 'match': return ✓ Match; - case 'conflict': - return ⚠ Conflict; case 'presidio-only': - return Presidio Only; + return Presidio; case 'llm-only': - return LLM Only; - case 'dataset-only': - return Dataset; + return LLM Judge; + case 'predefined-only': + return Predefined; default: return Pending; } @@ -239,7 +258,6 @@ export function EntityComparison({ className={`p-4 rounded-lg border ${ isConfirmed ? 'bg-green-50 border-green-300' : isRejected ? 'bg-red-50 border-red-300' : - entity.status === 'conflict' ? 'bg-amber-50 border-amber-300' : 'bg-white border-slate-200' }`} > @@ -254,12 +272,6 @@ export function EntityComparison({
Position: {entity.start}-{entity.end} {entity.score && Confidence: {(entity.score * 100).toFixed(0)}%} - {entity.status === 'conflict' && ( -
- - Type mismatch between Presidio and LLM -
- )}
diff --git a/evaluation/ai-assistant/src/app/pages/Anonymization.tsx b/evaluation/ai-assistant/src/app/pages/Anonymization.tsx index 12eda26a9..6300ea996 100644 --- a/evaluation/ai-assistant/src/app/pages/Anonymization.tsx +++ b/evaluation/ai-assistant/src/app/pages/Anonymization.tsx @@ -1,11 +1,29 @@ -import { useMemo } from 'react'; +import { useState, useEffect, useMemo, useCallback } from 'react'; import { useNavigate } from 'react-router'; import { Card } from '../components/ui/card'; import { Button } from '../components/ui/button'; +import { Label } from '../components/ui/label'; +import { Progress } from '../components/ui/progress'; import { Alert, AlertDescription } from '../components/ui/alert'; -import { ArrowRight, Shield, Sparkles, Database } from 'lucide-react'; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '../components/ui/select'; +import { ArrowRight, Shield, Sparkles, Database, CheckCircle, Loader2, AlertTriangle, Unplug } from 'lucide-react'; +import { api } from '../lib/api'; import type { SetupConfig } from '../types'; +type LlmStep = 'loading' | 'env_missing' | 'idle' | 'configuring' | 'configured' | 'running' | 'done' | 'error'; + +interface ModelChoice { + id: string; + label: string; + provider: string; +} + export function Anonymization() { const navigate = useNavigate(); @@ -20,16 +38,107 @@ export function Anonymization() { const hasDatasetEntities = setupConfig?.hasDatasetEntities ?? 
false; + // LLM Judge state + const [llmStep, setLlmStep] = useState('loading'); + const [models, setModels] = useState([]); + const [selectedModel, setSelectedModel] = useState('gpt-5.4'); + const [llmProgress, setLlmProgress] = useState(0); + const [llmTotal, setLlmTotal] = useState(0); + const [llmError, setLlmError] = useState(null); + + // Load models + env status on mount + useEffect(() => { + Promise.all([api.llm.models(), api.llm.settings(), api.llm.status()]).then( + ([modelList, settings, status]) => { + setModels(modelList); + setSelectedModel(settings.deployment_name || 'gpt-4o'); + + if (status.running) { + setLlmStep('running'); + setLlmProgress(status.progress); + setLlmTotal(status.total); + } else if (settings.configured) { + setLlmStep('configured'); + } else if (!settings.env_ready) { + setLlmStep('env_missing'); + } else { + setLlmStep('idle'); + } + }, + ).catch(() => setLlmStep('env_missing')); + }, []); + + // Poll progress while running + useEffect(() => { + if (llmStep !== 'running') return; + const interval = setInterval(async () => { + try { + const s = await api.llm.status(); + setLlmProgress(s.progress); + setLlmTotal(s.total); + if (s.error) { + setLlmError(s.error); + setLlmStep('error'); + } else if (!s.running && s.progress >= s.total && s.total > 0) { + setLlmStep('done'); + } + } catch { + // keep polling + } + }, 1000); + return () => clearInterval(interval); + }, [llmStep]); + + const handleConfigure = useCallback(async () => { + setLlmStep('configuring'); + setLlmError(null); + try { + await api.llm.configure(selectedModel); + setLlmStep('configured'); + } catch (err: any) { + setLlmError(err.message ?? 'Configuration failed'); + setLlmStep('error'); + } + }, [selectedModel]); + + const handleRunAnalysis = useCallback(async () => { + setLlmError(null); + try { + const res = await api.llm.analyze(); + setLlmTotal(res.total); + setLlmProgress(0); + setLlmStep('running'); + } catch (err: any) { + setLlmError(err.message ?? 
'Failed to start analysis'); + // Stay in configured state so user can retry + setLlmStep('configured'); + } + }, []); + + const handleDisconnect = useCallback(async () => { + try { + await api.llm.disconnect(); + setLlmStep('idle'); + setLlmError(null); + setLlmProgress(0); + setLlmTotal(0); + } catch (err: any) { + setLlmError(err.message ?? 'Failed to disconnect'); + } + }, []); + const handleContinue = () => { navigate('/human-review'); }; + const progressPct = llmTotal > 0 ? Math.round((llmProgress / llmTotal) * 100) : 0; + return (

PII Detection Analysis

- Automated PII detection engines will run here once implemented. + Configure and run PII detection engines. The LLM Judge uses Azure OpenAI via LangExtract to identify entities.

@@ -48,9 +157,9 @@ export function Anonymization() { )} - {/* Side-by-Side Cards — greyed out / coming soon */} + {/* Side-by-Side Cards */}
- {/* Presidio Processing — not implemented */} + {/* Presidio Processing — coming soon */}
@@ -63,30 +172,159 @@ export function Anonymization() {
Coming soon
-

Run Presidio's rule-based and NLP detection to identify PII entities with precise character spans and confidence scores.

- {/* LLM Processing — not implemented */} - -
+ {/* LLM Judge — active */} + +
- +
-

LLM Judge

-

AI-assisted entity detection

+

LLM Judge

+

Azure OpenAI via LangExtract

- Coming soon + {llmStep === 'done' && ( + + Complete + + )} + {llmStep === 'configured' && ( + + Ready + + )}
-

- Use an LLM to suggest additional PII entities and validate detections. Results will be combined with Presidio output for human review. -

+ {/* Step: loading */} + {llmStep === 'loading' && ( +
+ Loading configuration… +
+ )} + + {/* Step: .env not configured */} + {llmStep === 'env_missing' && ( + + + +

+ Azure OpenAI endpoint is required.{' '} + Add it to the backend/.env file: +

+
+{`PRESIDIO_EVAL_AZURE_ENDPOINT=https://your-resource.openai.azure.com
+
+# Option A — API key auth:
+PRESIDIO_EVAL_AZURE_API_KEY=your-api-key-here
+
+# Option B — leave API key empty to use
+# DefaultAzureCredential (managed identity / az login)`}
+                  
+

+ Then restart the backend server. See .env.example for reference. +

+
+
+ )} + + {/* Step: idle / error — choose deployment and connect */} + {(llmStep === 'idle' || llmStep === 'error' || llmStep === 'configuring') && ( +
+
+ + +
+ + {llmError && ( + + + {llmError} + + )} + + +
+ )} + + {/* Step: configured — ready to run */} + {llmStep === 'configured' && ( +
+

+ Connected with {selectedModel}. +

+ {llmError && ( + + + {llmError} + + )} +
+ + +
+
+ )} + + {/* Step: running */} + {llmStep === 'running' && ( +
+
+ + + Analysing records… + + {llmProgress} / {llmTotal} +
+ +
+ )} + + {/* Step: complete */} + {llmStep === 'done' && ( +
+

+ LLM analysis complete — {llmTotal} records processed with {selectedModel}. +

+ +
+ )}
diff --git a/evaluation/ai-assistant/src/app/pages/HumanReview.tsx b/evaluation/ai-assistant/src/app/pages/HumanReview.tsx index 9e9da31fc..f7c1329db 100644 --- a/evaluation/ai-assistant/src/app/pages/HumanReview.tsx +++ b/evaluation/ai-assistant/src/app/pages/HumanReview.tsx @@ -1,15 +1,30 @@ -import { useState, useMemo } from 'react'; +import { useEffect, useState, useMemo } from 'react'; import { useNavigate } from 'react-router'; import { Button } from '../components/ui/button'; import { Progress } from '../components/ui/progress'; import { Alert, AlertDescription } from '../components/ui/alert'; -import { ArrowRight, Users, CheckCircle, ChevronLeft, ChevronRight, CheckCheck } from 'lucide-react'; +import { ArrowRight, Users, CheckCircle, ChevronLeft, ChevronRight, CheckCheck, Loader2 } from 'lucide-react'; import { EntityComparison } from '../components/EntityComparison'; -import { mockRecords } from '../lib/mockData'; -import type { Entity, SetupConfig } from '../types'; +import { api } from '../lib/api'; +import type { Entity, Record as RecordType, SetupConfig } from '../types'; + +/** Map backend snake_case record to frontend camelCase Record. */ +function toFrontendRecord(raw: any): RecordType { + return { + id: raw.id, + text: raw.text, + presidioEntities: raw.presidio_entities ?? raw.presidioEntities ?? [], + llmEntities: raw.llm_entities ?? raw.llmEntities ?? [], + datasetEntities: raw.dataset_entities ?? raw.datasetEntities ?? [], + goldenEntities: raw.golden_entities ?? raw.goldenEntities ?? 
undefined, + }; +} export function HumanReview() { const navigate = useNavigate(); + const [records, setRecords] = useState([]); + const [loading, setLoading] = useState(true); + const [loadError, setLoadError] = useState(null); const [currentRecordIndex, setCurrentRecordIndex] = useState(0); const [reviewedRecords, setReviewedRecords] = useState>(new Set()); const [goldenSet, setGoldenSet] = useState>({}); @@ -25,9 +40,39 @@ export function HumanReview() { const hasDatasetEntities = setupConfig?.hasDatasetEntities ?? false; - const currentRecord = mockRecords[currentRecordIndex]; - const totalRecords = mockRecords.length; - const reviewProgress = (reviewedRecords.size / totalRecords) * 100; + // Fetch sampled records + LLM results on mount + useEffect(() => { + async function loadRecords() { + try { + setLoading(true); + const [rawRecords, llmResults] = await Promise.all([ + api.sampling.records(), + api.llm.results(), + ]); + + const merged = rawRecords.map((raw: any) => { + const rec = toFrontendRecord(raw); + // Merge in LLM entities from analysis results + const llmEntities = llmResults[rec.id]; + if (llmEntities) { + rec.llmEntities = llmEntities; + } + return rec; + }); + + setRecords(merged); + } catch (err: any) { + setLoadError(err.message || 'Failed to load records'); + } finally { + setLoading(false); + } + } + loadRecords(); + }, []); + + const currentRecord = records[currentRecordIndex] ?? null; + const totalRecords = records.length; + const reviewProgress = totalRecords > 0 ? 
(reviewedRecords.size / totalRecords) * 100 : 0; const handleConfirm = (recordId: string, entity: Entity, _source: string) => { setGoldenSet(prev => ({ @@ -41,7 +86,7 @@ export function HumanReview() { setGoldenSet(prev => ({ ...prev, [recordId]: (prev[recordId] || []).filter(e => - e.text !== entity.text || e.start !== entity.start || e.end !== entity.end + !(e.start < entity.end && entity.start < e.end) ), })); setReviewedRecords(new Set([...reviewedRecords, recordId])); @@ -72,11 +117,10 @@ export function HumanReview() { }; const handleAutoConfirmAll = () => { - // Auto-confirm all entities from all sources for all records const allReviewed = new Set(); const autoGolden: Record = {}; - mockRecords.forEach(record => { + records.forEach(record => { allReviewed.add(record.id); const entities: Entity[] = []; const seen = new Set(); @@ -91,9 +135,7 @@ export function HumanReview() { record.presidioEntities.forEach(addUnique); record.llmEntities.forEach(addUnique); - if ('datasetEntities' in record) { - (record as any).datasetEntities?.forEach(addUnique); - } + record.datasetEntities?.forEach(addUnique); autoGolden[record.id] = entities; }); @@ -102,8 +144,39 @@ export function HumanReview() { setGoldenSet(autoGolden); }; - const isReviewed = reviewedRecords.has(currentRecord.id); - const canContinue = reviewedRecords.size === totalRecords; + const isReviewed = currentRecord ? reviewedRecords.has(currentRecord.id) : false; + const canContinue = totalRecords > 0 && reviewedRecords.size === totalRecords; + + if (loading) { + return ( +
+ + Loading records… +
+ ); + } + + if (loadError) { + return ( +
+ + {loadError} + +
+ ); + } + + if (!currentRecord) { + return ( +
+ + + No sampled records found. Go back to Sampling to select a sample first. + + +
+ ); + } return (
@@ -183,7 +256,7 @@ export function HumanReview() { recordText={currentRecord.text} presidioEntities={currentRecord.presidioEntities} llmEntities={currentRecord.llmEntities} - datasetEntities={'datasetEntities' in currentRecord ? (currentRecord as any).datasetEntities : []} + datasetEntities={currentRecord.datasetEntities ?? []} onConfirm={handleConfirm} onReject={handleReject} onAddManual={handleAddManual} @@ -195,19 +268,19 @@ export function HumanReview() {
- ✓ Match - Both systems agree + ✓ Match (all sources agree)
-
- ⚠ Conflict - Type mismatch +
+ Presidio
-
- Presidio only detection +
+ Predefined
- LLM only detection + LLM Judge
diff --git a/evaluation/ai-assistant/src/app/pages/Setup.tsx b/evaluation/ai-assistant/src/app/pages/Setup.tsx index c752cc387..643542d51 100644 --- a/evaluation/ai-assistant/src/app/pages/Setup.tsx +++ b/evaluation/ai-assistant/src/app/pages/Setup.tsx @@ -1,4 +1,4 @@ -import { useState } from 'react'; +import { useEffect, useState } from 'react'; import { useNavigate } from 'react-router'; import { Card } from '../components/ui/card'; import { Button } from '../components/ui/button'; @@ -7,27 +7,19 @@ import { Input } from '../components/ui/input'; import { Checkbox } from '../components/ui/checkbox'; import { Alert, AlertDescription } from '../components/ui/alert'; import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '../components/ui/select'; -import { Database, Shield, ArrowRight, Cloud, FileText, CheckCircle, Loader2, X, Plus } from 'lucide-react'; +import { Database, Shield, ArrowRight, Cloud, FileText, CheckCircle, Loader2, X, Plus, Pencil, Trash2 } from 'lucide-react'; import { api } from '../lib/api'; import type { ComplianceFramework, UploadedDataset } from '../types'; export function Setup() { const navigate = useNavigate(); - const [datasets, setDatasets] = useState([ - // Seed with the example dataset - { - id: 'ds-001', - filename: 'Example - Patient Records', - format: 'csv', - record_count: 1500, - has_entities: false, - columns: ['text'], - }, - ]); + const [datasets, setDatasets] = useState([]); const [selectedDatasetId, setSelectedDatasetId] = useState(''); const [showAddForm, setShowAddForm] = useState(false); const [filePath, setFilePath] = useState(''); const [fileFormat, setFileFormat] = useState<'csv' | 'json'>('csv'); + const [datasetName, setDatasetName] = useState(''); + const [datasetDescription, setDatasetDescription] = useState(''); const [textColumn, setTextColumn] = useState('text'); const [entitiesColumn, setEntitiesColumn] = useState(''); const [loading, setLoading] = useState(false); @@ -37,6 +29,15 @@ 
export function Setup() { const [cloudRestriction, setCloudRestriction] = useState<'allowed' | 'restricted'>('allowed'); const [runPresidio, setRunPresidio] = useState(true); const [runLlm, setRunLlm] = useState(true); + const [editingName, setEditingName] = useState(false); + const [editNameValue, setEditNameValue] = useState(''); + + // Fetch saved datasets on mount + useEffect(() => { + api.datasets.saved().then(saved => { + if (saved.length > 0) setDatasets(saved); + }).catch(() => {}); + }, []); const selectedDataset = datasets.find(d => d.id === selectedDatasetId) ?? null; const canProceed = selectedDataset !== null; @@ -49,14 +50,12 @@ export function Setup() { setSelectedDatasetId(value); setShowAddForm(false); setPreviewRecords([]); + setEditingName(false); - // Fetch preview for loaded datasets (skip for the seed example) - if (value.startsWith('upload-')) { - try { - const preview = await api.datasets.preview(value); - setPreviewRecords(preview); - } catch { /* ignore */ } - } + try { + const preview = await api.datasets.preview(value); + setPreviewRecords(preview); + } catch { /* ignore */ } }; const handleLoadDataset = async () => { @@ -73,6 +72,8 @@ export function Setup() { format: fileFormat, text_column: textColumn.trim() || 'text', entities_column: entitiesColumn.trim() || undefined, + name: datasetName.trim() || undefined, + description: datasetDescription.trim() || undefined, }); setDatasets(prev => [...prev, dataset]); @@ -84,6 +85,8 @@ export function Setup() { // Reset form fields setFilePath(''); + setDatasetName(''); + setDatasetDescription(''); setTextColumn('text'); setEntitiesColumn(''); } catch (err: any) { @@ -136,7 +139,7 @@ export function Setup() { {datasets.map(ds => ( - {ds.filename} — {ds.record_count.toLocaleString()} records + {ds.name} — {ds.record_count.toLocaleString()} records ))} @@ -160,6 +163,28 @@ export function Setup() {
+
+ + setDatasetName(e.target.value)} + className="mt-1 text-sm" + /> +
+ +
+ + setDatasetDescription(e.target.value)} + className="mt-1 text-sm" + /> +
+
-
-
{selectedDataset.filename}
+
+
+ {editingName ? ( +
{ + e.preventDefault(); + if (!editNameValue.trim()) return; + try { + const updated = await api.datasets.rename(selectedDataset.id, editNameValue.trim()); + setDatasets(prev => prev.map(d => d.id === updated.id ? updated : d)); + setEditingName(false); + } catch { /* ignore */ } + }} + > + setEditNameValue(e.target.value)} + className="h-7 text-sm w-56" + autoFocus + /> + + +
+ ) : ( + <> + {selectedDataset.name} + + + + )} +
+ {selectedDataset.description && ( +
{selectedDataset.description}
+ )}
{selectedDataset.record_count.toLocaleString()} records • {selectedDataset.format.toUpperCase()} format
+
{selectedDataset.path}
Columns: {selectedDataset.columns.join(', ')}
{selectedDataset.has_entities ? ( diff --git a/evaluation/ai-assistant/src/app/types.ts b/evaluation/ai-assistant/src/app/types.ts index dc8fd01cd..38c0baecb 100644 --- a/evaluation/ai-assistant/src/app/types.ts +++ b/evaluation/ai-assistant/src/app/types.ts @@ -31,10 +31,15 @@ export interface Record { export interface UploadedDataset { id: string; filename: string; + name: string; + description: string; + path: string; format: 'csv' | 'json'; record_count: number; has_entities: boolean; columns: string[]; + text_column: string; + entities_column?: string | null; } export interface SetupConfig {