diff --git a/evaluation/ai-assistant/.gitignore b/evaluation/ai-assistant/.gitignore
index 82aae09a9..8ee1b3847 100644
--- a/evaluation/ai-assistant/.gitignore
+++ b/evaluation/ai-assistant/.gitignore
@@ -18,6 +18,7 @@ Thumbs.db
 backend/.venv/
 backend/__pycache__/
 backend/**/__pycache__/
+
 *.pyc
 *.pyo
 
diff --git a/evaluation/ai-assistant/backend/datasets.json b/evaluation/ai-assistant/backend/datasets.json
new file mode 100644
index 000000000..220be7885
--- /dev/null
+++ b/evaluation/ai-assistant/backend/datasets.json
@@ -0,0 +1,34 @@
+[
+  {
+    "id": "example-dataset",
+    "filename": "example_pii_dataset.csv",
+    "name": "Example Dataset",
+    "description": "10 synthetic PII records spanning healthcare, finance, HR, and customer support scenarios.",
+    "path": "data/example_pii_dataset.csv",
+    "format": "csv",
+    "record_count": 10,
+    "has_entities": true,
+    "columns": [
+      "text",
+      "entities"
+    ],
+    "text_column": "text",
+    "entities_column": "entities"
+  },
+  {
+    "id": "upload-6ff31019",
+    "filename": "sample_medical_records.csv",
+    "name": "manual-dataset",
+    "description": "my custom entities",
+    "path": "/Users/ronshakutai/projects_folder/presidio/evaluation/ai-assistant/data/sample_medical_records.csv",
+    "format": "csv",
+    "record_count": 10,
+    "has_entities": true,
+    "columns": [
+      "text",
+      "entities"
+    ],
+    "text_column": "text",
+    "entities_column": "entities"
+  }
+]
\ No newline at end of file
diff --git a/evaluation/ai-assistant/backend/llm_service.py b/evaluation/ai-assistant/backend/llm_service.py
new file mode 100644
index 000000000..ec05981cd
--- /dev/null
+++ b/evaluation/ai-assistant/backend/llm_service.py
@@ -0,0 +1,97 @@
+"""LLM Judge service using Azure OpenAI via LangExtract."""
+
+from __future__ import annotations
+
+import logging
+from typing import Optional
+
+from models import Entity
+
+logger = logging.getLogger(__name__)
+
+# Lazy-loaded recognizer singleton
+_recognizer = None
+
+
+class LLMServiceError(Exception):
+    """Signal a missing dependency, failed initialisation, or unconfigured service."""
+
+
+def configure(
+    azure_endpoint: str,
+    api_key: Optional[str] = None,
+    deployment_name: str = "gpt-4o",
+    api_version: str = "2024-02-15-preview",
+) -> dict:
+    """Build the Azure OpenAI LangExtract recognizer and keep it module-wide.
+
+    :param azure_endpoint: Azure OpenAI endpoint URL.
+    :param api_key: API key (None is intended for managed identity).
+    :param deployment_name: Azure deployment name, passed on as ``model_id``.
+    :param api_version: Azure OpenAI API version.
+    :returns: Dict with ``status`` ("configured") and ``deployment``.
+    :raises LLMServiceError: If the import or the recognizer construction fails.
+    """
+    global _recognizer
+
+    try:
+        from presidio_analyzer.predefined_recognizers.third_party.azure_openai_langextract_recognizer import (  # noqa: E501
+            AzureOpenAILangExtractRecognizer,
+        )
+    except ImportError as err:
+        raise LLMServiceError(
+            "langextract or presidio-analyzer is not installed. "
+            "Run: pip install langextract presidio-analyzer"
+        ) from err
+
+    try:
+        recognizer = AzureOpenAILangExtractRecognizer(
+            model_id=deployment_name,
+            azure_endpoint=azure_endpoint,
+            api_key=api_key,
+            api_version=api_version,
+        )
+    except Exception as err:
+        # A failed (re)configuration also clears any previously stored recognizer.
+        _recognizer = None
+        raise LLMServiceError(f"Failed to initialise recognizer: {err}") from err
+    _recognizer = recognizer
+
+    log_args = (azure_endpoint, deployment_name)
+    logger.info("LLM Judge configured: endpoint=%s deployment=%s", *log_args)
+    return {"status": "configured", "deployment": deployment_name}
+
+
+def is_configured() -> bool:
+    """Return True when a recognizer is stored, i.e. ``_recognizer`` is not None."""
+    return _recognizer is not None
+
+
+def disconnect() -> None:
+    """Clear the stored recognizer so that a different model can be configured."""
+    global _recognizer
+    _recognizer = None  # is_configured() returns False from here on
+    logger.info("LLM Judge disconnected")
+
+
+def analyze_text(text: str) -> list[Entity]:
+    """Detect entities in ``text`` with the configured LLM recognizer.
+
+    :param text: Text to analyse; entity spans index into this string.
+    :returns: One Entity per recognizer result, score rounded to 4 decimals.
+    :raises LLMServiceError: If no recognizer has been configured.
+    """
+    if _recognizer is None:
+        raise LLMServiceError(
+            "LLM service not configured. Call /api/llm/configure first."
+        )
+    return [
+        Entity(
+            text=text[hit.start:hit.end],
+            entity_type=hit.entity_type,
+            start=hit.start,
+            end=hit.end,
+            score=round(hit.score, 4),
+        )
+        for hit in _recognizer.analyze(text=text, entities=None)
+    ]
diff --git a/evaluation/ai-assistant/backend/main.py b/evaluation/ai-assistant/backend/main.py
index 191cb0d2e..1b3450fc0 100644
--- a/evaluation/ai-assistant/backend/main.py
+++ b/evaluation/ai-assistant/backend/main.py
@@ -1,6 +1,13 @@
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
-from routers import analysis, datasets, decision, evaluation, review, sampling, upload
+from routers import (
+    decision,
+    evaluation,
+    llm,
+    review,
+    sampling,
+    upload,
+)
 
 app = FastAPI(title="Presidio Evaluation Flow API", version="0.1.0")
 
@@ -11,13 +18,12 @@
     allow_headers=["*"],
 )
 
-app.include_router(datasets.router)
 app.include_router(upload.router)
 app.include_router(sampling.router)
-app.include_router(analysis.router)
 app.include_router(review.router)
 app.include_router(evaluation.router)
 app.include_router(decision.router)
+app.include_router(llm.router)
 
 
 @app.get("/api/health")
diff --git a/evaluation/ai-assistant/backend/mock_data.py b/evaluation/ai-assistant/backend/mock_data.py
index 2c327428f..d5affc7e0 100644
--- a/evaluation/ai-assistant/backend/mock_data.py
+++ b/evaluation/ai-assistant/backend/mock_data.py
@@ -1,279 +1,16 @@
-"""Mock data mirroring the frontend for demo / development purposes."""
+"""Mock data for evaluation / decision stages only."""
 
 from datetime import datetime
 
 from models import (
-    Dataset,
-    DatasetType,
     Entity,
     EntityMiss,
     EvaluationMetrics,
     EvaluationRun,
     MissType,
-    Record,
     RiskLevel,
 )
 
-DATASETS: list[Dataset] = [
-    Dataset(
-        id="ds-001",
-        name="Patient Records 2025",
-        type=DatasetType.customer,
-        record_count=15000,
-        description="Electronic health records from Q4 2025",
-    ),
-    Dataset(
-        id="ds-002",
-        name="Customer Support Tickets",
-        type=DatasetType.customer,
-        record_count=8500,
-        description="Support conversations with PII",
-    ),
-    Dataset(
-        id="ds-003",
-        name="Internal HR Data",
-        type=DatasetType.internal,
-        record_count=2300,
-        description="Employee records and performance reviews",
-    ),
-    Dataset(
-        id="ds-004",
-        name="Financial Transaction Logs",
-        type=DatasetType.customer,
-        record_count=42000,
-        description="Payment and billing information",
-    ),
-]
-
-RECORDS: list[Record] = [
-    Record(
-        id="rec-001",
-        text=(
-            "Patient John Smith, DOB 03/15/1985, was admitted on 2025-01-10. "
-            "Contact: john.smith@email.com, Phone: 555-0123. SSN: 123-45-6789."
-        ),
-        presidio_entities=[
-            Entity(
-                text="John Smith", entity_type="PERSON", start=8, end=18, score=0.95
-            ),
-            Entity(
-                text="03/15/1985",
-                entity_type="DATE_OF_BIRTH",
-                start=24,
-                end=34,
-                score=0.92,
-            ),
-            Entity(
-                text="john.smith@email.com",
-                entity_type="EMAIL",
-                start=77,
-                end=97,
-                score=0.98,
-            ),
-            Entity(
-                text="555-0123",
-                entity_type="PHONE_NUMBER",
-                start=106,
-                end=114,
-                score=0.89,
-            ),
-            Entity(
-                text="123-45-6789", entity_type="US_SSN", start=121, end=132, score=0.99
-            ),
-        ],
-        llm_entities=[
-            Entity(
-                text="John Smith", entity_type="PERSON", start=8, end=18, score=0.96
-            ),
-            Entity(
-                text="03/15/1985",
-                entity_type="DATE_OF_BIRTH",
-                start=24,
-                end=34,
-                score=0.94,
-            ),
-            Entity(text="2025-01-10", entity_type="DATE", start=52, end=62, score=0.88),
-            Entity(
-                text="john.smith@email.com",
-                entity_type="EMAIL",
-                start=77,
-                end=97,
-                score=0.97,
-            ),
-            Entity(
-                text="555-0123",
-                entity_type="PHONE_NUMBER",
-                start=106,
-                end=114,
-                score=0.91,
-            ),
-            Entity(
-                text="123-45-6789", entity_type="US_SSN", start=121, end=132, score=0.98
-            ),
-        ],
-    ),
-    Record(
-        id="rec-002",
-        text=(
-            "Dr. Sarah Johnson reviewed the case. "
-            "Medical Record #MR-445521. Insurance Policy: POL-8821-USA."
-        ),
-        presidio_entities=[
-            Entity(
-                text="Sarah Johnson", entity_type="PERSON", start=4, end=17, score=0.93
-            ),
-            Entity(
-                text="MR-445521",
-                entity_type="MEDICAL_RECORD",
-                start=55,
-                end=64,
-                score=0.87,
-            ),
-        ],
-        llm_entities=[
-            Entity(
-                text="Dr. Sarah Johnson",
-                entity_type="PERSON",
-                start=0,
-                end=17,
-                score=0.95,
-            ),
-            Entity(
-                text="MR-445521",
-                entity_type="MEDICAL_RECORD",
-                start=55,
-                end=64,
-                score=0.89,
-            ),
-            Entity(
-                text="POL-8821-USA",
-                entity_type="INSURANCE_POLICY",
-                start=84,
-                end=96,
-                score=0.82,
-            ),
-        ],
-    ),
-    Record(
-        id="rec-003",
-        text=(
-            "Employee ID: EMP-8821, Jane Doe, started 2023-06-01. "
-            "Salary: $85,000. Emergency contact: Mike Doe at 555-9876."
-        ),
-        presidio_entities=[
-            Entity(
-                text="EMP-8821", entity_type="EMPLOYEE_ID", start=13, end=21, score=0.91
-            ),
-            Entity(text="Jane Doe", entity_type="PERSON", start=23, end=31, score=0.94),
-            Entity(text="2023-06-01", entity_type="DATE", start=41, end=51, score=0.96),
-            Entity(text="Mike Doe", entity_type="PERSON", start=89, end=97, score=0.92),
-            Entity(
-                text="555-9876",
-                entity_type="PHONE_NUMBER",
-                start=101,
-                end=109,
-                score=0.88,
-            ),
-        ],
-        llm_entities=[
-            Entity(
-                text="EMP-8821", entity_type="EMPLOYEE_ID", start=13, end=21, score=0.90
-            ),
-            Entity(text="Jane Doe", entity_type="PERSON", start=23, end=31, score=0.96),
-            Entity(text="2023-06-01", entity_type="DATE", start=41, end=51, score=0.94),
-            Entity(text="$85,000", entity_type="SALARY", start=61, end=68, score=0.79),
-            Entity(text="Mike Doe", entity_type="PERSON", start=89, end=97, score=0.93),
-            Entity(
-                text="555-9876",
-                entity_type="PHONE_NUMBER",
-                start=101,
-                end=109,
-                score=0.90,
-            ),
-        ],
-    ),
-    Record(
-        id="rec-004",
-        text=(
-            "Credit card ending in 4532 was used for transaction. "
-            "Customer: alice.wong@company.com. IP: 192.168.1.100"
-        ),
-        presidio_entities=[
-            Entity(
-                text="4532", entity_type="CREDIT_CARD", start=22, end=26, score=0.65
-            ),
-            Entity(
-                text="alice.wong@company.com",
-                entity_type="EMAIL",
-                start=64,
-                end=86,
-                score=0.97,
-            ),
-            Entity(
-                text="192.168.1.100",
-                entity_type="IP_ADDRESS",
-                start=92,
-                end=105,
-                score=0.99,
-            ),
-        ],
-        llm_entities=[
-            Entity(
-                text="alice.wong@company.com",
-                entity_type="EMAIL",
-                start=64,
-                end=86,
-                score=0.98,
-            ),
-            Entity(
-                text="192.168.1.100",
-                entity_type="IP_ADDRESS",
-                start=92,
-                end=105,
-                score=0.97,
-            ),
-        ],
-    ),
-    Record(
-        id="rec-005",
-        text=(
-            "Prescription for Robert Chen: Medication ABC-123, dosage 50mg. "
-            "Doctor notes indicate history of diabetes."
-        ),
-        presidio_entities=[
-            Entity(
-                text="Robert Chen", entity_type="PERSON", start=17, end=28, score=0.94
-            ),
-            Entity(
-                text="ABC-123",
-                entity_type="MEDICATION_CODE",
-                start=41,
-                end=48,
-                score=0.71,
-            ),
-        ],
-        llm_entities=[
-            Entity(
-                text="Robert Chen", entity_type="PERSON", start=17, end=28, score=0.95
-            ),
-            Entity(
-                text="ABC-123",
-                entity_type="MEDICATION_CODE",
-                start=41,
-                end=48,
-                score=0.73,
-            ),
-            Entity(
-                text="diabetes",
-                entity_type="MEDICAL_CONDITION",
-                start=97,
-                end=105,
-                score=0.86,
-            ),
-        ],
-    ),
-]
-
 EVALUATION_RUNS: list[EvaluationRun] = [
     EvaluationRun(
         id="run-001",
diff --git a/evaluation/ai-assistant/backend/models.py b/evaluation/ai-assistant/backend/models.py
index cd8f2a140..f2074c208 100644
--- a/evaluation/ai-assistant/backend/models.py
+++ b/evaluation/ai-assistant/backend/models.py
@@ -112,6 +112,8 @@ class DatasetLoadRequest(BaseModel):
     format: str  # "csv" | "json"
     text_column: str = "text"
     entities_column: str | None = None
+    name: str | None = None  # user-friendly display name
+    description: str | None = None  # optional description
 
 
 class UploadedDataset(BaseModel):
@@ -119,10 +121,21 @@ class UploadedDataset(BaseModel):
 
     id: str
     filename: str
+    name: str  # user-friendly display name
+    description: str = ""  # optional user-provided description
+    path: str  # file path; absolute or relative (datasets.json contains both)
     format: str  # "csv" | "json"
     record_count: int
     has_entities: bool
     columns: list[str]
+    text_column: str = "text"
+    entities_column: str | None = None
+
+
+class DatasetRenameRequest(BaseModel):
+    """Request to rename a saved dataset; carries the replacement display name."""
+
+    name: str  # required, no default
 
 
 class SetupConfig(BaseModel):
@@ -182,3 +195,9 @@ class DecisionRequest(BaseModel):
     decision: DecisionType
     notes: str = ""
     selected_improvements: list[str] = []
+
+
+class LLMConfig(BaseModel):
+    """LLM Judge configuration — only deployment is chosen in the UI."""
+
+    deployment_name: str = "gpt-4o"  # same default as llm_service.configure()
diff --git a/evaluation/ai-assistant/backend/pyproject.toml b/evaluation/ai-assistant/backend/pyproject.toml
index f0bb6035d..585b090be 100644
--- a/evaluation/ai-assistant/backend/pyproject.toml
+++ b/evaluation/ai-assistant/backend/pyproject.toml
@@ -5,13 +5,17 @@ description = "Backend API for Presidio Evaluation Flow"
 package-mode = false
 
 [tool.poetry.dependencies]
-python = "^3.9"
+python = ">=3.10,<3.14"
 fastapi = ">=0.115.0"
 uvicorn = { version = ">=0.32.0", extras = ["standard"] }
 pydantic = ">=2.0.0"
 python-multipart = ">=0.0.9"
 pandas = ">=2.0.0"
 scikit-learn = ">=1.3.0"
+langextract = ">=0.1.0"
+openai = ">=1.0.0"
+presidio-analyzer = { path = "../../../presidio-analyzer", develop = true }
+azure-identity = ">=1.15.0"
 
 [build-system]
 requires = ["poetry-core"]
diff --git a/evaluation/ai-assistant/backend/routers/EntityComparison.tsx b/evaluation/ai-assistant/backend/routers/EntityComparison.tsx
new file mode 100644
index 000000000..7e1fa3f58
--- /dev/null
+++ b/evaluation/ai-assistant/backend/routers/EntityComparison.tsx
@@ -0,0 +1,372 @@
+import { useState } from 'react';
+import { Card } from './ui/card';
+import { Button } from './ui/button';
+import { Badge } from './ui/badge';
+import { Input } from './ui/input';
+import { Label } from './ui/label';
+import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from './ui/select';
+import { Collapsible, CollapsibleContent, CollapsibleTrigger } from './ui/collapsible';
+import { CheckCircle, XCircle, Edit, AlertTriangle, Check, X, ChevronDown, FileText } from 'lucide-react';
+import type { Entity } from '../types';
+
+interface EntityComparisonProps {
+  recordId: string; // handed back as the first argument of every callback
+  recordText: string; // full record text; shown as-is and used for context snippets
+  presidioEntities: Entity[];
+  llmEntities: Entity[];
+  datasetEntities?: Entity[]; // tagged 'predefined' internally, reported to callbacks as 'dataset'
+  onConfirm: (recordId: string, entity: Entity, source: 'presidio' | 'llm' | 'manual' | 'dataset') => void;
+  onReject: (recordId: string, entity: Entity, source: 'presidio' | 'llm' | 'dataset') => void;
+  onAddManual: (recordId: string, entity: Entity) => void; // receives the manual-entry form values
+}
+
+type EntityStatus = 'match' | 'conflict' | 'presidio-only' | 'llm-only' | 'predefined-only' // '*-only': detected by one source
+  | 'presidio+predefined' | 'presidio+llm' | 'predefined+llm' | 'pending'; // 'a+b': exactly two sources with differing types
+
+interface AnnotatedEntity extends Entity {
+  status: EntityStatus;
+  sources: ('presidio' | 'llm' | 'predefined')[]; // sources[0] is the one reported on confirm/reject
+  confirmed?: boolean; // not set by this component; decisions live in confirmedEntities/rejectedEntities
+}
+
+export function EntityComparison({
+  recordId,
+  recordText,
+  presidioEntities = [],
+  llmEntities = [],
+  datasetEntities = [],
+  onConfirm,
+  onReject,
+  onAddManual,
+}: EntityComparisonProps) {
+  const [showAddManual, setShowAddManual] = useState(false);
+  const [manualEntity, setManualEntity] = useState({ text: '', entity_type: '', start: 0, end: 0 });
+  const [confirmedEntities, setConfirmedEntities] = useState<Set<string>>(new Set());
+  const [rejectedEntities, setRejectedEntities] = useState<Set<string>>(new Set());
+  const [expandedContexts, setExpandedContexts] = useState<Set<string>>(new Set());
+
+  // Combine and classify entities from all three sources
+  const annotatedEntities: AnnotatedEntity[] = [];
+
+  // Strict comparisons: ranges that merely touch (a.end === b.start) do not count as overlapping
+  const spansOverlap = (a: Entity, b: Entity) =>
+    b.start < a.end && a.start < b.end;
+
+  // Build a unified list: for each unique span, track which sources detected it
+  interface SpanEntry { entity: Entity; sources: Set<'presidio' | 'llm' | 'predefined'>; types: Map<string, string> }
+  const spans: SpanEntry[] = [];
+
+  const addToSpans = (entity: Entity, source: 'presidio' | 'llm' | 'predefined') => {
+    // Merge into the first span that overlaps; otherwise open a new span for this entity
+    const overlapping = spans.find(span => spansOverlap(span.entity, entity));
+    if (!overlapping) {
+      const types = new Map<string, string>([[source, entity.entity_type]]);
+      spans.push({ entity: { ...entity }, sources: new Set([source]), types });
+      return;
+    }
+    overlapping.sources.add(source);
+    overlapping.types.set(source, entity.entity_type);
+    // Keep whichever entity has the longer text (scores are not compared here)
+    if (entity.text.length > overlapping.entity.text.length) {
+      overlapping.entity = { ...entity };
+    }
+  };
+
+  presidioEntities.forEach(e => addToSpans(e, 'presidio'));
+  datasetEntities.forEach(e => addToSpans(e, 'predefined'));
+  llmEntities.forEach(e => addToSpans(e, 'llm'));
+
+  spans.forEach(({ entity, sources, types }) => {
+    const sourceList = Array.from(sources) as ('presidio' | 'llm' | 'predefined')[];
+    const uniqueTypes = new Set(types.values());
+    const allAgree = uniqueTypes.size === 1;
+    // Base status: agreement across 2+ sources, disagreement, or the single detecting source
+    let status: EntityStatus;
+    if (sourceList.length >= 2 && allAgree) {
+      status = 'match';
+    } else if (sourceList.length >= 2 && !allAgree) {
+      status = 'conflict';
+    } else if (sourceList.length === 1) {
+      const s = sourceList[0];
+      status = s === 'presidio' ? 'presidio-only' : s === 'llm' ? 'llm-only' : 'predefined-only';
+    } else {
+      status = 'pending';
+    }
+    // For two-source non-match conflicts, use specific labels. Sort a copy: sorting in place
+    // would reorder `sources`, and sources[0] is what confirm/reject report as the source.
+    if (sourceList.length === 2 && status !== 'match') {
+      const key = [...sourceList].sort().join('+');
+      if (key === 'predefined+presidio') status = 'presidio+predefined';
+      else if (key === 'llm+presidio') status = 'presidio+llm';
+      else if (key === 'llm+predefined') status = 'predefined+llm';
+    }
+
+    annotatedEntities.push({ ...entity, status, sources: sourceList });
+  });
+
+  const getEntityKey = (entity: Entity) => `${entity.text}-${entity.start}-${entity.end}`; // key used in the confirmed/rejected/expanded sets
+
+  const getContextForEntity = (entity: Entity) => {
+    const CONTEXT_CHARS = 150;
+    // Trust the reported offsets when they really point at the entity text (the text can
+    // occur more than once); only otherwise fall back to its first occurrence.
+    const offsetsMatch = recordText.substring(entity.start, entity.end) === entity.text;
+    const idx = offsetsMatch ? entity.start : recordText.indexOf(entity.text);
+    const entityStart = idx >= 0 ? idx : entity.start;
+    const entityEnd = idx >= 0 ? idx + entity.text.length : entity.end;
+    const start = Math.max(0, entityStart - CONTEXT_CHARS);
+    const end = Math.min(recordText.length, entityEnd + CONTEXT_CHARS);
+    const before = recordText.substring(start, entityStart);
+    const entityText = recordText.substring(entityStart, entityEnd);
+    const after = recordText.substring(entityEnd, end);
+
+    return {
+      before: (start > 0 ? '...' : '') + before,
+      entity: entityText,
+      after: after + (end < recordText.length ? '...' : ''),
+    };
+  };
+
+  const toggleContext = (key: string) => {
+    // Flip membership of `key` in the expanded set, working on a copy of the state
+    setExpandedContexts(prev => {
+      const next = new Set(prev);
+      // delete() returns false when the key was absent, so that is the "expand" case
+      if (!next.delete(key)) {
+        next.add(key);
+      }
+      return next;
+    });
+  };
+
+  const handleConfirmEntity = (entity: AnnotatedEntity) => {
+    const key = getEntityKey(entity);
+    // Functional updaters: each set is derived from the latest state, so confirming
+    // several entities before a re-render cannot drop an earlier confirmation.
+    setConfirmedEntities(prev => new Set([...prev, key]));
+    setRejectedEntities(prev => new Set([...prev].filter(k => k !== key)));
+    // The first recorded source is reported; internal 'predefined' maps to 'dataset'
+    const source = entity.sources[0] === 'predefined' ? 'dataset' : entity.sources[0];
+    onConfirm(recordId, entity, source);
+  };
+
+  const handleRejectEntity = (entity: AnnotatedEntity) => {
+    const key = getEntityKey(entity);
+    // Functional updaters: each set is derived from the latest state, so rejecting
+    // several entities before a re-render cannot drop an earlier rejection.
+    setRejectedEntities(prev => new Set([...prev, key]));
+    setConfirmedEntities(prev => new Set([...prev].filter(k => k !== key)));
+    // The first recorded source is reported; internal 'predefined' maps to 'dataset'
+    const source = entity.sources[0] === 'predefined' ? 'dataset' : entity.sources[0];
+    onReject(recordId, entity, source);
+  };
+
+  const handleAddManualEntity = () => {
+    // Both fields are required; note the entity is submitted with start/end still 0
+    if (!manualEntity.text || !manualEntity.entity_type) return;
+    onAddManual(recordId, manualEntity);
+    setManualEntity({ text: '', entity_type: '', start: 0, end: 0 });
+    setShowAddManual(false);
+  };
+
+  const getStatusBadge = (status: EntityStatus, confirmed?: boolean, rejected?: boolean) => {
+    // A reviewer decision always wins over the detection status
+    if (confirmed) {
+      return <Badge className="bg-green-100 text-green-800 border-green-300"><Check className="size-3 mr-1" />Confirmed</Badge>;
+    }
+    if (rejected) {
+      return <Badge className="bg-red-100 text-red-800 border-red-300"><X className="size-3 mr-1" />Rejected</Badge>;
+    }
+
+    // Badge colour classes and label for each detection status
+    const badgeStyles: Partial<Record<EntityStatus, { className: string; label: string }>> = {
+      'match': { className: 'bg-blue-100 text-blue-800 border-blue-300', label: '✓ Match' },
+      'conflict': { className: 'bg-amber-100 text-amber-800 border-amber-300', label: '⚠ Conflict' },
+      'presidio-only': { className: 'bg-purple-100 text-purple-800 border-purple-300', label: 'Presidio' },
+      'llm-only': { className: 'bg-cyan-100 text-cyan-800 border-cyan-300', label: 'LLM Judge' },
+      'predefined-only': { className: 'bg-emerald-100 text-emerald-800 border-emerald-300', label: 'Predefined' },
+      'presidio+predefined': { className: 'bg-violet-100 text-violet-800 border-violet-300', label: 'Presidio + Predefined' },
+      'presidio+llm': { className: 'bg-indigo-100 text-indigo-800 border-indigo-300', label: 'Presidio + LLM Judge' },
+      'predefined+llm': { className: 'bg-teal-100 text-teal-800 border-teal-300', label: 'Predefined + LLM Judge' },
+    };
+    const style = badgeStyles[status];
+
+    // Statuses without an entry (i.e. 'pending') fall back to the neutral badge
+    if (!style) {
+      return <Badge variant="secondary">Pending</Badge>;
+    }
+
+    return <Badge className={style.className}>{style.label}</Badge>;
+  };
+
+  return (
+    <Card className="p-6">
+      <div className="space-y-6">
+        {/* Record Text */}
+        <div className="space-y-2">
+          <Label>Record Text</Label>
+          <div className="p-4 bg-slate-50 rounded-lg border border-slate-200 text-sm font-mono">
+            {recordText}
+          </div>
+        </div>
+
+        {/* Entities List */}
+        <div className="space-y-3">
+          <div className="flex items-center justify-between">
+            <Label>Detected Entities ({annotatedEntities.length})</Label>
+            <Button
+              size="sm"
+              variant="outline"
+              onClick={() => setShowAddManual(!showAddManual)}
+            >
+              <Edit className="size-3 mr-1" />
+              Add Manual Entity
+            </Button>
+          </div>
+
+          {/* Manual Add Form */}
+          {showAddManual && (
+            <div className="p-4 bg-blue-50 border border-blue-200 rounded-lg space-y-3">
+              <div className="grid grid-cols-2 gap-3">
+                <div>
+                  <Label>Entity Text</Label>
+                  <Input
+                    value={manualEntity.text}
+                    onChange={(e) => setManualEntity({ ...manualEntity, text: e.target.value })}
+                    placeholder="Enter entity text..."
+                  />
+                </div>
+                <div>
+                  <Label>Entity Type</Label>
+                  <Select value={manualEntity.entity_type} onValueChange={(val) => setManualEntity({ ...manualEntity, entity_type: val })}>
+                    <SelectTrigger>
+                      <SelectValue placeholder="Select type..." />
+                    </SelectTrigger>
+                    <SelectContent>
+                      {['PERSON', 'EMAIL', 'PHONE_NUMBER', 'SSN', 'CREDIT_CARD', 'DATE_OF_BIRTH',
+                        'MEDICAL_RECORD', 'IP_ADDRESS', 'ADDRESS', 'MEDICAL_CONDITION'].map(type => (
+                        <SelectItem key={type} value={type}>{type}</SelectItem>
+                      ))}
+                    </SelectContent>
+                  </Select>
+                </div>
+              </div>
+              <div className="flex gap-2">
+                <Button size="sm" onClick={handleAddManualEntity}>Add Entity</Button>
+                <Button size="sm" variant="ghost" onClick={() => setShowAddManual(false)}>Cancel</Button>
+              </div>
+            </div>
+          )}
+
+          {/* Entity Cards: one per merged span; confirm/reject buttons show until a decision is made */}
+          <div className="space-y-2">
+            {annotatedEntities.map((entity, index) => {
+              const key = getEntityKey(entity);
+              const isConfirmed = confirmedEntities.has(key);
+              const isRejected = rejectedEntities.has(key);
+
+              return (
+                <div
+                  key={index}
+                  className={`p-4 rounded-lg border ${
+                    isConfirmed ? 'bg-green-50 border-green-300' :
+                    isRejected ? 'bg-red-50 border-red-300' :
+                    entity.status === 'conflict' ? 'bg-amber-50 border-amber-300' :
+                    'bg-white border-slate-200'
+                  }`}
+                >
+                  <div className="flex items-start justify-between gap-4">
+                    <div className="flex-1 space-y-2">
+                      <div className="flex items-center gap-2">
+                        <span className="font-medium text-slate-900">{entity.text}</span>
+                        <Badge variant="outline">{entity.entity_type}</Badge>
+                        {getStatusBadge(entity.status, isConfirmed, isRejected)}
+                      </div>
+
+                      <div className="flex items-center gap-4 text-sm text-slate-600">
+                        <span>Position: {entity.start}-{entity.end}</span>
+                        {entity.score != null && <span>Confidence: {(entity.score * 100).toFixed(0)}%</span>}
+                        {entity.status === 'conflict' && (
+                          <div className="flex items-center gap-1 text-amber-700">
+                            <AlertTriangle className="size-3" />
+                            <span className="text-xs">Entity type mismatch between detection sources</span>
+                          </div>
+                        )}
+                      </div>
+                    </div>
+
+                    {!isConfirmed && !isRejected && (
+                      <div className="flex gap-2">
+                        <Button
+                          size="sm"
+                          variant="ghost"
+                          className="text-green-700 hover:text-green-800 hover:bg-green-100"
+                          onClick={() => handleConfirmEntity(entity)}
+                        >
+                          <CheckCircle className="size-4 mr-1" />
+                          Confirm
+                        </Button>
+                        <Button
+                          size="sm"
+                          variant="ghost"
+                          className="text-red-700 hover:text-red-800 hover:bg-red-100"
+                          onClick={() => handleRejectEntity(entity)}
+                        >
+                          <XCircle className="size-4 mr-1" />
+                          Reject
+                        </Button>
+                      </div>
+                    )}
+                  </div>
+
+                  {/* Context Collapsible */}
+                  <Collapsible open={expandedContexts.has(key)} onOpenChange={() => toggleContext(key)}>
+                    <CollapsibleTrigger asChild>
+                      <Button
+                        variant="ghost"
+                        size="sm"
+                        className="mt-2 h-auto py-1 px-2 text-xs text-slate-600 hover:text-slate-900"
+                      >
+                        <ChevronDown className={`size-3 mr-1 transition-transform ${expandedContexts.has(key) ? 'rotate-180' : ''}`} />
+                        {expandedContexts.has(key) ? 'Hide Context' : 'View Context'}
+                      </Button>
+                    </CollapsibleTrigger>
+                    <CollapsibleContent className="mt-2">
+                      <div className="p-3 bg-slate-50 rounded border border-slate-200">
+                        <div className="flex items-center gap-2 mb-2 text-xs text-slate-500">
+                          <FileText className="size-3" />
+                          <span>Surrounding Context:</span>
+                        </div>
+                        <div className="text-sm font-mono leading-relaxed">
+                          <span className="text-slate-600">{getContextForEntity(entity).before}</span>
+                          <span className="bg-yellow-200 px-1 rounded font-semibold text-slate-900">{getContextForEntity(entity).entity}</span>
+                          <span className="text-slate-600">{getContextForEntity(entity).after}</span>
+                        </div>
+                      </div>
+                    </CollapsibleContent>
+                  </Collapsible>
+                </div>
+              );
+            })}
+          </div>
+        </div>
+
+        {/* Summary */}
+        <div className="flex items-center gap-4 text-sm">
+          <div className="flex items-center gap-2">
+            <div className="size-3 rounded-full bg-green-500" />
+            <span>{confirmedEntities.size} Confirmed</span>
+          </div>
+          <div className="flex items-center gap-2">
+            <div className="size-3 rounded-full bg-red-500" />
+            <span>{rejectedEntities.size} Rejected</span>
+          </div>
+          <div className="flex items-center gap-2">
+            <div className="size-3 rounded-full bg-slate-400" />
+            <span>{annotatedEntities.length - confirmedEntities.size - rejectedEntities.size} Pending</span>
+          </div>
+        </div>
+      </div>
+    </Card>
+  );
+}
diff --git a/evaluation/ai-assistant/backend/routers/analysis.py b/evaluation/ai-assistant/backend/routers/analysis.py
deleted file mode 100644
index 9b6c6563e..000000000
--- a/evaluation/ai-assistant/backend/routers/analysis.py
+++ /dev/null
@@ -1,86 +0,0 @@
-import asyncio
-
-from fastapi import APIRouter
-from mock_data import RECORDS
-from models import AnalysisStatus, Record
-
-router = APIRouter(prefix="/api/analysis", tags=["analysis"])
-
-# In-memory state for the current analysis run
-_state: dict = {
-    "presidio_progress": 0,
-    "llm_progress": 0,
-    "running": False,
-}
-
-
-@router.post("/start")
-async def start_analysis():
-    """Kick off parallel Presidio + LLM analysis (simulated)."""
-    _state["presidio_progress"] = 0
-    _state["llm_progress"] = 0
-    _state["running"] = True
-
-    async def _simulate():
-        while _state["presidio_progress"] < 100 or _state["llm_progress"] < 100:
-            if _state["presidio_progress"] < 100:
-                _state["presidio_progress"] = min(100, _state["presidio_progress"] + 4)
-            if _state["llm_progress"] < 100:
-                _state["llm_progress"] = min(100, _state["llm_progress"] + 3)
-            await asyncio.sleep(0.2)
-        _state["running"] = False
-
-    asyncio.create_task(_simulate())
-    return {"status": "started"}
-
-
-@router.get("/status", response_model=AnalysisStatus)
-async def get_analysis_status():
-    """Return current analysis progress."""
-    presidio_done = _state["presidio_progress"] >= 100
-    llm_done = _state["llm_progress"] >= 100
-
-    result = AnalysisStatus(
-        presidio_progress=_state["presidio_progress"],
-        llm_progress=_state["llm_progress"],
-        presidio_complete=presidio_done,
-        llm_complete=llm_done,
-    )
-
-    if presidio_done:
-        result.presidio_stats = {
-            "records": 500,
-            "entities": 1247,
-            "types": 12,
-            "avg_confidence": 91,
-        }
-    if llm_done:
-        result.llm_stats = {
-            "records": 500,
-            "entities": 1312,
-            "additional": 65,
-            "avg_confidence": 87,
-        }
-    if presidio_done and llm_done:
-        result.comparison = {
-            "matches": 1182,
-            "conflicts": 47,
-            "llm_only": 65,
-            "presidio_only": 18,
-        }
-    return result
-
-
-@router.get("/records", response_model=list[Record])
-async def get_records():
-    """Return all records with their detected entities."""
-    return RECORDS
-
-
-@router.get("/records/{record_id}", response_model=Record)
-async def get_record(record_id: str):
-    """Return a single record by ID."""
-    for rec in RECORDS:
-        if rec.id == record_id:
-            return rec
-    return {"error": "not found"}
diff --git a/evaluation/ai-assistant/backend/routers/datasets.py b/evaluation/ai-assistant/backend/routers/datasets.py
deleted file mode 100644
index a5645f97e..000000000
--- a/evaluation/ai-assistant/backend/routers/datasets.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from fastapi import APIRouter, HTTPException
-from mock_data import DATASETS
-from models import Dataset
-
-router = APIRouter(prefix="/api/datasets", tags=["datasets"])
-
-
-@router.get("", response_model=list[Dataset])
-async def list_datasets():
-    """List all available datasets."""
-    return DATASETS
-
-
-@router.get("/{dataset_id}", response_model=Dataset)
-async def get_dataset(dataset_id: str):
-    """Get a dataset by ID."""
-    for ds in DATASETS:
-        if ds.id == dataset_id:
-            return ds
-    raise HTTPException(status_code=404, detail="Dataset not found")
diff --git a/evaluation/ai-assistant/backend/routers/llm.py b/evaluation/ai-assistant/backend/routers/llm.py
new file mode 100644
index 000000000..21e79c8a3
--- /dev/null
+++ b/evaluation/ai-assistant/backend/routers/llm.py
@@ -0,0 +1,193 @@
+"""LLM Judge router — configure and run LLM-based PII detection."""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+
+import llm_service
+from fastapi import APIRouter, HTTPException
+from models import LLMConfig
+from settings import MODEL_CHOICES, load_from_env
+
+from routers import sampling as sampling_router
+
+router = APIRouter(prefix="/api/llm", tags=["llm"])
+logger = logging.getLogger(__name__)
+
+# Currently selected deployment (persisted across page reloads while server lives)
+_selected_deployment: str | None = None
+
+# In-memory state for the current LLM analysis run
+_state: dict = {
+    "progress": 0,
+    "total": 0,
+    "running": False,
+    "error": None,
+    "results": {},  # record_id -> list[Entity dict]
+}
+
+
+def _env_is_ready() -> bool:
+    """Check if .env has the required Azure endpoint."""
+    env = load_from_env()
+    return bool(env.azure_endpoint)
+
+
+# ── Model catalogue ──────────────────────────────────────
+
+
+@router.get("/models")
+async def list_models():
+    """Return available model choices for the UI dropdown."""
+    return MODEL_CHOICES
+
+
+# ── Settings ─────────────────────────────────────────────
+
+
+@router.get("/settings")
+async def get_settings():
+    """Return current LLM Judge configuration (no secrets)."""
+    env = load_from_env()
+    return {
+        "env_ready": bool(env.azure_endpoint),
+        "has_endpoint": bool(env.azure_endpoint),
+        "has_api_key": bool(env.azure_api_key),
+        "auth_method": "api_key" if env.azure_api_key else "default_credential",
+        "deployment_name": _selected_deployment or env.deployment_name,
+        "configured": llm_service.is_configured(),
+    }
+
+
+@router.post("/configure")
+async def configure_llm(config: LLMConfig):
+    """Configure the LLM recognizer using .env credentials + chosen deployment."""
+    global _selected_deployment
+
+    env = load_from_env()
+    if not env.azure_endpoint:
+        raise HTTPException(
+            status_code=400,
+            detail=(
+                "PRESIDIO_EVAL_AZURE_ENDPOINT must be set in backend/.env "
+                "before configuring the LLM Judge."
+            ),
+        )
+
+    # Validate the chosen deployment is in our allowed list
+    allowed_ids = {m["id"] for m in MODEL_CHOICES}
+    deployment = config.deployment_name
+    if deployment not in allowed_ids:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Deployment '{deployment}' is not in the allowed list.",
+        )
+
+    try:
+        result = llm_service.configure(
+            azure_endpoint=env.azure_endpoint,
+            api_key=env.azure_api_key,  # None → DefaultAzureCredential
+            deployment_name=deployment,
+            api_version=env.api_version,
+        )
+    except llm_service.LLMServiceError as exc:
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
+
+    _selected_deployment = deployment
+    return result
+
+
+@router.post("/disconnect")
+async def disconnect_llm():
+    """Disconnect the LLM recognizer and reset analysis state."""
+    global _selected_deployment
+    llm_service.disconnect()
+    _selected_deployment = None
+    _state["progress"] = 0
+    _state["total"] = 0
+    _state["running"] = False
+    _state["error"] = None
+    _state["results"] = {}
+    return {"status": "disconnected"}
+
+
+# ── Status / analysis ────────────────────────────────────
+
+
+@router.get("/status")
+async def get_llm_status():
+    """Return LLM configuration and analysis status."""
+    return {
+        "configured": llm_service.is_configured(),
+        "running": _state["running"],
+        "progress": _state["progress"],
+        "total": _state["total"],
+        "error": _state["error"],
+    }
+
+
+@router.post("/analyze")
+async def start_llm_analysis():
+    """Start a background LLM analysis of all sampled records and return at once."""
+    if not llm_service.is_configured():
+        raise HTTPException(
+            status_code=400,
+            detail="LLM not configured. POST /api/llm/configure first.",
+        )
+
+    if _state["running"]:
+        raise HTTPException(status_code=409, detail="Analysis already running.")
+
+    records = sampling_router.sampled_records
+    if not records:
+        raise HTTPException(
+            status_code=400,
+            detail="No sampled records. Run sampling first.",
+        )
+
+    _state["progress"] = 0
+    _state["total"] = len(records)
+    _state["running"] = True
+    _state["error"] = None
+    _state["results"] = {}
+    # Keep a strong reference: the event loop only holds tasks weakly.
+    _state["task"] = asyncio.create_task(_run_analysis())
+    return {"status": "started", "total": _state["total"]}
+
+
+async def _run_analysis():
+    """Background task: analyse each sampled record via the LLM."""
+    loop = asyncio.get_running_loop()  # we are inside a coroutine, so a loop is running
+    records = sampling_router.sampled_records
+    try:
+        for record in records:
+            try:
+                entities = await loop.run_in_executor(
+                    None, llm_service.analyze_text, record.text
+                )
+                _state["results"][record.id] = [e.model_dump() for e in entities]
+            except Exception:
+                logger.exception("LLM analysis failed for record %s", record.id)
+                _state["results"][record.id] = []  # failed record: no entities
+            _state["progress"] += 1
+    except Exception as exc:
+        logger.exception("LLM analysis task failed")
+        _state["error"] = str(exc)
+    finally:
+        _state["running"] = False
+
+
+@router.get("/results")
+async def get_llm_results():
+    """Return LLM entities for all analysed records."""
+    return _state["results"]
+
+
+@router.get("/results/{record_id}")
+async def get_llm_record_results(record_id: str):
+    """Return LLM entities for a specific record."""
+    entities = _state["results"].get(record_id)
+    if entities is None:
+        raise HTTPException(status_code=404, detail="Record not found in results.")
+    return entities
diff --git a/evaluation/ai-assistant/backend/routers/review.py b/evaluation/ai-assistant/backend/routers/review.py
index 5e561093c..aaa6831a9 100644
--- a/evaluation/ai-assistant/backend/routers/review.py
+++ b/evaluation/ai-assistant/backend/routers/review.py
@@ -1,6 +1,6 @@
 from fastapi import APIRouter
-from mock_data import RECORDS
 from models import Entity, EntityAction, Record
+from routers import sampling as sampling_router
 
 router = APIRouter(prefix="/api/review", tags=["review"])
 
@@ -12,7 +12,7 @@
 @router.get("/records", response_model=list[Record])
 async def get_review_records():
     """List records for human review."""
-    return RECORDS
+    return sampling_router.sampled_records
 
 
 @router.post("/records/{record_id}/confirm")
@@ -23,6 +23,11 @@ async def confirm_entity(record_id: str, action: EntityAction):
     return {"status": "confirmed", "record_id": record_id}
 
 
+def _spans_overlap(a: Entity, b: Entity) -> bool:
+    """Return True if two entity spans overlap."""
+    return a.start < b.end and b.start < a.end
+
+
 @router.post("/records/{record_id}/reject")
 async def reject_entity(record_id: str, action: EntityAction):
     """Reject an entity and remove it from the golden set."""
@@ -30,11 +35,7 @@ async def reject_entity(record_id: str, action: EntityAction):
     _golden_set[record_id] = [
         e
         for e in entities
-        if not (
-            e.text == action.entity.text
-            and e.start == action.entity.start
-            and e.end == action.entity.end
-        )
+        if not _spans_overlap(e, action.entity)
     ]
     _reviewed.add(record_id)
     return {"status": "rejected", "record_id": record_id}
@@ -52,7 +53,7 @@ async def add_manual_entity(record_id: str, action: EntityAction):
 @router.get("/progress")
 async def get_review_progress():
     """Return review completion progress."""
-    total = len(RECORDS)
+    total = len(sampling_router.sampled_records)
     reviewed = len(_reviewed)
     return {
         "total": total,
diff --git a/evaluation/ai-assistant/backend/routers/upload.py b/evaluation/ai-assistant/backend/routers/upload.py
index 2257be9dc..3bae9aad4 100644
--- a/evaluation/ai-assistant/backend/routers/upload.py
+++ b/evaluation/ai-assistant/backend/routers/upload.py
@@ -9,7 +9,13 @@
 import uuid
 
 from fastapi import APIRouter, HTTPException
-from models import DatasetLoadRequest, Entity, Record, UploadedDataset
+from models import (
+    DatasetLoadRequest,
+    DatasetRenameRequest,
+    Entity,
+    Record,
+    UploadedDataset,
+)
 
 router = APIRouter(prefix="/api/datasets", tags=["datasets"])
 
@@ -19,6 +25,47 @@
 
 MAX_FILE_SIZE = 50 * 1024 * 1024  # 50 MB
 
+# Persistence file for saved datasets (one directory above this file → backend/datasets.json)
+_DATASETS_FILE = os.path.join(os.path.dirname(os.path.dirname(__file__)), "datasets.json")
+
+
+def _save_registry() -> None:
+    """Persist the dataset registry to disk."""
+    data = [ds.model_dump() for ds in _uploaded.values()]
+    with open(_DATASETS_FILE, "w", encoding="utf-8") as f:
+        json.dump(data, f, indent=2)
+
+
+def _load_registry() -> None:
+    """Load saved datasets from disk on startup; all-or-nothing, never raises."""
+    if not os.path.isfile(_DATASETS_FILE):
+        return
+    try:
+        with open(_DATASETS_FILE, encoding="utf-8") as f:
+            loaded = [UploadedDataset(**item) for item in json.load(f)]
+    except Exception as exc:  # unreadable or corrupt registry: start empty, but say so
+        import logging
+        logging.getLogger(__name__).warning("Ignoring dataset registry: %s", exc)
+        return
+    _uploaded.update((ds.id, ds) for ds in loaded)
+
+
+# Project root (evaluation/ai-assistant/) — used to resolve relative paths
+_PROJECT_ROOT = os.path.normpath(
+    os.path.join(os.path.dirname(__file__), "..", "..")
+)
+
+
+def _resolve_path(path: str) -> str:
+    """Resolve a path; relative paths are resolved against the project root."""
+    if os.path.isabs(path):
+        return path
+    return os.path.normpath(os.path.join(_PROJECT_ROOT, path))
+
+
+# Load on import so saved datasets are available immediately
+_load_registry()
+
 
 def _parse_entities(raw: str | list | None) -> list[Entity]:
     """Parse entities from a JSON string or list."""
@@ -141,6 +188,12 @@ def _parse_json(
     return records, sorted(columns), has_entities
 
 
+@router.get("/saved")
+async def list_saved_datasets():
+    """Return all saved datasets (for the dropdown)."""
+    return list(_uploaded.values())
+
+
 @router.post("/load")
 async def load_dataset(req: DatasetLoadRequest):
     """Load a CSV or JSON file from a local absolute path."""
@@ -180,33 +233,86 @@ async def load_dataset(req: DatasetLoadRequest):
 
     dataset_id = f"upload-{uuid.uuid4().hex[:8]}"
     filename = os.path.basename(file_path)
+    display_name = req.name.strip() if req.name and req.name.strip() else filename
+    description = req.description.strip() if req.description else ""
     dataset = UploadedDataset(
         id=dataset_id,
         filename=filename,
+        name=display_name,
+        description=description,
+        path=file_path,
         format=req.format,
         record_count=len(records),
         has_entities=has_entities,
         columns=columns,
+        text_column=req.text_column,
+        entities_column=req.entities_column,
     )
 
     _uploaded[dataset_id] = dataset
     _records[dataset_id] = records
+    _save_registry()
 
     return dataset
 
 
+@router.patch("/{dataset_id}/rename")
+async def rename_dataset(dataset_id: str, req: DatasetRenameRequest):
+    """Rename a saved dataset."""
+    if dataset_id not in _uploaded:
+        raise HTTPException(status_code=404, detail="Dataset not found")
+    new_name = req.name.strip()
+    if not new_name:
+        raise HTTPException(status_code=400, detail="Name cannot be empty")
+    _uploaded[dataset_id] = _uploaded[dataset_id].model_copy(
+        update={"name": new_name}
+    )
+    _save_registry()
+    return _uploaded[dataset_id]
+
+
+@router.delete("/{dataset_id}")
+async def delete_dataset(dataset_id: str):
+    """Remove a saved dataset from the registry."""
+    if dataset_id not in _uploaded:
+        raise HTTPException(status_code=404, detail="Dataset not found")
+    del _uploaded[dataset_id]
+    _records.pop(dataset_id, None)
+    _save_registry()
+    return {"ok": True}
+
+
+def _ensure_records_loaded(dataset_id: str) -> list[Record]:
+    """Reload records from the original file if not in memory."""
+    if dataset_id in _records:
+        return _records[dataset_id]
+    ds = _uploaded.get(dataset_id)
+    if ds is None:
+        raise HTTPException(status_code=404, detail="Dataset not found")
+    resolved = _resolve_path(ds.path)
+    if not os.path.isfile(resolved):
+        raise HTTPException(
+            status_code=404,
+            detail=f"Source file no longer exists: {ds.path}",
+        )
+    with open(resolved, encoding="utf-8") as f:
+        content = f.read()
+    if ds.format == "csv":
+        records, _, _ = _parse_csv(content, ds.text_column, ds.entities_column)
+    else:
+        records, _, _ = _parse_json(content, ds.text_column, ds.entities_column)
+    _records[dataset_id] = records
+    return records
+
+
 @router.get("/{dataset_id}/records")
 async def get_dataset_records(dataset_id: str):
     """Return parsed records for a loaded dataset."""
-    if dataset_id not in _records:
-        raise HTTPException(status_code=404, detail="Dataset not found")
-    return _records[dataset_id]
+    return _ensure_records_loaded(dataset_id)
 
 
 @router.get("/{dataset_id}/preview")
 async def preview_dataset(dataset_id: str, limit: int = 5):
     """Return a small preview of the loaded dataset."""
-    if dataset_id not in _records:
-        raise HTTPException(status_code=404, detail="Dataset not found")
-    records = _records[dataset_id][:limit]
-    return records
+    records = _ensure_records_loaded(dataset_id)
+    return records[:limit]
diff --git a/evaluation/ai-assistant/backend/settings.py b/evaluation/ai-assistant/backend/settings.py
new file mode 100644
index 000000000..585a093e4
--- /dev/null
+++ b/evaluation/ai-assistant/backend/settings.py
@@ -0,0 +1,66 @@
+"""Centralised settings loaded from .env with PRESIDIO_EVAL_ prefix."""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Optional
+
+from pydantic import BaseModel
+
+_ENV_FILE = Path(__file__).resolve().parent / ".env"
+
+# ── Available model choices shown in the UI dropdown ──
+MODEL_CHOICES: list[dict[str, str]] = [
+    {"id": "gpt-5.1", "label": "GPT-5.1", "provider": "Azure OpenAI"},
+    {
+        "id": "gpt-5.2-chat",
+        "label": "GPT-5.2 Chat",
+        "provider": "Azure OpenAI",
+    },
+    {"id": "gpt-5.4", "label": "GPT-5.4", "provider": "Azure OpenAI"},
+]
+
+
+class EvalSettings(BaseModel):
+    """Runtime settings, sourced from env vars or interactive input."""
+
+    azure_endpoint: str = ""
+    azure_api_key: Optional[str] = None
+    deployment_name: str = "gpt-5.4"
+    api_version: str = "2024-02-15-preview"
+
+
+def _load_dotenv() -> None:
+    """Read .env into os.environ (simple key=value parser, no dependency)."""
+    if not _ENV_FILE.is_file():
+        return
+    with open(_ENV_FILE, encoding="utf-8") as fh:
+        for line in fh:
+            line = line.strip()
+            if not line or line.startswith("#") or "=" not in line:
+                continue
+            key, _, value = line.partition("=")
+            key, value = key.strip(), value.strip()
+            # Drop one pair of matching surrounding quotes (KEY="value")
+            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
+                value = value[1:-1]
+            # Only set if not already in os.environ (explicit env wins)
+            if key and key not in os.environ:
+                os.environ[key] = value
+
+
+def load_from_env() -> EvalSettings:
+    """Build settings from PRESIDIO_EVAL_* variables (fallbacks match EvalSettings)."""
+    _load_dotenv()
+    api_key = os.environ.get("PRESIDIO_EVAL_AZURE_API_KEY") or None
+    return EvalSettings(
+        azure_endpoint=os.environ.get("PRESIDIO_EVAL_AZURE_ENDPOINT", ""),
+        azure_api_key=api_key,
+        deployment_name=os.environ.get(
+            "PRESIDIO_EVAL_DEPLOYMENT_NAME", "gpt-5.4"  # must be a MODEL_CHOICES id
+        ),
+        api_version=os.environ.get(
+            "PRESIDIO_EVAL_API_VERSION", "2024-02-15-preview"
+        ),
+    )
diff --git a/evaluation/ai-assistant/data/example_pii_dataset.csv b/evaluation/ai-assistant/data/example_pii_dataset.csv
new file mode 100644
index 000000000..75fd088ac
--- /dev/null
+++ b/evaluation/ai-assistant/data/example_pii_dataset.csv
@@ -0,0 +1,11 @@
+"text","entities"
+"Dear Mr. James Wilson, your appointment at Springfield Clinic is confirmed for 04/12/2025. Please bring your insurance card (ID: INS-77234) and a photo ID. For questions, call 312-555-0198 or email james.wilson@outlook.com.","[{""text"": ""James Wilson"", ""entity_type"": ""PERSON"", ""start"": 9, ""end"": 21, ""score"": 1.0}, {""text"": ""04/12/2025"", ""entity_type"": ""DATE"", ""start"": 79, ""end"": 89, ""score"": 1.0}, {""text"": ""INS-77234"", ""entity_type"": ""ID"", ""start"": 129, ""end"": 138, ""score"": 1.0}, {""text"": ""312-555-0198"", ""entity_type"": ""PHONE_NUMBER"", ""start"": 176, ""end"": 188, ""score"": 1.0}, {""text"": ""james.wilson@outlook.com"", ""entity_type"": ""EMAIL_ADDRESS"", ""start"": 198, ""end"": 222, ""score"": 1.0}]"
+"The account holder, Maria Garcia (DOB: 11/23/1990), reported unauthorized charges on her Visa card ending in 4829. Her address on file is 742 Elm Street, Austin, TX 78701. Case reference: CR-2025-08813.","[{""text"": ""Maria Garcia"", ""entity_type"": ""PERSON"", ""start"": 20, ""end"": 32, ""score"": 1.0}, {""text"": ""11/23/1990"", ""entity_type"": ""DATE_OF_BIRTH"", ""start"": 39, ""end"": 49, ""score"": 1.0}, {""text"": ""4829"", ""entity_type"": ""CREDIT_CARD"", ""start"": 109, ""end"": 113, ""score"": 1.0}, {""text"": ""742 Elm Street, Austin, TX 78701"", ""entity_type"": ""ADDRESS"", ""start"": 138, ""end"": 170, ""score"": 1.0}]"
+"Hi, this is a message for David Chen at extension 4021. Your lab results from Quest Diagnostics (order #QD-998471) are ready. Please contact Dr. Patel at 617-555-0342 to discuss. Your patient ID is PT-228109.","[{""text"": ""David Chen"", ""entity_type"": ""PERSON"", ""start"": 26, ""end"": 36, ""score"": 1.0}, {""text"": ""QD-998471"", ""entity_type"": ""ID"", ""start"": 104, ""end"": 113, ""score"": 1.0}, {""text"": ""Dr. Patel"", ""entity_type"": ""PERSON"", ""start"": 141, ""end"": 150, ""score"": 1.0}, {""text"": ""617-555-0342"", ""entity_type"": ""PHONE_NUMBER"", ""start"": 154, ""end"": 166, ""score"": 1.0}, {""text"": ""PT-228109"", ""entity_type"": ""ID"", ""start"": 198, ""end"": 207, ""score"": 1.0}]"
+"Employee review for Sarah O'Brien (Employee #EMP-30042). Performance rating: Exceeds Expectations. Current salary: $94,500. Manager: Tom Richards. Next review date: 09/15/2025.","[{""text"": ""Sarah O'Brien"", ""entity_type"": ""PERSON"", ""start"": 20, ""end"": 33, ""score"": 1.0}, {""text"": ""EMP-30042"", ""entity_type"": ""ID"", ""start"": 45, ""end"": 54, ""score"": 1.0}, {""text"": ""$94,500"", ""entity_type"": ""FINANCIAL"", ""start"": 115, ""end"": 122, ""score"": 1.0}, {""text"": ""Tom Richards"", ""entity_type"": ""PERSON"", ""start"": 133, ""end"": 145, ""score"": 1.0}, {""text"": ""09/15/2025"", ""entity_type"": ""DATE"", ""start"": 165, ""end"": 175, ""score"": 1.0}]"
+"Tenant lease agreement: Robert Kim, SSN 531-72-8846, is approved for unit 4B at 1500 Oak Avenue, Portland, OR 97205. Monthly rent: $1,850. Lease start: 03/01/2025. Emergency contact: Linda Kim, 503-555-0147.","[{""text"": ""Robert Kim"", ""entity_type"": ""PERSON"", ""start"": 24, ""end"": 34, ""score"": 1.0}, {""text"": ""531-72-8846"", ""entity_type"": ""US_SSN"", ""start"": 40, ""end"": 51, ""score"": 1.0}, {""text"": ""1500 Oak Avenue, Portland, OR 97205"", ""entity_type"": ""ADDRESS"", ""start"": 80, ""end"": 115, ""score"": 1.0}, {""text"": ""$1,850"", ""entity_type"": ""FINANCIAL"", ""start"": 131, ""end"": 137, ""score"": 1.0}, {""text"": ""Linda Kim"", ""entity_type"": ""PERSON"", ""start"": 183, ""end"": 192, ""score"": 1.0}, {""text"": ""503-555-0147"", ""entity_type"": ""PHONE_NUMBER"", ""start"": 194, ""end"": 206, ""score"": 1.0}]"
+"Shipping confirmation for order #ORD-44218. Recipient: Aisha Patel, 2300 Birch Lane, Denver, CO 80202. Tracking number: 1Z999AA10123456784. Delivery expected by 02/28/2025. Contact: aisha.p@gmail.com.","[{""text"": ""Aisha Patel"", ""entity_type"": ""PERSON"", ""start"": 55, ""end"": 66, ""score"": 1.0}, {""text"": ""2300 Birch Lane, Denver, CO 80202"", ""entity_type"": ""ADDRESS"", ""start"": 68, ""end"": 101, ""score"": 1.0}, {""text"": ""1Z999AA10123456784"", ""entity_type"": ""ID"", ""start"": 120, ""end"": 138, ""score"": 1.0}, {""text"": ""02/28/2025"", ""entity_type"": ""DATE"", ""start"": 161, ""end"": 171, ""score"": 1.0}, {""text"": ""aisha.p@gmail.com"", ""entity_type"": ""EMAIL_ADDRESS"", ""start"": 182, ""end"": 199, ""score"": 1.0}]"
+"Insurance claim filed by John Martinez (Policy #POL-55931-CA). Incident date: 01/15/2025. Vehicle: 2022 Honda Civic, VIN 1HGBH41JXMN109186. Damage estimate: $4,200. Adjuster assigned: Karen White, karen.white@insco.com.","[{""text"": ""John Martinez"", ""entity_type"": ""PERSON"", ""start"": 25, ""end"": 38, ""score"": 1.0}, {""text"": ""POL-55931-CA"", ""entity_type"": ""ID"", ""start"": 48, ""end"": 60, ""score"": 1.0}, {""text"": ""01/15/2025"", ""entity_type"": ""DATE"", ""start"": 78, ""end"": 88, ""score"": 1.0}, {""text"": ""1HGBH41JXMN109186"", ""entity_type"": ""ID"", ""start"": 121, ""end"": 138, ""score"": 1.0}, {""text"": ""$4,200"", ""entity_type"": ""FINANCIAL"", ""start"": 157, ""end"": 163, ""score"": 1.0}, {""text"": ""Karen White"", ""entity_type"": ""PERSON"", ""start"": 184, ""end"": 195, ""score"": 1.0}, {""text"": ""karen.white@insco.com"", ""entity_type"": ""EMAIL_ADDRESS"", ""start"": 197, ""end"": 218, ""score"": 1.0}]"
+"Meeting notes: Discussed project timeline with Emily Zhao (emily.zhao@techcorp.io). Budget approved: $250,000. Next milestone due 06/30/2025. Stakeholder contact: VP of Engineering, ext. 7784, building 3A.","[{""text"": ""Emily Zhao"", ""entity_type"": ""PERSON"", ""start"": 47, ""end"": 57, ""score"": 1.0}, {""text"": ""emily.zhao@techcorp.io"", ""entity_type"": ""EMAIL_ADDRESS"", ""start"": 59, ""end"": 81, ""score"": 1.0}, {""text"": ""$250,000"", ""entity_type"": ""FINANCIAL"", ""start"": 101, ""end"": 109, ""score"": 1.0}, {""text"": ""06/30/2025"", ""entity_type"": ""DATE"", ""start"": 130, ""end"": 140, ""score"": 1.0}]"
+"Support ticket #TK-81923: Customer Peter Novak (peter.novak@yahoo.de) reports login issues. Account created 08/10/2022. Last known IP: 84.175.22.61. Browser: Chrome 120. Phone on file: +49-170-555-8821.","[{""text"": ""Peter Novak"", ""entity_type"": ""PERSON"", ""start"": 35, ""end"": 46, ""score"": 1.0}, {""text"": ""peter.novak@yahoo.de"", ""entity_type"": ""EMAIL_ADDRESS"", ""start"": 48, ""end"": 68, ""score"": 1.0}, {""text"": ""08/10/2022"", ""entity_type"": ""DATE"", ""start"": 108, ""end"": 118, ""score"": 1.0}, {""text"": ""84.175.22.61"", ""entity_type"": ""IP_ADDRESS"", ""start"": 135, ""end"": 147, ""score"": 1.0}, {""text"": ""+49-170-555-8821"", ""entity_type"": ""PHONE_NUMBER"", ""start"": 185, ""end"": 201, ""score"": 1.0}]"
+"Discharge summary for patient Lisa Thompson (MRN: MR-667210). Admitted: 12/20/2024, discharged: 12/27/2024. Primary diagnosis: pneumonia. Attending physician: Dr. Angela Morrison. Follow-up with pulmonology at 555-0299. Pharmacy: CVS #4481.","[{""text"": ""Lisa Thompson"", ""entity_type"": ""PERSON"", ""start"": 30, ""end"": 43, ""score"": 1.0}, {""text"": ""MR-667210"", ""entity_type"": ""ID"", ""start"": 50, ""end"": 59, ""score"": 1.0}, {""text"": ""12/20/2024"", ""entity_type"": ""DATE"", ""start"": 72, ""end"": 82, ""score"": 1.0}, {""text"": ""12/27/2024"", ""entity_type"": ""DATE"", ""start"": 96, ""end"": 106, ""score"": 1.0}, {""text"": ""Dr. Angela Morrison"", ""entity_type"": ""PERSON"", ""start"": 159, ""end"": 178, ""score"": 1.0}, {""text"": ""555-0299"", ""entity_type"": ""PHONE_NUMBER"", ""start"": 210, ""end"": 218, ""score"": 1.0}]"
diff --git a/evaluation/ai-assistant/src/app/components/EntityComparison.tsx b/evaluation/ai-assistant/src/app/components/EntityComparison.tsx
index 28a9ff594..49f485c47 100644
--- a/evaluation/ai-assistant/src/app/components/EntityComparison.tsx
+++ b/evaluation/ai-assistant/src/app/components/EntityComparison.tsx
@@ -6,7 +6,7 @@ import { Input } from './ui/input';
 import { Label } from './ui/label';
 import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from './ui/select';
 import { Collapsible, CollapsibleContent, CollapsibleTrigger } from './ui/collapsible';
-import { CheckCircle, XCircle, Edit, AlertTriangle, Check, X, ChevronDown, FileText } from 'lucide-react';
+import { CheckCircle, XCircle, Edit, Check, X, ChevronDown, FileText } from 'lucide-react';
 import type { Entity } from '../types';
 
 interface EntityComparisonProps {
@@ -20,11 +20,11 @@ interface EntityComparisonProps {
   onAddManual: (recordId: string, entity: Entity) => void;
 }
 
-type EntityStatus = 'match' | 'conflict' | 'presidio-only' | 'llm-only' | 'dataset-only' | 'pending';
+type EntityStatus = 'match' | 'presidio-only' | 'llm-only' | 'predefined-only' | 'pending';
 
 interface AnnotatedEntity extends Entity {
   status: EntityStatus;
-  source: 'presidio' | 'llm' | 'both' | 'dataset';
+  sources: ('presidio' | 'llm' | 'predefined')[];
   confirmed?: boolean;
 }
 
@@ -44,54 +44,77 @@ export function EntityComparison({
   const [rejectedEntities, setRejectedEntities] = useState<Set<string>>(new Set());
   const [expandedContexts, setExpandedContexts] = useState<Set<string>>(new Set());
 
-  // Combine and classify entities
+  // Combine and classify entities from all three sources
   const annotatedEntities: AnnotatedEntity[] = [];
 
-  presidioEntities.forEach(pe => {
-    const matchingLlm = llmEntities.find(
-      le => le.text === pe.text && le.start === pe.start && le.end === pe.end
-    );
-    
-    if (matchingLlm) {
-      if (matchingLlm.entity_type === pe.entity_type) {
-        annotatedEntities.push({ ...pe, status: 'match', source: 'both' });
-      } else {
-        annotatedEntities.push({ ...pe, status: 'conflict', source: 'both' });
+  // Two spans overlap if one starts before the other ends
+  const spansOverlap = (a: Entity, b: Entity) =>
+    a.start < b.end && b.start < a.end;
+
+  // Build a unified list: for each unique span, track which sources detected it
+  interface SpanEntry { entity: Entity; sources: Set<'presidio' | 'llm' | 'predefined'>; types: Map<string, string> }
+  const spans: SpanEntry[] = [];
+
+  const addToSpans = (entity: Entity, source: 'presidio' | 'llm' | 'predefined') => {
+    const existing = spans.find(s => spansOverlap(s.entity, entity));
+    if (existing) {
+      existing.sources.add(source);
+      existing.types.set(source, entity.entity_type);
+      // Prefer the entity with the longer text (scores are not compared)
+      if (entity.text.length > existing.entity.text.length) {
+        existing.entity = { ...entity };
       }
     } else {
-      annotatedEntities.push({ ...pe, status: 'presidio-only', source: 'presidio' });
+      const types = new Map<string, string>();
+      types.set(source, entity.entity_type);
+      spans.push({ entity: { ...entity }, sources: new Set([source]), types });
     }
-  });
+  };
 
-  llmEntities.forEach(le => {
-    const alreadyAdded = annotatedEntities.some(
-      ae => ae.text === le.text && ae.start === le.start && ae.end === le.end
-    );
-    if (!alreadyAdded) {
-      annotatedEntities.push({ ...le, status: 'llm-only', source: 'llm' });
-    }
-  });
+  presidioEntities.forEach(e => addToSpans(e, 'presidio'));
+  datasetEntities.forEach(e => addToSpans(e, 'predefined'));
+  llmEntities.forEach(e => addToSpans(e, 'llm'));
 
-  datasetEntities.forEach(de => {
-    const alreadyAdded = annotatedEntities.some(
-      ae => ae.text === de.text && ae.start === de.start && ae.end === de.end
-    );
-    if (!alreadyAdded) {
-      annotatedEntities.push({ ...de, status: 'dataset-only', source: 'dataset' });
+  spans.forEach(({ entity, sources, types }) => {
+    const sourceList = Array.from(sources) as ('presidio' | 'llm' | 'predefined')[];
+    const uniqueTypes = new Set(types.values());
+    const allAgree = uniqueTypes.size === 1;
+
+    if (sourceList.length >= 2 && allAgree) {
+      // All active sources agree on type → single "Match" card
+      annotatedEntities.push({ ...entity, status: 'match', sources: sourceList });
+    } else if (sourceList.length >= 2 && !allAgree) {
+      // Sources disagree on type → separate card per source so user can confirm/reject each
+      for (const src of sourceList) {
+        const srcType = types.get(src) || entity.entity_type;
+        const status: EntityStatus = src === 'presidio' ? 'presidio-only' : src === 'llm' ? 'llm-only' : 'predefined-only';
+        annotatedEntities.push({ ...entity, entity_type: srcType, status, sources: [src] });
+      }
+    } else if (sourceList.length === 1) {
+      const s = sourceList[0];
+      const status: EntityStatus = s === 'presidio' ? 'presidio-only' : s === 'llm' ? 'llm-only' : 'predefined-only';
+      annotatedEntities.push({ ...entity, status, sources: sourceList });
+    } else {
+      annotatedEntities.push({ ...entity, status: 'pending', sources: sourceList });
     }
   });
 
-  const getEntityKey = (entity: Entity) => `${entity.text}-${entity.start}-${entity.end}`;
+  const getEntityKey = (entity: AnnotatedEntity) => `${entity.text}-${entity.start}-${entity.end}-${entity.sources.join(',')}`;
 
   const getContextForEntity = (entity: Entity) => {
     const CONTEXT_CHARS = 150;
-    const start = Math.max(0, entity.start - CONTEXT_CHARS);
-    const end = Math.min(recordText.length, entity.end + CONTEXT_CHARS);
-    
-    const before = recordText.substring(start, entity.start);
-    const entityText = recordText.substring(entity.start, entity.end);
-    const after = recordText.substring(entity.end, end);
-    
+    // Trust offsets that already point at entity.text; indexOf alone would pick the first of repeated values
+    const idx = recordText.startsWith(entity.text, entity.start) ? entity.start : recordText.indexOf(entity.text);
+    const entityStart = idx >= 0 ? idx : entity.start;
+    const entityEnd = idx >= 0 ? idx + entity.text.length : entity.end;
+
+    const start = Math.max(0, entityStart - CONTEXT_CHARS);
+    const end = Math.min(recordText.length, entityEnd + CONTEXT_CHARS);
+
+    const before = recordText.substring(start, entityStart);
+    const entityText = recordText.substring(entityStart, entityEnd);
+    const after = recordText.substring(entityEnd, end);
+
     return {
       before: (start > 0 ? '...' : '') + before,
       entity: entityText,
@@ -119,7 +142,7 @@ export function EntityComparison({
       newSet.delete(key);
       return newSet;
     });
-    onConfirm(recordId, entity, entity.source === 'both' ? 'presidio' : entity.source);
+    onConfirm(recordId, entity, entity.sources[0] === 'predefined' ? 'dataset' : entity.sources[0]);
   };
 
   const handleRejectEntity = (entity: AnnotatedEntity) => {
@@ -130,9 +153,7 @@ export function EntityComparison({
       newSet.delete(key);
       return newSet;
     });
-    if (entity.source !== 'both') {
-      onReject(recordId, entity, entity.source);
-    }
+    onReject(recordId, entity, entity.sources[0] === 'predefined' ? 'dataset' : entity.sources[0]);
   };
 
   const handleAddManualEntity = () => {
@@ -154,14 +175,12 @@ export function EntityComparison({
     switch (status) {
       case 'match':
         return <Badge className="bg-blue-100 text-blue-800 border-blue-300">✓ Match</Badge>;
-      case 'conflict':
-        return <Badge className="bg-amber-100 text-amber-800 border-amber-300">⚠ Conflict</Badge>;
       case 'presidio-only':
-        return <Badge className="bg-purple-100 text-purple-800 border-purple-300">Presidio Only</Badge>;
+        return <Badge className="bg-purple-100 text-purple-800 border-purple-300">Presidio</Badge>;
       case 'llm-only':
-        return <Badge className="bg-cyan-100 text-cyan-800 border-cyan-300">LLM Only</Badge>;
-      case 'dataset-only':
-        return <Badge className="bg-emerald-100 text-emerald-800 border-emerald-300">Dataset</Badge>;
+        return <Badge className="bg-cyan-100 text-cyan-800 border-cyan-300">LLM Judge</Badge>;
+      case 'predefined-only':
+        return <Badge className="bg-emerald-100 text-emerald-800 border-emerald-300">Predefined</Badge>;
       default:
         return <Badge variant="secondary">Pending</Badge>;
     }
@@ -239,7 +258,6 @@ export function EntityComparison({
                   className={`p-4 rounded-lg border ${
                     isConfirmed ? 'bg-green-50 border-green-300' :
                     isRejected ? 'bg-red-50 border-red-300' :
-                    entity.status === 'conflict' ? 'bg-amber-50 border-amber-300' :
                     'bg-white border-slate-200'
                   }`}
                 >
@@ -254,12 +272,6 @@ export function EntityComparison({
                       <div className="flex items-center gap-4 text-sm text-slate-600">
                         <span>Position: {entity.start}-{entity.end}</span>
                         {entity.score && <span>Confidence: {(entity.score * 100).toFixed(0)}%</span>}
-                        {entity.status === 'conflict' && (
-                          <div className="flex items-center gap-1 text-amber-700">
-                            <AlertTriangle className="size-3" />
-                            <span className="text-xs">Type mismatch between Presidio and LLM</span>
-                          </div>
-                        )}
                       </div>
                     </div>
 
diff --git a/evaluation/ai-assistant/src/app/pages/Anonymization.tsx b/evaluation/ai-assistant/src/app/pages/Anonymization.tsx
index 12eda26a9..6300ea996 100644
--- a/evaluation/ai-assistant/src/app/pages/Anonymization.tsx
+++ b/evaluation/ai-assistant/src/app/pages/Anonymization.tsx
@@ -1,11 +1,29 @@
-import { useMemo } from 'react';
+import { useState, useEffect, useMemo, useCallback } from 'react';
 import { useNavigate } from 'react-router';
 import { Card } from '../components/ui/card';
 import { Button } from '../components/ui/button';
+import { Label } from '../components/ui/label';
+import { Progress } from '../components/ui/progress';
 import { Alert, AlertDescription } from '../components/ui/alert';
-import { ArrowRight, Shield, Sparkles, Database } from 'lucide-react';
+import {
+  Select,
+  SelectContent,
+  SelectItem,
+  SelectTrigger,
+  SelectValue,
+} from '../components/ui/select';
+import { ArrowRight, Shield, Sparkles, Database, CheckCircle, Loader2, AlertTriangle, Unplug } from 'lucide-react';
+import { api } from '../lib/api';
 import type { SetupConfig } from '../types';
 
+type LlmStep = 'loading' | 'env_missing' | 'idle' | 'configuring' | 'configured' | 'running' | 'done' | 'error'; // current step of the LLM Judge card; it selects which panel is rendered (idle/error/configuring share one)
+
+interface ModelChoice { // one entry of the "Model Deployment" dropdown, as returned by api.llm.models()
+  id: string; // dropdown value; passed to api.llm.configure() when selected
+  label: string; // name shown in the dropdown
+  provider: string; // shown after the label ("label — provider")
+}
+
 export function Anonymization() {
   const navigate = useNavigate();
 
@@ -20,16 +38,107 @@ export function Anonymization() {
 
   const hasDatasetEntities = setupConfig?.hasDatasetEntities ?? false;
 
+  // LLM Judge state
+  const [llmStep, setLlmStep] = useState<LlmStep>('loading');
+  const [models, setModels] = useState<ModelChoice[]>([]);
+  const [selectedModel, setSelectedModel] = useState('gpt-5.4');
+  const [llmProgress, setLlmProgress] = useState(0);
+  const [llmTotal, setLlmTotal] = useState(0);
+  const [llmError, setLlmError] = useState<string | null>(null);
+
+  // On mount, fetch the model list, saved LLM settings and run status together, then pick the initial step
+  useEffect(() => {
+    Promise.all([api.llm.models(), api.llm.settings(), api.llm.status()]).then(
+      ([modelList, settings, status]) => {
+        setModels(modelList);
+        setSelectedModel(settings.deployment_name || 'gpt-4o'); // NOTE(review): fallback differs from the 'gpt-5.4' initial state — confirm which default is intended
+
+        if (status.running) { // an analysis is already in flight: resume the progress view
+          setLlmStep('running');
+          setLlmProgress(status.progress);
+          setLlmTotal(status.total);
+        } else if (settings.configured) {
+          setLlmStep('configured');
+        } else if (!settings.env_ready) {
+          setLlmStep('env_missing');
+        } else {
+          setLlmStep('idle');
+        }
+      },
+    ).catch(() => setLlmStep('env_missing')); // any failed request (not only a missing env) lands on the env_missing panel
+  }, []);
+
+  // While in the 'running' step, poll api.llm.status() once per second; the interval is cleared when the step changes or on unmount
+  useEffect(() => {
+    if (llmStep !== 'running') return;
+    const interval = setInterval(async () => {
+      try {
+        const s = await api.llm.status();
+        setLlmProgress(s.progress);
+        setLlmTotal(s.total);
+        if (s.error) {
+          setLlmError(s.error);
+          setLlmStep('error');
+        } else if (!s.running && s.progress >= s.total && s.total > 0) { // done only when stopped AND every record is counted
+          setLlmStep('done');
+        }
+      } catch {
+        // A failed status request is ignored; the next tick retries
+      }
+    }, 1000);
+    return () => clearInterval(interval);
+  }, [llmStep]);
+
+  const handleConfigure = useCallback(async () => { // calls api.llm.configure() with the selected model: 'configured' on success, 'error' plus a message on failure
+    setLlmStep('configuring');
+    setLlmError(null);
+    try {
+      await api.llm.configure(selectedModel);
+      setLlmStep('configured');
+    } catch (err: unknown) {
+      setLlmError((err as { message?: string } | null | undefined)?.message ?? 'Configuration failed'); // ?. tolerates a thrown null/undefined, which would otherwise leave the step stuck on 'configuring'
+      setLlmStep('error');
+    }
+  }, [selectedModel]);
+
+  const handleRunAnalysis = useCallback(async () => { // calls api.llm.analyze() and enters 'running', which activates the status poll
+    setLlmError(null);
+    try {
+      const res = await api.llm.analyze();
+      setLlmTotal(res.total);
+      setLlmProgress(0);
+      setLlmStep('running');
+    } catch (err: unknown) {
+      setLlmError((err as { message?: string } | null | undefined)?.message ?? 'Failed to start analysis'); // ?. tolerates a thrown null/undefined
+      // Stay in configured state so user can retry
+      setLlmStep('configured');
+    }
+  }, []);
+
+  const handleDisconnect = useCallback(async () => { // calls api.llm.disconnect(); on success resets the card to 'idle' with cleared error and progress
+    try {
+      await api.llm.disconnect();
+      setLlmStep('idle');
+      setLlmError(null);
+      setLlmProgress(0);
+      setLlmTotal(0);
+    } catch (err: unknown) {
+      setLlmError((err as { message?: string } | null | undefined)?.message ?? 'Failed to disconnect'); // ?. tolerates a thrown null/undefined
+    }
+  }, []);
+
   const handleContinue = () => {
     navigate('/human-review');
   };
 
+  const progressPct = llmTotal > 0 ? Math.round((llmProgress / llmTotal) * 100) : 0; // whole-number percent for the progress bar; 0 while the total is unknown (avoids dividing by zero)
+
   return (
     <div className="max-w-6xl mx-auto space-y-6">
       <div>
         <h2 className="text-2xl font-semibold text-slate-900 mb-2">PII Detection Analysis</h2>
         <p className="text-slate-600">
-          Automated PII detection engines will run here once implemented.
+          Configure and run PII detection engines. The LLM Judge uses Azure OpenAI via LangExtract to identify entities.
         </p>
       </div>
 
@@ -48,9 +157,9 @@ export function Anonymization() {
         </Alert>
       )}
 
-      {/* Side-by-Side Cards — greyed out / coming soon */}
+      {/* Side-by-Side Cards */}
       <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
-        {/* Presidio Processing — not implemented */}
+        {/* Presidio Processing — coming soon */}
         <Card className="p-6 opacity-50 pointer-events-none">
           <div className="space-y-6">
             <div className="flex items-center justify-between">
@@ -63,30 +172,159 @@ export function Anonymization() {
               </div>
               <span className="text-xs text-slate-400 bg-slate-100 px-2 py-1 rounded">Coming soon</span>
             </div>
-
             <p className="text-sm text-slate-400">
               Run Presidio's rule-based and NLP detection to identify PII entities with precise character spans and confidence scores.
             </p>
           </div>
         </Card>
 
-        {/* LLM Processing — not implemented */}
-        <Card className="p-6 opacity-50 pointer-events-none">
-          <div className="space-y-6">
+        {/* LLM Judge — active */}
+        <Card className="p-6 border-purple-200">
+          <div className="space-y-4">
             <div className="flex items-center justify-between">
               <div className="flex items-center gap-2">
-                <Sparkles className="size-6 text-slate-400" />
+                <Sparkles className="size-6 text-purple-600" />
                 <div>
-                  <h3 className="font-semibold text-slate-400">LLM Judge</h3>
-                  <p className="text-sm text-slate-400">AI-assisted entity detection</p>
+                  <h3 className="font-semibold text-slate-900">LLM Judge</h3>
+                  <p className="text-sm text-slate-500">Azure OpenAI via LangExtract</p>
                 </div>
               </div>
-              <span className="text-xs text-slate-400 bg-slate-100 px-2 py-1 rounded">Coming soon</span>
+              {llmStep === 'done' && (
+                <span className="flex items-center gap-1 text-xs text-green-700 bg-green-50 px-2 py-1 rounded">
+                  <CheckCircle className="size-3" /> Complete
+                </span>
+              )}
+              {llmStep === 'configured' && (
+                <span className="flex items-center gap-1 text-xs text-blue-700 bg-blue-50 px-2 py-1 rounded">
+                  Ready
+                </span>
+              )}
             </div>
 
-            <p className="text-sm text-slate-400">
-              Use an LLM to suggest additional PII entities and validate detections. Results will be combined with Presidio output for human review.
-            </p>
+            {/* Step: loading */}
+            {llmStep === 'loading' && (
+              <div className="flex items-center gap-2 text-sm text-slate-500">
+                <Loader2 className="size-4 animate-spin" /> Loading configuration…
+              </div>
+            )}
+
+            {/* Step: .env not configured */}
+            {llmStep === 'env_missing' && (
+              <Alert className="border-amber-200 bg-amber-50">
+                <AlertTriangle className="size-4 text-amber-600" />
+                <AlertDescription>
+                  <p className="text-sm text-amber-900">
+                    <strong>Azure OpenAI endpoint is required.</strong>{' '}
+                    Add it to the <code className="bg-amber-100 px-1 rounded text-xs">backend/.env</code> file:
+                  </p>
+                  <pre className="mt-2 text-xs bg-amber-100/60 rounded p-2 text-amber-900 font-mono">
+{`PRESIDIO_EVAL_AZURE_ENDPOINT=https://your-resource.openai.azure.com
+
+# Option A — API key auth:
+PRESIDIO_EVAL_AZURE_API_KEY=your-api-key-here
+
+# Option B — leave API key empty to use
+# DefaultAzureCredential (managed identity / az login)`}
+                  </pre>
+                  <p className="text-xs text-amber-700 mt-2">
+                    Then restart the backend server. See <code className="bg-amber-100 px-1 rounded">.env.example</code> for reference.
+                  </p>
+                </AlertDescription>
+              </Alert>
+            )}
+
+            {/* Step: idle / error — choose deployment and connect */}
+            {(llmStep === 'idle' || llmStep === 'error' || llmStep === 'configuring') && (
+              <div className="space-y-3">
+                <div className="space-y-1.5">
+                  <Label htmlFor="deployment-select" className="text-xs font-medium">Model Deployment</Label>
+                  <Select value={selectedModel} onValueChange={setSelectedModel} disabled={llmStep === 'configuring'}>
+                    <SelectTrigger id="deployment-select">
+                      <SelectValue placeholder="Select a model" />
+                    </SelectTrigger>
+                    <SelectContent>
+                      {models.map((m) => (
+                        <SelectItem key={m.id} value={m.id}>
+                          {m.label} <span className="text-slate-400 ml-1">— {m.provider}</span>
+                        </SelectItem>
+                      ))}
+                    </SelectContent>
+                  </Select>
+                </div>
+
+                {llmError && (
+                  <Alert className="border-red-200 bg-red-50">
+                    <AlertTriangle className="size-4 text-red-600" />
+                    <AlertDescription className="text-sm text-red-800">{llmError}</AlertDescription>
+                  </Alert>
+                )}
+
+                <Button
+                  size="sm"
+                  onClick={handleConfigure}
+                  disabled={llmStep === 'configuring'}
+                  className="w-full"
+                >
+                  {llmStep === 'configuring' ? (
+                    <><Loader2 className="size-4 mr-2 animate-spin" /> Connecting…</>
+                  ) : (
+                    'Connect to Azure OpenAI'
+                  )}
+                </Button>
+              </div>
+            )}
+
+            {/* Step: configured — ready to run */}
+            {llmStep === 'configured' && (
+              <div className="space-y-3">
+                <p className="text-sm text-slate-600">
+                  Connected with <strong>{selectedModel}</strong>.
+                </p>
+                {llmError && (
+                  <Alert className="border-red-200 bg-red-50">
+                    <AlertTriangle className="size-4 text-red-600" />
+                    <AlertDescription className="text-sm text-red-800">{llmError}</AlertDescription>
+                  </Alert>
+                )}
+                <div className="flex gap-2">
+                  <Button size="sm" onClick={handleRunAnalysis} className="flex-1">
+                    <Sparkles className="size-4 mr-2" />
+                    Run LLM Analysis
+                  </Button>
+                  <Button size="sm" variant="outline" onClick={handleDisconnect}>
+                    <Unplug className="size-4 mr-1" />
+                    Disconnect
+                  </Button>
+                </div>
+              </div>
+            )}
+
+            {/* Step: running */}
+            {llmStep === 'running' && (
+              <div className="space-y-3">
+                <div className="flex items-center justify-between text-sm text-slate-600">
+                  <span className="flex items-center gap-2">
+                    <Loader2 className="size-4 animate-spin" />
+                    Analysing records…
+                  </span>
+                  <span>{llmProgress} / {llmTotal}</span>
+                </div>
+                <Progress value={progressPct} className="h-2" />
+              </div>
+            )}
+
+            {/* Step: complete */}
+            {llmStep === 'done' && (
+              <div className="space-y-3">
+                <p className="text-sm text-green-700">
+                  LLM analysis complete — {llmTotal} records processed with <strong>{selectedModel}</strong>.
+                </p>
+                <Button size="sm" variant="outline" onClick={handleDisconnect}>
+                  <Unplug className="size-4 mr-1" />
+                  Disconnect & Change Model
+                </Button>
+              </div>
+            )}
           </div>
         </Card>
       </div>
diff --git a/evaluation/ai-assistant/src/app/pages/HumanReview.tsx b/evaluation/ai-assistant/src/app/pages/HumanReview.tsx
index 9e9da31fc..f7c1329db 100644
--- a/evaluation/ai-assistant/src/app/pages/HumanReview.tsx
+++ b/evaluation/ai-assistant/src/app/pages/HumanReview.tsx
@@ -1,15 +1,30 @@
-import { useState, useMemo } from 'react';
+import { useEffect, useState, useMemo } from 'react';
 import { useNavigate } from 'react-router';
 import { Button } from '../components/ui/button';
 import { Progress } from '../components/ui/progress';
 import { Alert, AlertDescription } from '../components/ui/alert';
-import { ArrowRight, Users, CheckCircle, ChevronLeft, ChevronRight, CheckCheck } from 'lucide-react';
+import { ArrowRight, Users, CheckCircle, ChevronLeft, ChevronRight, CheckCheck, Loader2 } from 'lucide-react';
 import { EntityComparison } from '../components/EntityComparison';
-import { mockRecords } from '../lib/mockData';
-import type { Entity, SetupConfig } from '../types';
+import { api } from '../lib/api';
+import type { Entity, Record as RecordType, SetupConfig } from '../types';
+
+/** Normalize a backend record to the frontend Record shape; snake_case keys win, camelCase keys are the fallback. */
+function toFrontendRecord(raw: any): RecordType {
+  const { id, text } = raw;
+  // `??` only skips null/undefined, so an empty array coming from the backend is kept as-is.
+  const firstPresent = (snake: string, camel: string) => raw[snake] ?? raw[camel];
+  const presidioEntities = firstPresent('presidio_entities', 'presidioEntities') ?? [];
+  const llmEntities = firstPresent('llm_entities', 'llmEntities') ?? [];
+  const datasetEntities = firstPresent('dataset_entities', 'datasetEntities') ?? [];
+  const goldenEntities = firstPresent('golden_entities', 'goldenEntities') ?? undefined;
+  return { id, text, presidioEntities, llmEntities, datasetEntities, goldenEntities };
+}
 
 export function HumanReview() {
   const navigate = useNavigate();
+  const [records, setRecords] = useState<RecordType[]>([]);
+  const [loading, setLoading] = useState(true);
+  const [loadError, setLoadError] = useState<string | null>(null);
   const [currentRecordIndex, setCurrentRecordIndex] = useState(0);
   const [reviewedRecords, setReviewedRecords] = useState<Set<string>>(new Set());
   const [goldenSet, setGoldenSet] = useState<Record<string, Entity[]>>({});
@@ -25,9 +40,39 @@ export function HumanReview() {
 
   const hasDatasetEntities = setupConfig?.hasDatasetEntities ?? false;
 
-  const currentRecord = mockRecords[currentRecordIndex];
-  const totalRecords = mockRecords.length;
-  const reviewProgress = (reviewedRecords.size / totalRecords) * 100;
+  // On mount, load the sampled records and the per-record LLM results in parallel and merge them
+  useEffect(() => {
+    async function loadRecords() {
+      try {
+        setLoading(true);
+        const [rawRecords, llmResults] = await Promise.all([ // if either request rejects, the whole load fails into loadError
+          api.sampling.records(),
+          api.llm.results(),
+        ]);
+
+        const merged = rawRecords.map((raw: any) => {
+          const rec = toFrontendRecord(raw);
+          // LLM results are looked up by record id; when present they replace any LLM entities carried on the record itself
+          const llmEntities = llmResults[rec.id];
+          if (llmEntities) {
+            rec.llmEntities = llmEntities;
+          }
+          return rec;
+        });
+
+        setRecords(merged);
+      } catch (err: any) {
+        setLoadError(err.message || 'Failed to load records');
+      } finally {
+        setLoading(false);
+      }
+    }
+    loadRecords();
+  }, []);
+
+  const currentRecord = records[currentRecordIndex] ?? null;
+  const totalRecords = records.length;
+  const reviewProgress = totalRecords > 0 ? (reviewedRecords.size / totalRecords) * 100 : 0;
 
   const handleConfirm = (recordId: string, entity: Entity, _source: string) => {
     setGoldenSet(prev => ({
@@ -41,7 +86,7 @@ export function HumanReview() {
     setGoldenSet(prev => ({
       ...prev,
       [recordId]: (prev[recordId] || []).filter(e => 
-        e.text !== entity.text || e.start !== entity.start || e.end !== entity.end
+        !(e.start < entity.end && entity.start < e.end)
       ),
     }));
     setReviewedRecords(new Set([...reviewedRecords, recordId]));
@@ -72,11 +117,10 @@ export function HumanReview() {
   };
 
   const handleAutoConfirmAll = () => {
-    // Auto-confirm all entities from all sources for all records
     const allReviewed = new Set<string>();
     const autoGolden: Record<string, Entity[]> = {};
 
-    mockRecords.forEach(record => {
+    records.forEach(record => {
       allReviewed.add(record.id);
       const entities: Entity[] = [];
       const seen = new Set<string>();
@@ -91,9 +135,7 @@ export function HumanReview() {
 
       record.presidioEntities.forEach(addUnique);
       record.llmEntities.forEach(addUnique);
-      if ('datasetEntities' in record) {
-        (record as any).datasetEntities?.forEach(addUnique);
-      }
+      record.datasetEntities?.forEach(addUnique);
 
       autoGolden[record.id] = entities;
     });
@@ -102,8 +144,39 @@ export function HumanReview() {
     setGoldenSet(autoGolden);
   };
 
-  const isReviewed = reviewedRecords.has(currentRecord.id);
-  const canContinue = reviewedRecords.size === totalRecords;
+  const isReviewed = currentRecord ? reviewedRecords.has(currentRecord.id) : false;
+  const canContinue = totalRecords > 0 && reviewedRecords.size === totalRecords;
+
+  if (loading) {
+    return (
+      <div className="max-w-5xl mx-auto flex items-center justify-center py-20">
+        <Loader2 className="size-6 animate-spin text-slate-400 mr-3" />
+        <span className="text-slate-600">Loading records…</span>
+      </div>
+    );
+  }
+
+  if (loadError) {
+    return (
+      <div className="max-w-5xl mx-auto py-10">
+        <Alert className="border-red-200 bg-red-50">
+          <AlertDescription className="text-red-800">{loadError}</AlertDescription>
+        </Alert>
+      </div>
+    );
+  }
+
+  if (!currentRecord) {
+    return (
+      <div className="max-w-5xl mx-auto py-10">
+        <Alert>
+          <AlertDescription>
+            No sampled records found. Go back to <strong>Sampling</strong> to select a sample first.
+          </AlertDescription>
+        </Alert>
+      </div>
+    );
+  }
 
   return (
     <div className="max-w-5xl mx-auto space-y-6">
@@ -183,7 +256,7 @@ export function HumanReview() {
         recordText={currentRecord.text}
         presidioEntities={currentRecord.presidioEntities}
         llmEntities={currentRecord.llmEntities}
-        datasetEntities={'datasetEntities' in currentRecord ? (currentRecord as any).datasetEntities : []}
+        datasetEntities={currentRecord.datasetEntities ?? []}
         onConfirm={handleConfirm}
         onReject={handleReject}
         onAddManual={handleAddManual}
@@ -195,19 +268,19 @@ export function HumanReview() {
         <div className="grid grid-cols-2 md:grid-cols-4 gap-3 text-sm">
           <div className="flex items-center gap-2">
             <div className="size-3 rounded-full bg-blue-500" />
-            <span>✓ Match - Both systems agree</span>
+            <span>✓ Match (all sources agree)</span>
           </div>
           <div className="flex items-center gap-2">
-            <div className="size-3 rounded-full bg-amber-500" />
-            <span>⚠ Conflict - Type mismatch</span>
+            <div className="size-3 rounded-full bg-purple-500" />
+            <span>Presidio</span>
           </div>
           <div className="flex items-center gap-2">
-            <div className="size-3 rounded-full bg-purple-500" />
-            <span>Presidio only detection</span>
+            <div className="size-3 rounded-full bg-emerald-500" />
+            <span>Predefined</span>
           </div>
           <div className="flex items-center gap-2">
             <div className="size-3 rounded-full bg-cyan-500" />
-            <span>LLM only detection</span>
+            <span>LLM Judge</span>
           </div>
         </div>
       </div>
diff --git a/evaluation/ai-assistant/src/app/pages/Setup.tsx b/evaluation/ai-assistant/src/app/pages/Setup.tsx
index c752cc387..643542d51 100644
--- a/evaluation/ai-assistant/src/app/pages/Setup.tsx
+++ b/evaluation/ai-assistant/src/app/pages/Setup.tsx
@@ -1,4 +1,4 @@
-import { useState } from 'react';
+import { useEffect, useState } from 'react';
 import { useNavigate } from 'react-router';
 import { Card } from '../components/ui/card';
 import { Button } from '../components/ui/button';
@@ -7,27 +7,19 @@ import { Input } from '../components/ui/input';
 import { Checkbox } from '../components/ui/checkbox';
 import { Alert, AlertDescription } from '../components/ui/alert';
 import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '../components/ui/select';
-import { Database, Shield, ArrowRight, Cloud, FileText, CheckCircle, Loader2, X, Plus } from 'lucide-react';
+import { Database, Shield, ArrowRight, Cloud, FileText, CheckCircle, Loader2, X, Plus, Pencil, Trash2 } from 'lucide-react';
 import { api } from '../lib/api';
 import type { ComplianceFramework, UploadedDataset } from '../types';
 
 export function Setup() {
   const navigate = useNavigate();
-  const [datasets, setDatasets] = useState<UploadedDataset[]>([
-    // Seed with the example dataset
-    {
-      id: 'ds-001',
-      filename: 'Example - Patient Records',
-      format: 'csv',
-      record_count: 1500,
-      has_entities: false,
-      columns: ['text'],
-    },
-  ]);
+  const [datasets, setDatasets] = useState<UploadedDataset[]>([]);
   const [selectedDatasetId, setSelectedDatasetId] = useState('');
   const [showAddForm, setShowAddForm] = useState(false);
   const [filePath, setFilePath] = useState('');
   const [fileFormat, setFileFormat] = useState<'csv' | 'json'>('csv');
+  const [datasetName, setDatasetName] = useState('');
+  const [datasetDescription, setDatasetDescription] = useState('');
   const [textColumn, setTextColumn] = useState('text');
   const [entitiesColumn, setEntitiesColumn] = useState('');
   const [loading, setLoading] = useState(false);
@@ -37,6 +29,15 @@ export function Setup() {
   const [cloudRestriction, setCloudRestriction] = useState<'allowed' | 'restricted'>('allowed');
   const [runPresidio, setRunPresidio] = useState(true);
   const [runLlm, setRunLlm] = useState(true);
+  const [editingName, setEditingName] = useState(false);
+  const [editNameValue, setEditNameValue] = useState('');
+
+  // Fetch saved datasets on mount; the list stays empty if none are saved or the request fails
+  useEffect(() => {
+    api.datasets.saved().then(saved => {
+      if (saved.length > 0) setDatasets(saved);
+    }).catch((err) => console.warn('Failed to load saved datasets', err)); // still best-effort, but the failure is visible in the console instead of being swallowed
+  }, []);
 
   const selectedDataset = datasets.find(d => d.id === selectedDatasetId) ?? null;
   const canProceed = selectedDataset !== null;
@@ -49,14 +50,12 @@ export function Setup() {
     setSelectedDatasetId(value);
     setShowAddForm(false);
     setPreviewRecords([]);
+    setEditingName(false);
 
-    // Fetch preview for loaded datasets (skip for the seed example)
-    if (value.startsWith('upload-')) {
-      try {
-        const preview = await api.datasets.preview(value);
-        setPreviewRecords(preview);
-      } catch { /* ignore */ }
-    }
+    try {
+      const preview = await api.datasets.preview(value);
+      setPreviewRecords(preview);
+    } catch { /* ignore */ }
   };
 
   const handleLoadDataset = async () => {
@@ -73,6 +72,8 @@ export function Setup() {
         format: fileFormat,
         text_column: textColumn.trim() || 'text',
         entities_column: entitiesColumn.trim() || undefined,
+        name: datasetName.trim() || undefined,
+        description: datasetDescription.trim() || undefined,
       });
 
       setDatasets(prev => [...prev, dataset]);
@@ -84,6 +85,8 @@ export function Setup() {
 
       // Reset form fields
       setFilePath('');
+      setDatasetName('');
+      setDatasetDescription('');
       setTextColumn('text');
       setEntitiesColumn('');
     } catch (err: any) {
@@ -136,7 +139,7 @@ export function Setup() {
               <SelectContent>
                 {datasets.map(ds => (
                   <SelectItem key={ds.id} value={ds.id}>
-                    {ds.filename} — {ds.record_count.toLocaleString()} records
+                    {ds.name} — {ds.record_count.toLocaleString()} records
                   </SelectItem>
                 ))}
                 <SelectItem value="__add_new__">
@@ -160,6 +163,28 @@ export function Setup() {
               </div>
 
               <div className="space-y-3">
+                <div>
+                  <Label htmlFor="dataset-name">Dataset Name <span className="text-slate-400">(optional)</span></Label>
+                  <Input
+                    id="dataset-name"
+                    placeholder="e.g. Patient Records Q4"
+                    value={datasetName}
+                    onChange={(e) => setDatasetName(e.target.value)}
+                    className="mt-1 text-sm"
+                  />
+                </div>
+
+                <div>
+                  <Label htmlFor="dataset-description">Description <span className="text-slate-400">(optional)</span></Label>
+                  <Input
+                    id="dataset-description"
+                    placeholder="Brief description of the dataset contents"
+                    value={datasetDescription}
+                    onChange={(e) => setDatasetDescription(e.target.value)}
+                    className="mt-1 text-sm"
+                  />
+                </div>
+
                 <div>
                   <Label htmlFor="file-path">Absolute File Path</Label>
                   <Input
@@ -233,10 +258,69 @@ export function Setup() {
               <div className="p-4 bg-green-50 border border-green-200 rounded-lg">
                 <div className="flex items-start gap-3">
                   <CheckCircle className="size-5 text-green-600 mt-0.5" />
-                  <div>
-                    <div className="font-medium text-green-900">{selectedDataset.filename}</div>
+                  <div className="flex-1">
+                    <div className="flex items-center gap-2">
+                      {editingName ? (
+                        <form
+                          className="flex items-center gap-2"
+                          onSubmit={async (e) => {
+                            e.preventDefault();
+                            if (!editNameValue.trim()) return;
+                            try {
+                              const updated = await api.datasets.rename(selectedDataset.id, editNameValue.trim());
+                              setDatasets(prev => prev.map(d => d.id === updated.id ? updated : d));
+                              setEditingName(false);
+                            } catch { /* ignore */ }
+                          }}
+                        >
+                          <Input
+                            value={editNameValue}
+                            onChange={(e) => setEditNameValue(e.target.value)}
+                            className="h-7 text-sm w-56"
+                            autoFocus
+                          />
+                          <Button type="submit" size="sm" variant="ghost" className="h-7 px-2">
+                            <CheckCircle className="size-3.5" />
+                          </Button>
+                          <Button type="button" size="sm" variant="ghost" className="h-7 px-2" onClick={() => setEditingName(false)}>
+                            <X className="size-3.5" />
+                          </Button>
+                        </form>
+                      ) : (
+                        <>
+                          <span className="font-medium text-green-900">{selectedDataset.name}</span>
+                          <Button
+                            size="sm"
+                            variant="ghost"
+                            className="h-6 px-1.5 text-green-700 hover:text-green-900"
+                            onClick={() => { setEditingName(true); setEditNameValue(selectedDataset.name); }}
+                          >
+                            <Pencil className="size-3" />
+                          </Button>
+                          <Button
+                            size="sm"
+                            variant="ghost"
+                            className="h-6 px-1.5 text-red-500 hover:text-red-700"
+                            onClick={async () => {
+                              try {
+                                await api.datasets.remove(selectedDataset.id);
+                                setDatasets(prev => prev.filter(d => d.id !== selectedDataset.id));
+                                setSelectedDatasetId('');
+                                setPreviewRecords([]);
+                              } catch { /* ignore */ }
+                            }}
+                          >
+                            <Trash2 className="size-3" />
+                          </Button>
+                        </>
+                      )}
+                    </div>
                     <div className="text-sm text-green-800 mt-1 space-y-0.5">
+                      {selectedDataset.description && (
+                        <div className="text-green-700 italic">{selectedDataset.description}</div>
+                      )}
                       <div>{selectedDataset.record_count.toLocaleString()} records • {selectedDataset.format.toUpperCase()} format</div>
+                      <div className="text-xs text-green-700 font-mono truncate">{selectedDataset.path}</div>
                       <div>Columns: {selectedDataset.columns.join(', ')}</div>
                       <div>
                         {selectedDataset.has_entities ? (
diff --git a/evaluation/ai-assistant/src/app/types.ts b/evaluation/ai-assistant/src/app/types.ts
index dc8fd01cd..38c0baecb 100644
--- a/evaluation/ai-assistant/src/app/types.ts
+++ b/evaluation/ai-assistant/src/app/types.ts
@@ -31,10 +31,15 @@ export interface Record {
 export interface UploadedDataset {
   id: string;
   filename: string;
+  name: string; // display name shown in the dataset picker; changed through api.datasets.rename
+  description: string; // free-text description, rendered under the name when non-empty
+  path: string; // dataset file path (displayed in Setup)
   format: 'csv' | 'json';
   record_count: number;
   has_entities: boolean;
   columns: string[];
+  text_column: string; // name of the column holding the record text
+  entities_column?: string | null; // name of the column holding entity annotations, when the dataset has one
 }
 
 export interface SetupConfig {