diff --git a/evaluation/ai-assistant/backend/main.py b/evaluation/ai-assistant/backend/main.py
index ca741a3ca..0bf84bb7d 100644
--- a/evaluation/ai-assistant/backend/main.py
+++ b/evaluation/ai-assistant/backend/main.py
@@ -1,7 +1,7 @@
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 
-from routers import analysis, datasets, decision, evaluation, review, sampling
+from routers import analysis, datasets, decision, evaluation, review, sampling, upload
 
 app = FastAPI(title="Presidio Evaluation Flow API", version="0.1.0")
 
@@ -13,6 +13,7 @@
 )
 
 app.include_router(datasets.router)
+app.include_router(upload.router)
 app.include_router(sampling.router)
 app.include_router(analysis.router)
 app.include_router(review.router)
diff --git a/evaluation/ai-assistant/backend/mock_data.py b/evaluation/ai-assistant/backend/mock_data.py
index 06fb784c1..ebcfc88c3 100644
--- a/evaluation/ai-assistant/backend/mock_data.py
+++ b/evaluation/ai-assistant/backend/mock_data.py
@@ -53,32 +53,32 @@
             "Contact: john.smith@email.com, Phone: 555-0123. SSN: 123-45-6789."
         ),
         presidio_entities=[
-            Entity(text="John Smith", type="PERSON", start=8, end=18, score=0.95),
-            Entity(text="03/15/1985", type="DATE_OF_BIRTH", start=24, end=34, score=0.92),
-            Entity(text="john.smith@email.com", type="EMAIL", start=77, end=97, score=0.98),
-            Entity(text="555-0123", type="PHONE_NUMBER", start=106, end=114, score=0.89),
-            Entity(text="123-45-6789", type="US_SSN", start=121, end=132, score=0.99),
+            Entity(text="John Smith", entity_type="PERSON", start=8, end=18, score=0.95),
+            Entity(text="03/15/1985", entity_type="DATE_OF_BIRTH", start=24, end=34, score=0.92),
+            Entity(text="john.smith@email.com", entity_type="EMAIL", start=77, end=97, score=0.98),
+            Entity(text="555-0123", entity_type="PHONE_NUMBER", start=106, end=114, score=0.89),
+            Entity(text="123-45-6789", entity_type="US_SSN", start=121, end=132, score=0.99),
         ],
         llm_entities=[
-            Entity(text="John Smith", type="PERSON", start=8, end=18, score=0.96),
-            Entity(text="03/15/1985", type="DATE_OF_BIRTH", start=24, end=34, score=0.94),
-            Entity(text="2025-01-10", type="DATE", start=52, end=62, score=0.88),
-            Entity(text="john.smith@email.com", type="EMAIL", start=77, end=97, score=0.97),
-            Entity(text="555-0123", type="PHONE_NUMBER", start=106, end=114, score=0.91),
-            Entity(text="123-45-6789", type="US_SSN", start=121, end=132, score=0.98),
+            Entity(text="John Smith", entity_type="PERSON", start=8, end=18, score=0.96),
+            Entity(text="03/15/1985", entity_type="DATE_OF_BIRTH", start=24, end=34, score=0.94),
+            Entity(text="2025-01-10", entity_type="DATE", start=52, end=62, score=0.88),
+            Entity(text="john.smith@email.com", entity_type="EMAIL", start=77, end=97, score=0.97),
+            Entity(text="555-0123", entity_type="PHONE_NUMBER", start=106, end=114, score=0.91),
+            Entity(text="123-45-6789", entity_type="US_SSN", start=121, end=132, score=0.98),
         ],
     ),
     Record(
         id="rec-002",
         text="Dr. Sarah Johnson reviewed the case. Medical Record #MR-445521. Insurance Policy: POL-8821-USA.",
         presidio_entities=[
-            Entity(text="Sarah Johnson", type="PERSON", start=4, end=17, score=0.93),
-            Entity(text="MR-445521", type="MEDICAL_RECORD", start=55, end=64, score=0.87),
+            Entity(text="Sarah Johnson", entity_type="PERSON", start=4, end=17, score=0.93),
+            Entity(text="MR-445521", entity_type="MEDICAL_RECORD", start=55, end=64, score=0.87),
         ],
         llm_entities=[
-            Entity(text="Dr. Sarah Johnson", type="PERSON", start=0, end=17, score=0.95),
-            Entity(text="MR-445521", type="MEDICAL_RECORD", start=55, end=64, score=0.89),
-            Entity(text="POL-8821-USA", type="INSURANCE_POLICY", start=84, end=96, score=0.82),
+            Entity(text="Dr. Sarah Johnson", entity_type="PERSON", start=0, end=17, score=0.95),
+            Entity(text="MR-445521", entity_type="MEDICAL_RECORD", start=55, end=64, score=0.89),
+            Entity(text="POL-8821-USA", entity_type="INSURANCE_POLICY", start=84, end=96, score=0.82),
         ],
     ),
     Record(
@@ -88,19 +88,19 @@
             "Salary: $85,000. Emergency contact: Mike Doe at 555-9876."
         ),
         presidio_entities=[
-            Entity(text="EMP-8821", type="EMPLOYEE_ID", start=13, end=21, score=0.91),
-            Entity(text="Jane Doe", type="PERSON", start=23, end=31, score=0.94),
-            Entity(text="2023-06-01", type="DATE", start=41, end=51, score=0.96),
-            Entity(text="Mike Doe", type="PERSON", start=89, end=97, score=0.92),
-            Entity(text="555-9876", type="PHONE_NUMBER", start=101, end=109, score=0.88),
+            Entity(text="EMP-8821", entity_type="EMPLOYEE_ID", start=13, end=21, score=0.91),
+            Entity(text="Jane Doe", entity_type="PERSON", start=23, end=31, score=0.94),
+            Entity(text="2023-06-01", entity_type="DATE", start=41, end=51, score=0.96),
+            Entity(text="Mike Doe", entity_type="PERSON", start=89, end=97, score=0.92),
+            Entity(text="555-9876", entity_type="PHONE_NUMBER", start=101, end=109, score=0.88),
         ],
         llm_entities=[
-            Entity(text="EMP-8821", type="EMPLOYEE_ID", start=13, end=21, score=0.90),
-            Entity(text="Jane Doe", type="PERSON", start=23, end=31, score=0.96),
-            Entity(text="2023-06-01", type="DATE", start=41, end=51, score=0.94),
-            Entity(text="$85,000", type="SALARY", start=61, end=68, score=0.79),
-            Entity(text="Mike Doe", type="PERSON", start=89, end=97, score=0.93),
-            Entity(text="555-9876", type="PHONE_NUMBER", start=101, end=109, score=0.90),
+            Entity(text="EMP-8821", entity_type="EMPLOYEE_ID", start=13, end=21, score=0.90),
+            Entity(text="Jane Doe", entity_type="PERSON", start=23, end=31, score=0.96),
+            Entity(text="2023-06-01", entity_type="DATE", start=41, end=51, score=0.94),
+            Entity(text="$85,000", entity_type="SALARY", start=61, end=68, score=0.79),
+            Entity(text="Mike Doe", entity_type="PERSON", start=89, end=97, score=0.93),
+            Entity(text="555-9876", entity_type="PHONE_NUMBER", start=101, end=109, score=0.90),
         ],
     ),
     Record(
@@ -110,13 +110,13 @@
             "Customer: alice.wong@company.com. IP: 192.168.1.100"
         ),
         presidio_entities=[
-            Entity(text="4532", type="CREDIT_CARD", start=22, end=26, score=0.65),
-            Entity(text="alice.wong@company.com", type="EMAIL", start=64, end=86, score=0.97),
-            Entity(text="192.168.1.100", type="IP_ADDRESS", start=92, end=105, score=0.99),
+            Entity(text="4532", entity_type="CREDIT_CARD", start=22, end=26, score=0.65),
+            Entity(text="alice.wong@company.com", entity_type="EMAIL", start=64, end=86, score=0.97),
+            Entity(text="192.168.1.100", entity_type="IP_ADDRESS", start=92, end=105, score=0.99),
         ],
         llm_entities=[
-            Entity(text="alice.wong@company.com", type="EMAIL", start=64, end=86, score=0.98),
-            Entity(text="192.168.1.100", type="IP_ADDRESS", start=92, end=105, score=0.97),
+            Entity(text="alice.wong@company.com", entity_type="EMAIL", start=64, end=86, score=0.98),
+            Entity(text="192.168.1.100", entity_type="IP_ADDRESS", start=92, end=105, score=0.97),
         ],
     ),
     Record(
@@ -126,13 +126,13 @@
             "Doctor notes indicate history of diabetes."
         ),
         presidio_entities=[
-            Entity(text="Robert Chen", type="PERSON", start=17, end=28, score=0.94),
-            Entity(text="ABC-123", type="MEDICATION_CODE", start=41, end=48, score=0.71),
+            Entity(text="Robert Chen", entity_type="PERSON", start=17, end=28, score=0.94),
+            Entity(text="ABC-123", entity_type="MEDICATION_CODE", start=41, end=48, score=0.71),
         ],
         llm_entities=[
-            Entity(text="Robert Chen", type="PERSON", start=17, end=28, score=0.95),
-            Entity(text="ABC-123", type="MEDICATION_CODE", start=41, end=48, score=0.73),
-            Entity(text="diabetes", type="MEDICAL_CONDITION", start=97, end=105, score=0.86),
+            Entity(text="Robert Chen", entity_type="PERSON", start=17, end=28, score=0.95),
+            Entity(text="ABC-123", entity_type="MEDICATION_CODE", start=41, end=48, score=0.73),
+            Entity(text="diabetes", entity_type="MEDICAL_CONDITION", start=97, end=105, score=0.86),
         ],
     ),
 ]
@@ -189,7 +189,7 @@
     EntityMiss(
         record_id="rec-004",
         record_text="Credit card ending in 4532 was used for transaction. Customer: alice.wong@company.com.",
-        missed_entity=Entity(text="4532", type="CREDIT_CARD", start=22, end=26, score=0.65),
+        missed_entity=Entity(text="4532", entity_type="CREDIT_CARD", start=22, end=26, score=0.65),
         miss_type=MissType.false_negative,
         entity_type="CREDIT_CARD",
         risk_level=RiskLevel.high,
@@ -197,7 +197,7 @@
     EntityMiss(
         record_id="rec-002",
         record_text="Dr. Sarah Johnson reviewed the case. Insurance Policy: POL-8821-USA.",
-        missed_entity=Entity(text="POL-8821-USA", type="INSURANCE_POLICY", start=56, end=68),
+        missed_entity=Entity(text="POL-8821-USA", entity_type="INSURANCE_POLICY", start=56, end=68),
         miss_type=MissType.false_negative,
         entity_type="INSURANCE_POLICY",
         risk_level=RiskLevel.medium,
@@ -208,7 +208,7 @@
             "Prescription for Robert Chen: Medication ABC-123, dosage 50mg. "
             "Doctor notes indicate history of diabetes."
         ),
-        missed_entity=Entity(text="diabetes", type="MEDICAL_CONDITION", start=97, end=105),
+        missed_entity=Entity(text="diabetes", entity_type="MEDICAL_CONDITION", start=97, end=105),
         miss_type=MissType.false_negative,
         entity_type="MEDICAL_CONDITION",
         risk_level=RiskLevel.high,
@@ -216,7 +216,7 @@
     EntityMiss(
         record_id="rec-003",
         record_text="Employee ID: EMP-8821, Jane Doe, started 2023-06-01. Salary: $85,000.",
-        missed_entity=Entity(text="$85,000", type="SALARY", start=61, end=68),
+        missed_entity=Entity(text="$85,000", entity_type="SALARY", start=61, end=68),
         miss_type=MissType.false_negative,
         entity_type="SALARY",
         risk_level=RiskLevel.medium,
diff --git a/evaluation/ai-assistant/backend/models.py b/evaluation/ai-assistant/backend/models.py
index 6c4ae6150..03c91219d 100644
--- a/evaluation/ai-assistant/backend/models.py
+++ b/evaluation/ai-assistant/backend/models.py
@@ -29,7 +29,7 @@ class Dataset(BaseModel):
 
 class Entity(BaseModel):
     text: str
-    type: str
+    entity_type: str
     start: int
     end: int
     score: Optional[float] = None
@@ -38,8 +38,9 @@ class Entity(BaseModel):
 class Record(BaseModel):
     id: str
     text: str
-    presidio_entities: list[Entity]
-    llm_entities: list[Entity]
+    presidio_entities: list[Entity] = []  # now optional; pydantic copies the default per instance
+    llm_entities: list[Entity] = []  # now optional; pydantic copies the default per instance
+    dataset_entities: list[Entity] = []  # entities parsed from the loaded dataset's entities column
     golden_entities: Optional[list[Entity]] = None
 
 
@@ -84,10 +85,28 @@ class EntityMiss(BaseModel):
 # --- Request / Response models ---
 
 
+class DatasetLoadRequest(BaseModel):
+    path: str  # absolute path (``~`` is expanded) of the file on the backend host
+    format: str  # "csv" | "json"
+    text_column: str = "text"  # CSV column / JSON key that holds the record text
+    entities_column: Optional[str] = None  # Optional, not ``str | None``: pydantic must evaluate this on Python 3.9
+
+
+class UploadedDataset(BaseModel):
+    id: str  # "upload-" + 8 hex chars, assigned when the file is loaded
+    filename: str  # base name of the loaded file, without its directory
+    format: str  # "csv" | "json"
+    record_count: int  # number of records kept after parsing
+    has_entities: bool  # CSV: entities column present in header; JSON: at least one entity parsed
+    columns: list[str]  # CSV header names, or the sorted union of JSON object keys
+
+
 class SetupConfig(BaseModel):
     dataset_id: str
     compliance_frameworks: list[ComplianceFramework]
     cloud_restriction: str  # "allowed" | "restricted"
+    run_presidio: bool = True  # NOTE(review): presumably toggles the Presidio detection pass; confirm against its consumer
+    run_llm: bool = True  # NOTE(review): presumably toggles the LLM detection pass; confirm against its consumer
 
 
 class SamplingConfig(BaseModel):
diff --git a/evaluation/ai-assistant/backend/pyproject.toml b/evaluation/ai-assistant/backend/pyproject.toml
index 0e3d3e365..fb1e2001b 100644
--- a/evaluation/ai-assistant/backend/pyproject.toml
+++ b/evaluation/ai-assistant/backend/pyproject.toml
@@ -9,6 +9,7 @@ python = "^3.9"
 fastapi = ">=0.115.0"
 uvicorn = { version = ">=0.32.0", extras = ["standard"] }
 pydantic = ">=2.0.0"
+python-multipart = ">=0.0.9"
 
 [build-system]
 requires = ["poetry-core"]
diff --git a/evaluation/ai-assistant/backend/routers/upload.py b/evaluation/ai-assistant/backend/routers/upload.py
new file mode 100644
index 000000000..d3ff5f1c6
--- /dev/null
+++ b/evaluation/ai-assistant/backend/routers/upload.py
@@ -0,0 +1,199 @@
+"""Dataset load router — reads CSV and JSON files from a local path."""
+
+from __future__ import annotations
+
+import csv
+import io
+import json
+import os
+import uuid
+
+from fastapi import APIRouter, HTTPException
+
+from models import DatasetLoadRequest, Entity, Record, UploadedDataset
+
+router = APIRouter(prefix="/api/datasets", tags=["datasets"])
+
+# Process-local, in-memory stores keyed by dataset id (nothing is persisted).
+_uploaded: dict[str, UploadedDataset] = {}  # dataset id -> summary metadata
+_records: dict[str, list[Record]] = {}  # dataset id -> parsed records
+
+MAX_FILE_SIZE = 50 * 1024 * 1024  # 50 MB; load_dataset rejects larger files before reading them
+
+
+def _parse_entities(raw: str | list | None) -> list[Entity]:
+    """Best-effort parse of a JSON string or list into ``Entity`` models.
+
+    Empty, undecodable or non-list input gives ``[]``; bad items are skipped.
+    """
+    if raw and isinstance(raw, str):
+        try:
+            raw = json.loads(raw)
+        except json.JSONDecodeError:
+            return []
+    if not raw or not isinstance(raw, list):
+        return []
+    entities: list[Entity] = []
+    for item in raw:
+        if isinstance(item, dict):
+            try:
+                entities.append(Entity(**item))
+            except (TypeError, ValueError):  # pydantic's ValidationError is a ValueError
+                continue
+    return entities
+
+
+def _parse_csv(
+    content: str,
+    text_column: str,
+    entities_column: str | None,
+) -> tuple[list[Record], list[str], bool]:
+    """Parse CSV text into ``(records, header_columns, has_entities)``.
+
+    Rows with blank text are skipped; ``has_entities`` means the entities
+    column exists in the header. Raises a 400 if ``text_column`` is missing.
+    """
+    reader = csv.DictReader(io.StringIO(content))
+    fieldnames = list(reader.fieldnames or [])
+    if text_column not in fieldnames:
+        raise HTTPException(
+            status_code=400,
+            detail=f"CSV must have a '{text_column}' column. Found: {fieldnames}",
+        )
+
+    has_entities = entities_column is not None and entities_column in fieldnames
+    records: list[Record] = []
+    for i, row in enumerate(reader):
+        # DictReader fills cells missing from a short row with None, so the
+        # "" default of row.get() never applies and .strip() would crash.
+        text = (row.get(text_column) or "").strip()
+        if not text:
+            continue
+        entities = _parse_entities(row.get(entities_column)) if has_entities else []
+        records.append(Record(id=f"rec-{i + 1:04d}", text=text, dataset_entities=entities))
+    return records, fieldnames, has_entities
+
+
+def _parse_json(
+    content: str,
+    text_column: str,
+    entities_column: str | None,
+) -> tuple[list[Record], list[str], bool]:
+    """Parse JSON content (array of objects, one object, or JSONL) into records.
+
+    Returns ``(records, columns, has_entities)``: ``columns`` is the sorted union
+    of keys over objects that contain ``text_column``; ``has_entities`` is true
+    once any record yields a valid entity. Objects without ``text_column`` or
+    with null/blank text are skipped. Raises ``HTTPException`` (400) when the
+    content is not a usable collection or no record survives.
+    """
+    try:
+        data = json.loads(content)
+    except json.JSONDecodeError:
+        # Not one JSON document: fall back to JSONL, ignoring undecodable lines.
+        data = []
+        for line in content.splitlines():
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                data.append(json.loads(line))
+            except json.JSONDecodeError:
+                continue
+    if isinstance(data, dict):
+        # A one-line JSONL file decodes as a bare object; treat it as one record.
+        data = [data]
+
+    if not isinstance(data, list) or not data:
+        raise HTTPException(
+            status_code=400,
+            detail="JSON file must contain an array of objects or be in JSONL format.",
+        )
+
+    records: list[Record] = []
+    columns: set[str] = set()
+    has_entities = False
+
+    for i, obj in enumerate(data):
+        if not isinstance(obj, dict) or text_column not in obj:
+            continue
+        columns.update(obj.keys())
+        raw_text = obj[text_column]
+        # str(None) would yield the literal text "None"; treat null as blank.
+        text = "" if raw_text is None else str(raw_text).strip()
+        if not text:
+            continue
+        entities = _parse_entities(obj.get(entities_column) if entities_column else None)
+        has_entities = has_entities or bool(entities)
+        records.append(Record(id=f"rec-{i + 1:04d}", text=text, dataset_entities=entities))
+
+    if not records:
+        raise HTTPException(
+            status_code=400,
+            detail=f"No valid records found. Each object must have a '{text_column}' field.",
+        )
+    return records, sorted(columns), has_entities
+
+
+@router.post("/load")
+async def load_dataset(req: DatasetLoadRequest):
+    """Load a CSV or JSON file from a local absolute path.
+
+    Stores the parsed records in memory under a new ``upload-<hex>`` id and
+    returns the ``UploadedDataset`` summary. Rejections raised here are 400s.
+    """
+    if req.format not in ("csv", "json"):
+        raise HTTPException(
+            status_code=400,
+            detail=f"Unsupported format '{req.format}'. Only 'csv' and 'json' are supported.",
+        )
+
+    # NOTE(review): reads any file the server process can open; confirm this API is trusted-only.
+    file_path = os.path.expanduser(req.path)
+    if not os.path.isabs(file_path):
+        raise HTTPException(status_code=400, detail="Path must be absolute.")
+    if not os.path.isfile(file_path):
+        raise HTTPException(status_code=400, detail=f"File not found: {file_path}")
+    if os.path.getsize(file_path) > MAX_FILE_SIZE:
+        raise HTTPException(status_code=400, detail="File too large (max 50 MB)")
+
+    try:
+        # utf-8-sig reads plain UTF-8 too and drops a BOM that would corrupt the first CSV header.
+        with open(file_path, encoding="utf-8-sig") as f:
+            content = f.read()
+    except (OSError, UnicodeDecodeError) as exc:
+        raise HTTPException(status_code=400, detail=f"Could not read file: {exc}") from exc
+
+    parse = _parse_csv if req.format == "csv" else _parse_json
+    records, columns, has_entities = parse(content, req.text_column, req.entities_column)
+    if not records:
+        raise HTTPException(status_code=400, detail="No valid records found in file")
+
+    dataset_id = f"upload-{uuid.uuid4().hex[:8]}"
+    _uploaded[dataset_id] = dataset = UploadedDataset(
+        id=dataset_id,
+        filename=os.path.basename(file_path),
+        format=req.format,
+        record_count=len(records),
+        has_entities=has_entities,
+        columns=columns,
+    )
+    _records[dataset_id] = records
+    return dataset
+
+
+@router.get("/{dataset_id}/records")
+async def get_dataset_records(dataset_id: str):
+    """Return all parsed records of a loaded dataset; 404 if the id is unknown."""
+    if (found := _records.get(dataset_id)) is None:
+        raise HTTPException(status_code=404, detail="Dataset not found")
+    return found
+
+
+@router.get("/{dataset_id}/preview")
+async def preview_dataset(dataset_id: str, limit: int = 5):
+    """Return at most ``limit`` leading records of a loaded dataset (404 if unknown)."""
+    if dataset_id not in _records:
+        raise HTTPException(status_code=404, detail="Dataset not found")
+    # Clamp: a negative limit would slice from the end and return almost everything.
+    return _records[dataset_id][: max(limit, 0)]
diff --git a/evaluation/ai-assistant/data/sample_medical_records.csv b/evaluation/ai-assistant/data/sample_medical_records.csv
new file mode 100644
index 000000000..d30fe26f9
--- /dev/null
+++ b/evaluation/ai-assistant/data/sample_medical_records.csv
@@ -0,0 +1,11 @@
+text,entities
+Patient John Smith was admitted on 03/15/2025 with diagnosis code A09.,"[{""text"": ""John Smith"", ""entity_type"": ""PERSON"", ""start"": 8, ""end"": 18, ""score"": 1.0}, {""text"": ""03/15/2025"", ""entity_type"": ""DATE_TIME"", ""start"": 35, ""end"": 45, ""score"": 1.0}]"
+Contact Dr. Emily Chen at emily.chen@hospital.org or call 555-987-6543.,"[{""text"": ""Emily Chen"", ""entity_type"": ""PERSON"", ""start"": 12, ""end"": 22, ""score"": 1.0}, {""text"": ""emily.chen@hospital.org"", ""entity_type"": ""EMAIL_ADDRESS"", ""start"": 26, ""end"": 49, ""score"": 1.0}, {""text"": ""555-987-6543"", ""entity_type"": ""PHONE_NUMBER"", ""start"": 58, ""end"": 70, ""score"": 1.0}]"
+"SSN 123-45-6789 belongs to Maria Garcia, born on January 12, 1990.","[{""text"": ""123-45-6789"", ""entity_type"": ""US_SSN"", ""start"": 4, ""end"": 15, ""score"": 1.0}, {""text"": ""Maria Garcia"", ""entity_type"": ""PERSON"", ""start"": 27, ""end"": 39, ""score"": 1.0}, {""text"": ""January 12, 1990"", ""entity_type"": ""DATE_TIME"", ""start"": 49, ""end"": 65, ""score"": 1.0}]"
+"The patient resides at 742 Evergreen Terrace, Springfield, IL 62704.","[{""text"": ""742 Evergreen Terrace, Springfield, IL 62704"", ""entity_type"": ""LOCATION"", ""start"": 23, ""end"": 67, ""score"": 1.0}]"
+Credit card number 4111-1111-1111-1111 was used by Robert Johnson on 12/01/2024.,"[{""text"": ""4111-1111-1111-1111"", ""entity_type"": ""CREDIT_CARD"", ""start"": 19, ""end"": 38, ""score"": 1.0}, {""text"": ""Robert Johnson"", ""entity_type"": ""PERSON"", ""start"": 51, ""end"": 65, ""score"": 1.0}, {""text"": ""12/01/2024"", ""entity_type"": ""DATE_TIME"", ""start"": 69, ""end"": 79, ""score"": 1.0}]"
+"Please send records to Sarah Williams at 1600 Pennsylvania Ave, Washington DC 20500.","[{""text"": ""Sarah Williams"", ""entity_type"": ""PERSON"", ""start"": 23, ""end"": 37, ""score"": 1.0}, {""text"": ""1600 Pennsylvania Ave, Washington DC 20500"", ""entity_type"": ""LOCATION"", ""start"": 41, ""end"": 83, ""score"": 1.0}]"
+"Driver's license D1234567 issued to James Brown, DOB 07/04/1985.","[{""text"": ""D1234567"", ""entity_type"": ""US_DRIVER_LICENSE"", ""start"": 17, ""end"": 25, ""score"": 1.0}, {""text"": ""James Brown"", ""entity_type"": ""PERSON"", ""start"": 36, ""end"": 47, ""score"": 1.0}, {""text"": ""07/04/1985"", ""entity_type"": ""DATE_TIME"", ""start"": 53, ""end"": 63, ""score"": 1.0}]"
+"Insurance ID BC-9876543 for patient Lisa Anderson, phone 202-555-0147.","[{""text"": ""BC-9876543"", ""entity_type"": ""MEDICAL_LICENSE"", ""start"": 13, ""end"": 23, ""score"": 1.0}, {""text"": ""Lisa Anderson"", ""entity_type"": ""PERSON"", ""start"": 36, ""end"": 49, ""score"": 1.0}, {""text"": ""202-555-0147"", ""entity_type"": ""PHONE_NUMBER"", ""start"": 57, ""end"": 69, ""score"": 1.0}]"
+Lab results for Michael Davis (MRN 00112233) were sent to m.davis@gmail.com.,"[{""text"": ""Michael Davis"", ""entity_type"": ""PERSON"", ""start"": 16, ""end"": 29, ""score"": 1.0}, {""text"": ""00112233"", ""entity_type"": ""US_BANK_NUMBER"", ""start"": 35, ""end"": 43, ""score"": 1.0}, {""text"": ""m.davis@gmail.com"", ""entity_type"": ""EMAIL_ADDRESS"", ""start"": 58, ""end"": 75, ""score"": 1.0}]"
+"Nurse Jennifer Lee noted blood pressure 140/90 for patient at 55 Oak Street, Boston MA 02108.","[{""text"": ""Jennifer Lee"", ""entity_type"": ""PERSON"", ""start"": 6, ""end"": 18, ""score"": 1.0}, {""text"": ""55 Oak Street, Boston MA 02108"", ""entity_type"": ""LOCATION"", ""start"": 62, ""end"": 92, ""score"": 1.0}]"
diff --git a/evaluation/ai-assistant/src/app/components/EntityComparison.tsx b/evaluation/ai-assistant/src/app/components/EntityComparison.tsx
index 0d127c323..28a9ff594 100644
--- a/evaluation/ai-assistant/src/app/components/EntityComparison.tsx
+++ b/evaluation/ai-assistant/src/app/components/EntityComparison.tsx
@@ -14,16 +14,17 @@ interface EntityComparisonProps {
   recordText: string;
   presidioEntities: Entity[];
   llmEntities: Entity[];
-  onConfirm: (recordId: string, entity: Entity, source: 'presidio' | 'llm' | 'manual') => void;
-  onReject: (recordId: string, entity: Entity, source: 'presidio' | 'llm') => void;
+  datasetEntities?: Entity[];
+  onConfirm: (recordId: string, entity: Entity, source: 'presidio' | 'llm' | 'manual' | 'dataset') => void;
+  onReject: (recordId: string, entity: Entity, source: 'presidio' | 'llm' | 'dataset') => void;
   onAddManual: (recordId: string, entity: Entity) => void;
 }
 
-type EntityStatus = 'match' | 'conflict' | 'presidio-only' | 'llm-only' | 'pending';
+type EntityStatus = 'match' | 'conflict' | 'presidio-only' | 'llm-only' | 'dataset-only' | 'pending';
 
 interface AnnotatedEntity extends Entity {
   status: EntityStatus;
-  source: 'presidio' | 'llm' | 'both';
+  source: 'presidio' | 'llm' | 'both' | 'dataset';
   confirmed?: boolean;
 }
 
@@ -32,12 +33,13 @@ export function EntityComparison({
   recordText,
   presidioEntities = [],
   llmEntities = [],
+  datasetEntities = [],
   onConfirm,
   onReject,
   onAddManual,
 }: EntityComparisonProps) {
   const [showAddManual, setShowAddManual] = useState(false);
-  const [manualEntity, setManualEntity] = useState({ text: '', type: '', start: 0, end: 0 });
+  const [manualEntity, setManualEntity] = useState({ text: '', entity_type: '', start: 0, end: 0 });
   const [confirmedEntities, setConfirmedEntities] = useState<Set<string>>(new Set());
   const [rejectedEntities, setRejectedEntities] = useState<Set<string>>(new Set());
   const [expandedContexts, setExpandedContexts] = useState<Set<string>>(new Set());
@@ -51,7 +53,7 @@ export function EntityComparison({
     );
     
     if (matchingLlm) {
-      if (matchingLlm.type === pe.type) {
+      if (matchingLlm.entity_type === pe.entity_type) {
         annotatedEntities.push({ ...pe, status: 'match', source: 'both' });
       } else {
         annotatedEntities.push({ ...pe, status: 'conflict', source: 'both' });
@@ -70,6 +72,15 @@ export function EntityComparison({
     }
   });
 
+  datasetEntities.forEach(de => {
+    const alreadyAdded = annotatedEntities.some(
+      ae => ae.text === de.text && ae.start === de.start && ae.end === de.end
+    );
+    if (!alreadyAdded) {
+      annotatedEntities.push({ ...de, status: 'dataset-only', source: 'dataset' });
+    }
+  });
+
   const getEntityKey = (entity: Entity) => `${entity.text}-${entity.start}-${entity.end}`;
 
   const getContextForEntity = (entity: Entity) => {
@@ -125,9 +136,9 @@ export function EntityComparison({
   };
 
   const handleAddManualEntity = () => {
-    if (manualEntity.text && manualEntity.type) {
+    if (manualEntity.text && manualEntity.entity_type) {
       onAddManual(recordId, manualEntity);
-      setManualEntity({ text: '', type: '', start: 0, end: 0 });
+      setManualEntity({ text: '', entity_type: '', start: 0, end: 0 });
       setShowAddManual(false);
     }
   };
@@ -149,6 +160,8 @@ export function EntityComparison({
         return <Badge className="bg-purple-100 text-purple-800 border-purple-300">Presidio Only</Badge>;
       case 'llm-only':
         return <Badge className="bg-cyan-100 text-cyan-800 border-cyan-300">LLM Only</Badge>;
+      case 'dataset-only':
+        return <Badge className="bg-emerald-100 text-emerald-800 border-emerald-300">Dataset</Badge>;
       default:
         return <Badge variant="secondary">Pending</Badge>;
     }
@@ -193,7 +206,7 @@ export function EntityComparison({
                 </div>
                 <div>
                   <Label>Entity Type</Label>
-                  <Select value={manualEntity.type} onValueChange={(val) => setManualEntity({ ...manualEntity, type: val })}>
+                  <Select value={manualEntity.entity_type} onValueChange={(val) => setManualEntity({ ...manualEntity, entity_type: val })}>
                     <SelectTrigger>
                       <SelectValue placeholder="Select type..." />
                     </SelectTrigger>
@@ -234,7 +247,7 @@ export function EntityComparison({
                     <div className="flex-1 space-y-2">
                       <div className="flex items-center gap-2">
                         <span className="font-medium text-slate-900">{entity.text}</span>
-                        <Badge variant="outline">{entity.type}</Badge>
+                        <Badge variant="outline">{entity.entity_type}</Badge>
                         {getStatusBadge(entity.status, isConfirmed, isRejected)}
                       </div>
                       
diff --git a/evaluation/ai-assistant/src/app/pages/Anonymization.tsx b/evaluation/ai-assistant/src/app/pages/Anonymization.tsx
index 50e4c9228..2077ab2c4 100644
--- a/evaluation/ai-assistant/src/app/pages/Anonymization.tsx
+++ b/evaluation/ai-assistant/src/app/pages/Anonymization.tsx
@@ -1,34 +1,57 @@
-import { useState, useEffect } from 'react';
+import { useState, useEffect, useMemo } from 'react';
 import { useNavigate } from 'react-router';
 import { Card } from '../components/ui/card';
 import { Button } from '../components/ui/button';
 import { Progress } from '../components/ui/progress';
 import { Badge } from '../components/ui/badge';
 import { Alert, AlertDescription } from '../components/ui/alert';
-import { ArrowRight, Loader2, CheckCircle, Shield, Sparkles, AlertTriangle } from 'lucide-react';
+import { ArrowRight, Loader2, CheckCircle, Shield, Sparkles, AlertTriangle, Database } from 'lucide-react';
+import type { SetupConfig } from '../types';
 
 export function Anonymization() {
   const navigate = useNavigate();
-  const [presidioProgress, setPresidioProgress] = useState(0);
-  const [llmProgress, setLlmProgress] = useState(0);
-  const [presidioComplete, setPresidioComplete] = useState(false);
-  const [llmComplete, setLlmComplete] = useState(false);
+
+  const setupConfig = useMemo<SetupConfig | null>(() => {
+    try {
+      const raw = sessionStorage.getItem('setupConfig');
+      return raw ? JSON.parse(raw) : null;
+    } catch {
+      return null;
+    }
+  }, []);
+
+  const runPresidio = setupConfig?.runPresidio ?? true;
+  const runLlm = setupConfig?.runLlm ?? true;
+  const hasDatasetEntities = setupConfig?.hasDatasetEntities ?? false;
+
+  const [presidioProgress, setPresidioProgress] = useState(runPresidio ? 0 : 100);
+  const [llmProgress, setLlmProgress] = useState(runLlm ? 0 : 100);
+  const [presidioComplete, setPresidioComplete] = useState(!runPresidio);
+  const [llmComplete, setLlmComplete] = useState(!runLlm);
 
   const isComplete = presidioComplete && llmComplete;
 
   useEffect(() => {
-    const presidioInterval = setInterval(() => {
-      setPresidioProgress((prev) => {
-        if (prev >= 100) {
-          clearInterval(presidioInterval);
-          setPresidioComplete(true);
-          return 100;
-        }
-        return prev + 2;
-      });
-    }, 50);
+    if (!runPresidio && !runLlm) return; // nothing to simulate
 
-    setTimeout(() => {
+    if (runPresidio) {
+      const presidioInterval = setInterval(() => {
+        setPresidioProgress((prev) => {
+          if (prev >= 100) {
+            clearInterval(presidioInterval);
+            setPresidioComplete(true);
+            return 100;
+          }
+          return prev + 2;
+        });
+      }, 50);
+      return () => clearInterval(presidioInterval);
+    }
+  }, [runPresidio]);
+
+  useEffect(() => {
+    if (!runLlm) return;
+    const timer = setTimeout(() => {
       const llmInterval = setInterval(() => {
         setLlmProgress((prev) => {
           if (prev >= 100) {
@@ -39,12 +62,9 @@ export function Anonymization() {
           return prev + 1.5;
         });
       }, 80);
-    }, 500);
-
-    return () => {
-      clearInterval(presidioInterval);
-    };
-  }, []);
+    }, runPresidio ? 500 : 0);
+    return () => clearTimeout(timer);
+  }, [runLlm, runPresidio]);
 
   const handleContinue = () => {
     navigate('/human-review');
@@ -55,11 +75,33 @@ export function Anonymization() {
       <div>
         <h2 className="text-2xl font-semibold text-slate-900 mb-2">PII Detection Analysis</h2>
         <p className="text-slate-600">
-          Running Presidio and LLM analysis in parallel to detect PII entities across sampled records.
+          {runPresidio && runLlm
+            ? 'Running Presidio and LLM analysis in parallel to detect PII entities across sampled records.'
+            : runPresidio
+              ? 'Running Presidio analysis to detect PII entities across sampled records.'
+              : runLlm
+                ? 'Running LLM analysis to detect PII entities across sampled records.'
+                : 'Using dataset-provided entities. No additional detection selected.'}
         </p>
       </div>
 
+      {/* Dataset entities notice */}
+      {hasDatasetEntities && (
+        <Alert className="border-green-200 bg-green-50">
+          <Database className="size-4 text-green-600" />
+          <AlertDescription>
+            <div className="space-y-1">
+              <div className="font-medium text-green-900">Dataset Entities Available</div>
+              <div className="text-sm text-green-800">
+                Pre-identified entities from the uploaded dataset will be included in the human review step.
+              </div>
+            </div>
+          </AlertDescription>
+        </Alert>
+      )}
+
       {/* Important Notice */}
+      {runLlm && (
       <Alert className="border-amber-200 bg-amber-50">
         <AlertTriangle className="size-4 text-amber-600" />
         <AlertDescription>
@@ -72,10 +114,13 @@ export function Anonymization() {
           </div>
         </AlertDescription>
       </Alert>
+      )}
 
       {/* Side-by-Side Processing */}
-      <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
+      {(runPresidio || runLlm) ? (
+      <div className={`grid grid-cols-1 ${runPresidio && runLlm ? 'md:grid-cols-2' : ''} gap-6`}>
         {/* Presidio Processing */}
+        {runPresidio && (
         <Card className="p-6">
           <div className="space-y-6">
             <div className="flex items-center gap-2">
@@ -132,8 +177,10 @@ export function Anonymization() {
             )}
           </div>
         </Card>
+        )}
 
         {/* LLM Processing */}
+        {runLlm && (
         <Card className="p-6">
           <div className="space-y-6">
             <div className="flex items-center gap-2">
@@ -190,7 +237,21 @@ export function Anonymization() {
             )}
           </div>
         </Card>
+        )}
       </div>
+      ) : (
+        <Card className="p-6 border-green-200 bg-green-50">
+          <div className="flex items-center gap-3">
+            <CheckCircle className="size-6 text-green-600" />
+            <div>
+              <h3 className="font-semibold text-green-900">No additional detection needed</h3>
+              <p className="text-sm text-green-800 mt-1">
+                Proceeding with dataset-provided entities only. Continue to human review.
+              </p>
+            </div>
+          </div>
+        </Card>
+      )}
 
       {/* Combined Results */}
       {isComplete && (
diff --git a/evaluation/ai-assistant/src/app/pages/HumanReview.tsx b/evaluation/ai-assistant/src/app/pages/HumanReview.tsx
index aa53e954f..9e9da31fc 100644
--- a/evaluation/ai-assistant/src/app/pages/HumanReview.tsx
+++ b/evaluation/ai-assistant/src/app/pages/HumanReview.tsx
@@ -1,12 +1,12 @@
-import { useState } from 'react';
+import { useState, useMemo } from 'react';
 import { useNavigate } from 'react-router';
 import { Button } from '../components/ui/button';
 import { Progress } from '../components/ui/progress';
 import { Alert, AlertDescription } from '../components/ui/alert';
-import { ArrowRight, Users, CheckCircle, ChevronLeft, ChevronRight } from 'lucide-react';
+import { ArrowRight, Users, CheckCircle, ChevronLeft, ChevronRight, CheckCheck } from 'lucide-react';
 import { EntityComparison } from '../components/EntityComparison';
 import { mockRecords } from '../lib/mockData';
-import type { Entity } from '../types';
+import type { Entity, SetupConfig } from '../types';
 
 export function HumanReview() {
   const navigate = useNavigate();
@@ -14,6 +14,17 @@ export function HumanReview() {
   const [reviewedRecords, setReviewedRecords] = useState<Set<string>>(new Set());
   const [goldenSet, setGoldenSet] = useState<Record<string, Entity[]>>({});
 
+  // Read the 'setupConfig' entry from sessionStorage once per mount; a missing or unparsable entry yields null.
+  const setupConfig = useMemo<SetupConfig | null>(() => {
+    try {
+      const stored = sessionStorage.getItem('setupConfig');
+      if (stored) return JSON.parse(stored);
+    } catch { /* storage unavailable or invalid JSON: fall through to null */ }
+    return null;
+  }, []);
+
+  const hasDatasetEntities = setupConfig?.hasDatasetEntities ?? false;
+
   const currentRecord = mockRecords[currentRecordIndex];
   const totalRecords = mockRecords.length;
   const reviewProgress = (reviewedRecords.size / totalRecords) * 100;
@@ -60,6 +71,37 @@ export function HumanReview() {
     navigate('/evaluation');
   };
 
+  // Marks every record as reviewed and accepts, per record, the de-duplicated union of its
+  // Presidio, LLM and (when present) dataset-provided entities as that record's golden set.
+  const handleAutoConfirmAll = () => {
+    const reviewedIds = new Set<string>();
+    const goldenByRecord: Record<string, Entity[]> = {};
+
+    for (const record of mockRecords) {
+      const seenKeys = new Set<string>();
+      const merged: Entity[] = [];
+      // First occurrence wins; identity is text + span + entity type.
+      const collect = (entity: Entity) => {
+        const key = `${entity.text}-${entity.start}-${entity.end}-${entity.entity_type}`;
+        if (seenKeys.has(key)) return;
+        seenKeys.add(key);
+        merged.push(entity);
+      };
+
+      record.presidioEntities.forEach(collect);
+      record.llmEntities.forEach(collect);
+      if ('datasetEntities' in record) {
+        (record as any).datasetEntities?.forEach(collect);
+      }
+
+      reviewedIds.add(record.id);
+      goldenByRecord[record.id] = merged;
+    }
+
+    setReviewedRecords(reviewedIds);
+    setGoldenSet(goldenByRecord);
+  };
+
   const isReviewed = reviewedRecords.has(currentRecord.id);
   const canContinue = reviewedRecords.size === totalRecords;
 
@@ -79,7 +121,20 @@ export function HumanReview() {
           <div className="space-y-2">
             <div className="flex items-center justify-between">
               <span className="font-medium text-blue-900">Review Progress</span>
-              <span className="text-sm text-blue-800">{reviewedRecords.size} of {totalRecords} records reviewed ({reviewProgress.toFixed(0)}%)</span>
+              <div className="flex items-center gap-3">
+                <span className="text-sm text-blue-800">{reviewedRecords.size} of {totalRecords} records reviewed ({reviewProgress.toFixed(0)}%)</span>
+                {!canContinue && (
+                  <Button
+                    size="sm"
+                    variant="outline"
+                    className="border-blue-300 text-blue-700 hover:bg-blue-100"
+                    onClick={handleAutoConfirmAll}
+                  >
+                    <CheckCheck className="size-4 mr-1" />
+                    Auto Confirm All
+                  </Button>
+                )}
+              </div>
             </div>
             <Progress value={reviewProgress} className="h-2" />
           </div>
@@ -128,6 +183,7 @@ export function HumanReview() {
         recordText={currentRecord.text}
         presidioEntities={currentRecord.presidioEntities}
         llmEntities={currentRecord.llmEntities}
+        datasetEntities={'datasetEntities' in currentRecord ? (currentRecord as any).datasetEntities : []}
         onConfirm={handleConfirm}
         onReject={handleReject}
         onAddManual={handleAddManual}
@@ -194,15 +250,27 @@ export function HumanReview() {
 
       {/* Actions */}
       <div className="flex items-center justify-between pt-4">
-        <div className="text-sm text-slate-600">
-          {canContinue ? (
-            <span className="text-green-700 font-medium">
-              ✓ All records reviewed - ready to proceed
-            </span>
-          ) : (
-            <span>
-              Review all records to continue ({reviewedRecords.size}/{totalRecords} completed)
-            </span>
+        <div className="flex items-center gap-4">
+          <div className="text-sm text-slate-600">
+            {canContinue ? (
+              <span className="text-green-700 font-medium">
+                ✓ All records reviewed - ready to proceed
+              </span>
+            ) : (
+              <span>
+                Review all records to continue ({reviewedRecords.size}/{totalRecords} completed)
+              </span>
+            )}
+          </div>
+          {!canContinue && (
+            <Button
+              size="sm"
+              variant="outline"
+              onClick={handleAutoConfirmAll}
+            >
+              <CheckCheck className="size-4 mr-1" />
+              Auto Confirm All
+            </Button>
           )}
         </div>
         <Button
diff --git a/evaluation/ai-assistant/src/app/pages/Setup.tsx b/evaluation/ai-assistant/src/app/pages/Setup.tsx
index e227f84e5..cc0689c43 100644
--- a/evaluation/ai-assistant/src/app/pages/Setup.tsx
+++ b/evaluation/ai-assistant/src/app/pages/Setup.tsx
@@ -3,59 +3,107 @@ import { useNavigate } from 'react-router';
 import { Card } from '../components/ui/card';
 import { Button } from '../components/ui/button';
 import { Label } from '../components/ui/label';
-import { RadioGroup, RadioGroupItem } from '../components/ui/radio-group';
-import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '../components/ui/select';
+import { Input } from '../components/ui/input';
 import { Checkbox } from '../components/ui/checkbox';
 import { Alert, AlertDescription } from '../components/ui/alert';
-import { Database, Shield, ArrowRight, Cloud, CloudOff } from 'lucide-react';
-import { mockDatasets } from '../lib/mockData';
-import type { ComplianceFramework } from '../types';
-
-const complianceInfo = {
-  hipaa: {
-    label: 'HIPAA',
-    description: 'Health Insurance Portability and Accountability Act - for protected health information (PHI)',
-    requirements: 'Requires strict handling of medical records, patient data, and health-related PII',
-  },
-  gdpr: {
-    label: 'GDPR',
-    description: 'General Data Protection Regulation - European data privacy law',
-    requirements: 'Emphasizes data subject rights, consent, and cross-border data transfers',
-  },
-  ccpa: {
-    label: 'CCPA',
-    description: 'California Consumer Privacy Act - California state privacy law',
-    requirements: 'Focuses on consumer rights to know, delete, and opt-out of data sales',
-  },
-  general: {
-    label: 'General',
-    description: 'Standard data protection practices',
-    requirements: 'Basic PII protection without specific regulatory requirements',
-  },
-};
+import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '../components/ui/select';
+import { Database, Shield, ArrowRight, Cloud, FileText, CheckCircle, Loader2, X, Plus } from 'lucide-react';
+import { api } from '../lib/api';
+import type { ComplianceFramework, UploadedDataset } from '../types';
 
 export function Setup() {
   const navigate = useNavigate();
-  const [selectedDataset, setSelectedDataset] = useState('');
+  const [datasets, setDatasets] = useState<UploadedDataset[]>([
+    // Seed with the example dataset
+    {
+      id: 'ds-001',
+      filename: 'Example - Patient Records',
+      format: 'csv',
+      record_count: 1500,
+      has_entities: false,
+      columns: ['text'],
+    },
+  ]);
+  const [selectedDatasetId, setSelectedDatasetId] = useState('');
+  const [showAddForm, setShowAddForm] = useState(false);
+  const [filePath, setFilePath] = useState('');
+  const [fileFormat, setFileFormat] = useState<'csv' | 'json'>('csv');
+  const [textColumn, setTextColumn] = useState('text');
+  const [entitiesColumn, setEntitiesColumn] = useState('');
+  const [loading, setLoading] = useState(false);
+  const [loadError, setLoadError] = useState<string | null>(null);
+  const [previewRecords, setPreviewRecords] = useState<any[]>([]);
   const [complianceFrameworks, setComplianceFrameworks] = useState<ComplianceFramework[]>(['general']);
   const [cloudRestriction, setCloudRestriction] = useState<'allowed' | 'restricted'>('allowed');
+  const [runPresidio, setRunPresidio] = useState(true);
+  const [runLlm, setRunLlm] = useState(true);
+
+  const selectedDataset = datasets.find(d => d.id === selectedDatasetId) ?? null;
+  const canProceed = selectedDataset !== null;
+
+  // Dropdown handler: the '__add_new__' sentinel opens the load form; any other value selects that dataset.
+  const handleSelectDataset = async (value: string) => {
+    setLoadError(null); // do not carry an error from an earlier load attempt into a new interaction
+    if (value === '__add_new__') {
+      setShowAddForm(true);
+      return;
+    }
+    setSelectedDatasetId(value);
+    setShowAddForm(false);
+    setPreviewRecords([]);
+    // Preview is best-effort and only requested for ids with the 'upload-' prefix (the seed example is skipped).
+    if (value.startsWith('upload-')) {
+      try {
+        setPreviewRecords(await api.datasets.preview(value));
+      } catch { /* preview is optional; keep the empty list set above */ }
+    }
+  };
 
-  const canProceed = selectedDataset !== '';
-  const selectedDatasetObj = mockDatasets.find(d => d.id === selectedDataset);
-
-  const handleComplianceToggle = (framework: ComplianceFramework) => {
-    setComplianceFrameworks(prev => {
-      if (prev.includes(framework)) {
-        if (prev.length === 1) return prev;
-        return prev.filter(f => f !== framework);
-      } else {
-        return [...prev, framework];
-      }
-    });
+  // Loads a dataset from the entered file path via api.datasets.load, registers and selects it,
+  // then fetches a best-effort preview. Only a failed load is reported through loadError.
+  const handleLoadDataset = async () => {
+    const path = filePath.trim();
+    if (!path) {
+      setLoadError('Please provide an absolute file path.');
+      return;
+    }
+    setLoading(true);
+    setLoadError(null);
+    try {
+      const dataset: UploadedDataset = await api.datasets.load({
+        path,
+        format: fileFormat,
+        text_column: textColumn.trim() || 'text',
+        entities_column: entitiesColumn.trim() || undefined,
+      });
+      // Ids double as React keys and Select values, so replace a same-id entry instead of duplicating it.
+      setDatasets(prev => [...prev.filter(d => d.id !== dataset.id), dataset]);
+      setSelectedDatasetId(dataset.id);
+      setShowAddForm(false);
+      setFilePath('');
+      setTextColumn('text');
+      setEntitiesColumn('');
+      // The dataset is loaded at this point; a preview failure must not be reported as a load failure.
+      try { setPreviewRecords(await api.datasets.preview(dataset.id)); }
+      catch { setPreviewRecords([]); }
+    } catch (err: any) {
+      setLoadError(err?.message || 'Failed to load dataset');
+    } finally {
+      setLoading(false);
+    }
   };
 
   const handleContinue = () => {
-    if (canProceed) {
+    if (canProceed && selectedDataset) {
+      const config = { // persisted for later pages, which read it back from sessionStorage
+        datasetId: selectedDataset.id,
+        complianceFrameworks,
+        cloudRestriction,
+        runPresidio: selectedDataset.has_entities ? runPresidio : true, // toggles are only offered for datasets with entities; ignore stale values otherwise
+        runLlm: selectedDataset.has_entities ? runLlm : true, // same rule: without dataset entities detection always runs
+        hasDatasetEntities: selectedDataset.has_entities,
+      };
+      sessionStorage.setItem('setupConfig', JSON.stringify(config));
       navigate('/sampling');
     }
   };
@@ -74,121 +122,229 @@ export function Setup() {
         <div className="space-y-4">
           <div className="flex items-center gap-2 mb-4">
             <Database className="size-5 text-blue-600" />
-            <h3 className="font-semibold text-slate-900">Dataset Selection</h3>
+            <h3 className="font-semibold text-slate-900">Dataset</h3>
           </div>
 
-          <div className="space-y-3">
-            <Label>Select Dataset</Label>
-            <Select value={selectedDataset} onValueChange={setSelectedDataset}>
-              <SelectTrigger>
-                <SelectValue placeholder="Choose a dataset to evaluate..." />
+          {/* Dataset Dropdown */}
+          <div>
+            <Label htmlFor="dataset-select">Select a dataset</Label>
+            <Select value={showAddForm ? '__add_new__' : selectedDatasetId} onValueChange={handleSelectDataset}>
+              <SelectTrigger className="mt-1" id="dataset-select">
+                <SelectValue placeholder="Choose a dataset…" />
               </SelectTrigger>
               <SelectContent>
-                {mockDatasets.map(dataset => (
-                  <SelectItem key={dataset.id} value={dataset.id}>
-                    <div className="flex items-center gap-2">
-                      <span>{dataset.name}</span>
-                      <span className="text-xs text-slate-500">
-                        ({dataset.recordCount.toLocaleString()} records)
-                      </span>
-                    </div>
+                {datasets.map(ds => (
+                  <SelectItem key={ds.id} value={ds.id}>
+                    {ds.filename} — {ds.record_count.toLocaleString()} records
                   </SelectItem>
                 ))}
+                <SelectItem value="__add_new__">
+                  <span className="flex items-center gap-1">
+                    <Plus className="size-3" />
+                    Add new dataset…
+                  </span>
+                </SelectItem>
               </SelectContent>
             </Select>
-
-            {selectedDatasetObj && (
-              <Alert>
-                <AlertDescription>
-                  <div className="space-y-1">
-                    <div className="flex items-center gap-2">
-                      <span className="font-medium">Type:</span>
-                      <span className="capitalize">{selectedDatasetObj.type}</span>
-                    </div>
-                    <div className="text-sm text-slate-600">{selectedDatasetObj.description}</div>
-                  </div>
-                </AlertDescription>
-              </Alert>
-            )}
           </div>
-        </div>
-      </Card>
 
-      {/* Compliance Framework */}
-      <Card className="p-6">
-        <div className="space-y-4">
-          <div className="flex items-center gap-2 mb-4">
-            <Shield className="size-5 text-blue-600" />
-            <h3 className="font-semibold text-slate-900">Compliance Context</h3>
-          </div>
+          {/* Add Dataset Form */}
+          {showAddForm && (
+            <div className="space-y-4 p-4 bg-slate-50 border border-slate-200 rounded-lg">
+              <div className="flex items-center justify-between">
+                <Label className="font-medium text-slate-900">Load Dataset from File</Label>
+                <Button size="sm" variant="ghost" onClick={() => { setShowAddForm(false); setLoadError(null); }}>
+                  <X className="size-4" />
+                </Button>
+              </div>
 
-          <div className="space-y-3">
-            <Label>Regulatory Frameworks (select one or more)</Label>
-            <div className="space-y-2">
-              {Object.entries(complianceInfo).map(([key, info]) => (
-                <div key={key} className="flex items-start space-x-3 p-3 rounded-lg border border-slate-200 hover:bg-slate-50">
-                  <Checkbox
-                    id={key}
-                    checked={complianceFrameworks.includes(key as ComplianceFramework)}
-                    onCheckedChange={() => handleComplianceToggle(key as ComplianceFramework)}
-                    className="mt-1"
+              <div className="space-y-3">
+                <div>
+                  <Label htmlFor="file-path">Absolute File Path</Label>
+                  <Input
+                    id="file-path"
+                    placeholder="/path/to/dataset.csv"
+                    value={filePath}
+                    onChange={(e) => setFilePath(e.target.value)}
+                    className="mt-1 font-mono text-sm"
                   />
-                  <div className="flex-1">
-                    <Label htmlFor={key} className="cursor-pointer">
-                      <div className="font-medium">{info.label}</div>
-                      <div className="text-sm text-slate-600 mt-1">{info.description}</div>
-                      {complianceFrameworks.includes(key as ComplianceFramework) && (
-                        <div className="text-xs text-slate-500 mt-2 pl-3 border-l-2 border-blue-600">
-                          {info.requirements}
-                        </div>
-                      )}
-                    </Label>
-                  </div>
                 </div>
-              ))}
-            </div>
-          </div>
-        </div>
-      </Card>
 
-      {/* Cloud Access */}
-      <Card className="p-6">
-        <div className="space-y-4">
-          <div className="flex items-center gap-2 mb-4">
-            <Cloud className="size-5 text-blue-600" />
-            <h3 className="font-semibold text-slate-900">Data Access Constraints</h3>
-          </div>
+                <div>
+                  <Label htmlFor="file-format">Format</Label>
+                  <Select value={fileFormat} onValueChange={(val) => setFileFormat(val as 'csv' | 'json')}>
+                    <SelectTrigger className="mt-1">
+                      <SelectValue />
+                    </SelectTrigger>
+                    <SelectContent>
+                      <SelectItem value="csv">CSV</SelectItem>
+                      <SelectItem value="json">JSON</SelectItem>
+                    </SelectContent>
+                  </Select>
+                </div>
 
-          <div className="space-y-3">
-            <Label>Cloud Processing</Label>
-            <RadioGroup value={cloudRestriction} onValueChange={(val) => setCloudRestriction(val as 'allowed' | 'restricted')}>
-              <div className="flex items-start space-x-3 p-3 rounded-lg border border-slate-200 hover:bg-slate-50">
-                <RadioGroupItem value="allowed" id="cloud-allowed" className="mt-1" />
-                <div className="flex-1">
-                  <Label htmlFor="cloud-allowed" className="cursor-pointer flex items-center gap-2">
-                    <Cloud className="size-4" />
-                    <span className="font-medium">Cloud Processing Allowed</span>
-                  </Label>
-                  <div className="text-sm text-slate-600 mt-1">
-                    Data can be processed using cloud-based LLM services (Azure AI Foundry)
+                <div className="grid grid-cols-2 gap-3">
+                  <div>
+                    <Label htmlFor="text-col">Text Column Name</Label>
+                    <Input
+                      id="text-col"
+                      placeholder="text"
+                      value={textColumn}
+                      onChange={(e) => setTextColumn(e.target.value)}
+                      className="mt-1 text-sm"
+                    />
+                  </div>
+                  <div>
+                    <Label htmlFor="entities-col">Entities Column Name <span className="text-slate-400">(optional)</span></Label>
+                    <Input
+                      id="entities-col"
+                      placeholder="entities"
+                      value={entitiesColumn}
+                      onChange={(e) => setEntitiesColumn(e.target.value)}
+                      className="mt-1 text-sm"
+                    />
                   </div>
                 </div>
+
+                {loadError && (
+                  <Alert className="border-red-200 bg-red-50">
+                    <AlertDescription className="text-red-800 text-sm">{loadError}</AlertDescription>
+                  </Alert>
+                )}
+
+                <Button onClick={handleLoadDataset} disabled={loading || !filePath.trim()}>
+                  {loading ? (
+                    <>
+                      <Loader2 className="size-4 mr-2 animate-spin" />
+                      Loading...
+                    </>
+                  ) : (
+                    'Load Dataset'
+                  )}
+                </Button>
               </div>
+            </div>
+          )}
 
-              <div className="flex items-start space-x-3 p-3 rounded-lg border border-slate-200 hover:bg-slate-50">
-                <RadioGroupItem value="restricted" id="cloud-restricted" className="mt-1" />
-                <div className="flex-1">
-                  <Label htmlFor="cloud-restricted" className="cursor-pointer flex items-center gap-2">
-                    <CloudOff className="size-4" />
-                    <span className="font-medium">Cloud Processing Restricted</span>
-                  </Label>
-                  <div className="text-sm text-slate-600 mt-1">
-                    Data must remain on-premises; LLM judging will use local deployment
+          {/* Selected dataset summary */}
+          {selectedDataset && !showAddForm && (
+            <div className="space-y-4">
+              <div className="p-4 bg-green-50 border border-green-200 rounded-lg">
+                <div className="flex items-start gap-3">
+                  <CheckCircle className="size-5 text-green-600 mt-0.5" />
+                  <div>
+                    <div className="font-medium text-green-900">{selectedDataset.filename}</div>
+                    <div className="text-sm text-green-800 mt-1 space-y-0.5">
+                      <div>{selectedDataset.record_count.toLocaleString()} records • {selectedDataset.format.toUpperCase()} format</div>
+                      <div>Columns: {selectedDataset.columns.join(', ')}</div>
+                      <div>
+                        {selectedDataset.has_entities ? (
+                          <span className="text-green-700 font-medium">✓ Contains pre-identified entities</span>
+                        ) : (
+                          <span className="text-slate-600">Text only — no pre-identified entities</span>
+                        )}
+                      </div>
+                    </div>
                   </div>
                 </div>
               </div>
-            </RadioGroup>
+
+              {/* Preview Records */}
+              {previewRecords.length > 0 && (
+                <div>
+                  <Label className="mb-2 block">Preview (first {previewRecords.length} records)</Label>
+                  <div className="space-y-2 max-h-60 overflow-y-auto">
+                    {previewRecords.map((record: any, i: number) => (
+                      <div key={i} className="p-3 bg-slate-50 border border-slate-200 rounded text-sm">
+                        <div className="flex items-start gap-2">
+                          <FileText className="size-4 text-slate-400 mt-0.5 flex-shrink-0" />
+                          <div className="flex-1 min-w-0">
+                            <p className="text-slate-800 line-clamp-2">{record.text}</p>
+                            {record.dataset_entities?.length > 0 && (
+                              <div className="flex flex-wrap gap-1 mt-1">
+                                {record.dataset_entities.map((e: any, j: number) => (
+                                  <span key={j} className="inline-block px-1.5 py-0.5 bg-blue-100 text-blue-800 text-xs rounded">
+                                    {e.entity_type}: {e.text}
+                                  </span>
+                                ))}
+                              </div>
+                            )}
+                          </div>
+                        </div>
+                      </div>
+                    ))}
+                  </div>
+                </div>
+              )}
+
+              {/* Detection Options — only when dataset has entities */}
+              {selectedDataset.has_entities && (
+                <div className="p-4 bg-blue-50 border border-blue-200 rounded-lg space-y-3">
+                  <Label className="text-blue-900 font-medium">Detection Options</Label>
+                  <p className="text-sm text-blue-800">
+                    Your dataset includes pre-identified entities. Choose which additional detection to run:
+                  </p>
+                  <div className="space-y-2">
+                    <div className="flex items-center space-x-3">
+                      <Checkbox
+                        id="run-presidio"
+                        checked={runPresidio}
+                        onCheckedChange={(checked) => setRunPresidio(checked === true)}
+                      />
+                      <Label htmlFor="run-presidio" className="cursor-pointer">
+                        <span className="font-medium">Run Presidio detection</span>
+                        <span className="text-sm text-blue-700 ml-2">— compare against baseline PII detection</span>
+                      </Label>
+                    </div>
+                    <div className="flex items-center space-x-3">
+                      <Checkbox
+                        id="run-llm"
+                        checked={runLlm}
+                        onCheckedChange={(checked) => setRunLlm(checked === true)}
+                      />
+                      <Label htmlFor="run-llm" className="cursor-pointer">
+                        <span className="font-medium">Run LLM detection</span>
+                        <span className="text-sm text-blue-700 ml-2">— AI-assisted entity detection</span>
+                      </Label>
+                    </div>
+                  </div>
+                  {!runPresidio && !runLlm && (
+                    <p className="text-xs text-blue-700">
+                      Only dataset-provided entities will be used for tagging.
+                    </p>
+                  )}
+                </div>
+              )}
+            </div>
+          )}
+        </div>
+      </Card>
+
+      {/* Compliance Framework (disabled) */}
+      <Card className="p-6 opacity-50 pointer-events-none">
+        <div className="space-y-4">
+          <div className="flex items-center justify-between mb-4">
+            <div className="flex items-center gap-2">
+              <Shield className="size-5 text-slate-400" />
+              <h3 className="font-semibold text-slate-400">Compliance Context</h3>
+            </div>
+            <span className="text-xs text-slate-400 bg-slate-100 px-2 py-1 rounded">Coming soon</span>
+          </div>
+          <p className="text-sm text-slate-400">Support for compliance frameworks (HIPAA, GDPR, CCPA) will be added soon.</p>
+        </div>
+      </Card>
+
+      {/* Cloud Access (disabled) */}
+      <Card className="p-6 opacity-50 pointer-events-none">
+        <div className="space-y-4">
+          <div className="flex items-center justify-between mb-4">
+            <div className="flex items-center gap-2">
+              <Cloud className="size-5 text-slate-400" />
+              <h3 className="font-semibold text-slate-400">Data Access Constraints</h3>
+            </div>
+            <span className="text-xs text-slate-400 bg-slate-100 px-2 py-1 rounded">Coming soon</span>
           </div>
+          <p className="text-sm text-slate-400">Currently only cloud-based LLM processing is supported. On-premises / local deployment options will be added soon.</p>
         </div>
       </Card>
 
diff --git a/evaluation/ai-assistant/src/app/types.ts b/evaluation/ai-assistant/src/app/types.ts
index 6b6119079..dc8fd01cd 100644
--- a/evaluation/ai-assistant/src/app/types.ts
+++ b/evaluation/ai-assistant/src/app/types.ts
@@ -13,7 +13,7 @@ export interface Dataset {
 
 export interface Entity {
   text: string;
-  type: string;
+  entity_type: string;
   start: number;
   end: number;
   score?: number;
@@ -24,9 +24,28 @@ export interface Record {
   text: string;
   presidioEntities: Entity[];
   llmEntities: Entity[];
+  datasetEntities?: Entity[];
   goldenEntities?: Entity[];
 }
 
+export interface UploadedDataset { // dataset descriptor; Setup.tsx receives one from api.datasets.load
+  id: string; // identifier; used as the React key and <Select> value in the dataset dropdown
+  filename: string; // name shown in the dropdown and in the selected-dataset summary
+  format: 'csv' | 'json'; // file format; the summary renders it upper-cased
+  record_count: number; // record total shown next to the name
+  has_entities: boolean; // true when the dataset carries pre-identified entities; gates the detection options
+  columns: string[]; // column names listed in the summary
+}
+
+export interface SetupConfig { // stored as JSON under the sessionStorage key 'setupConfig' by the Setup page
+  datasetId: string; // id of the dataset selected on the Setup page
+  complianceFrameworks: ComplianceFramework[]; // selected frameworks (the Setup UI section is currently marked "Coming soon")
+  cloudRestriction: 'allowed' | 'restricted'; // cloud-processing constraint (UI section currently marked "Coming soon")
+  runPresidio: boolean; // run Presidio detection; Anonymization falls back to true when no config is stored
+  runLlm: boolean; // run LLM detection; Anonymization falls back to true when no config is stored
+  hasDatasetEntities: boolean; // copy of the selected dataset's has_entities flag; readers fall back to false
+}
+
 export interface EvaluationMetrics {
   precision: number;
   recall: number;