diff --git a/evaluation/ai-assistant/backend/models.py b/evaluation/ai-assistant/backend/models.py
index 03c91219d..f4942d1d2 100644
--- a/evaluation/ai-assistant/backend/models.py
+++ b/evaluation/ai-assistant/backend/models.py
@@ -109,8 +109,15 @@ class SetupConfig(BaseModel):
     run_llm: bool = True
 
 
+class SamplingMethod(str, Enum):  # sampling strategies a client may request
+    random = "random"  # plain random draw over the whole dataset
+    length = "length"  # stratified by text-length bucket
+
+
 class SamplingConfig(BaseModel):
+    dataset_id: str  # required: id of the uploaded dataset to sample from
     sample_size: int = 500
+    method: SamplingMethod = SamplingMethod.random  # random unless specified
 
 
 class AnalysisStatus(BaseModel):
diff --git a/evaluation/ai-assistant/backend/pyproject.toml b/evaluation/ai-assistant/backend/pyproject.toml
index fb1e2001b..f0bb6035d 100644
--- a/evaluation/ai-assistant/backend/pyproject.toml
+++ b/evaluation/ai-assistant/backend/pyproject.toml
@@ -10,6 +10,8 @@ fastapi = ">=0.115.0"
 uvicorn = { version = ">=0.32.0", extras = ["standard"] }
 pydantic = ">=2.0.0"
 python-multipart = ">=0.0.9"
+pandas = ">=2.0.0"
+scikit-learn = ">=1.3.0"
 
 [build-system]
 requires = ["poetry-core"]
diff --git a/evaluation/ai-assistant/backend/routers/sampling.py b/evaluation/ai-assistant/backend/routers/sampling.py
index 50ffe2bcf..d32dfb53b 100644
--- a/evaluation/ai-assistant/backend/routers/sampling.py
+++ b/evaluation/ai-assistant/backend/routers/sampling.py
@@ -1,14 +1,90 @@
-from fastapi import APIRouter
-from models import SamplingConfig
+import pandas as pd
+from fastapi import APIRouter, HTTPException
+from models import Record, SamplingConfig, SamplingMethod
+from routers.upload import _records as uploaded_records
 
 router = APIRouter(prefix="/api/sampling", tags=["sampling"])
 
+# Result of the latest POST /api/sampling; served by GET /api/sampling/records
+sampled_records: list[Record] = []
+
+
+def _sample_random(df: pd.DataFrame, n: int) -> pd.DataFrame:  # draw n rows
+    return df.sample(n=n, random_state=42)  # fixed seed: repeatable sample
+
+
+def _sample_length(df: pd.DataFrame, n: int) -> pd.DataFrame:
+    """Sample up to ``n`` rows, stratified by ``text`` length terciles.
+
+    Each bucket gives ``n // 3`` rows (or all it has); any shortfall is
+    drawn from the rows not yet chosen, so at most ``n`` rows are returned.
+    """
+    lengths = df["text"].str.len()
+    terciles = lengths.quantile([1 / 3, 2 / 3])
+    # Integer codes + duplicates="drop": equal terciles (e.g. all texts the
+    # same length) would otherwise make pd.cut raise on non-unique edges.
+    buckets = pd.cut(
+        lengths,
+        bins=[-1, terciles.iloc[0], terciles.iloc[1], lengths.max() + 1],
+        labels=False,
+        duplicates="drop",
+    )
+    per_bucket = n // 3  # no max(1, ...): that returned 3 rows for n < 3
+    parts = [
+        group.sample(n=min(per_bucket, len(group)), random_state=42)
+        for _, group in df.groupby(buckets, sort=True)
+    ]
+    collected = pd.concat(parts) if parts else df.iloc[:0]
+    extra = min(n, len(df)) - len(collected)
+    if extra > 0:  # top up from the rows that were not picked yet
+        rest = df.drop(collected.index).sample(n=extra, random_state=42)
+        collected = pd.concat([collected, rest])
+    return collected
+
+
+_SAMPLERS = {  # maps each SamplingMethod to its sampler(df, n) function
+    SamplingMethod.random: _sample_random,
+    SamplingMethod.length: _sample_length,
+}
+
 
 @router.post("")
 async def configure_sampling(config: SamplingConfig):
-    """Accept sampling configuration and return a summary."""
+    """Sample the uploaded dataset and keep the result for later steps.
+
+    Responds 404 for an unknown or empty dataset, 400 for a non-positive size.
+    """
+    global sampled_records
+    records = uploaded_records.get(config.dataset_id)
+    if not records:
+        raise HTTPException(
+            status_code=404,
+            detail=f"Dataset '{config.dataset_id}' not found.",
+        )
+    total = len(records)
+    sample_size = min(config.sample_size, total)  # cap at dataset size
+    if sample_size <= 0:
+        raise HTTPException(
+            status_code=400,
+            detail="Sample size must be greater than 0.",
+        )
+    df = pd.DataFrame([r.model_dump() for r in records])
+    sampled_df = _SAMPLERS[config.method](df, sample_size)
+    # The default RangeIndex makes sampled labels list positions: copy the
+    # validated records instead of rebuilding them from NaN-coerced rows.
+    sampled_records = [
+        records[i].model_copy(deep=True) for i in sampled_df.index
+    ]
+
     return {
-        "sample_size": config.sample_size,
-        "method": "stratified_random",
+        "sample_size": len(sampled_records),
+        "total_records": total,
+        "method": config.method.value,
         "status": "ready",
     }
+
+
+@router.get("/records")
+async def get_sampled_records():
+    """Return the latest sample; an empty list until sampling has run."""
+    return sampled_records
diff --git a/evaluation/ai-assistant/src/app/pages/Anonymization.tsx b/evaluation/ai-assistant/src/app/pages/Anonymization.tsx
index 2077ab2c4..12eda26a9 100644
--- a/evaluation/ai-assistant/src/app/pages/Anonymization.tsx
+++ b/evaluation/ai-assistant/src/app/pages/Anonymization.tsx
@@ -1,11 +1,9 @@
-import { useState, useEffect, useMemo } from 'react';
+import { useMemo } from 'react';
 import { useNavigate } from 'react-router';
 import { Card } from '../components/ui/card';
 import { Button } from '../components/ui/button';
-import { Progress } from '../components/ui/progress';
-import { Badge } from '../components/ui/badge';
 import { Alert, AlertDescription } from '../components/ui/alert';
-import { ArrowRight, Loader2, CheckCircle, Shield, Sparkles, AlertTriangle, Database } from 'lucide-react';
+import { ArrowRight, Shield, Sparkles, Database } from 'lucide-react';
 import type { SetupConfig } from '../types';
 
 export function Anonymization() {
@@ -20,52 +18,8 @@ export function Anonymization() {
     }
   }, []);
 
-  const runPresidio = setupConfig?.runPresidio ?? true;
-  const runLlm = setupConfig?.runLlm ?? true;
   const hasDatasetEntities = setupConfig?.hasDatasetEntities ?? false;
 
-  const [presidioProgress, setPresidioProgress] = useState(runPresidio ? 0 : 100);
-  const [llmProgress, setLlmProgress] = useState(runLlm ? 0 : 100);
-  const [presidioComplete, setPresidioComplete] = useState(!runPresidio);
-  const [llmComplete, setLlmComplete] = useState(!runLlm);
-
-  const isComplete = presidioComplete && llmComplete;
-
-  useEffect(() => {
-    if (!runPresidio && !runLlm) return; // nothing to simulate
-
-    if (runPresidio) {
-      const presidioInterval = setInterval(() => {
-        setPresidioProgress((prev) => {
-          if (prev >= 100) {
-            clearInterval(presidioInterval);
-            setPresidioComplete(true);
-            return 100;
-          }
-          return prev + 2;
-        });
-      }, 50);
-      return () => clearInterval(presidioInterval);
-    }
-  }, [runPresidio]);
-
-  useEffect(() => {
-    if (!runLlm) return;
-    const timer = setTimeout(() => {
-      const llmInterval = setInterval(() => {
-        setLlmProgress((prev) => {
-          if (prev >= 100) {
-            clearInterval(llmInterval);
-            setLlmComplete(true);
-            return 100;
-          }
-          return prev + 1.5;
-        });
-      }, 80);
-    }, runPresidio ? 500 : 0);
-    return () => clearTimeout(timer);
-  }, [runLlm, runPresidio]);
-
   const handleContinue = () => {
     navigate('/human-review');
   };
@@ -75,13 +29,7 @@ export function Anonymization() {
       <div>
         <h2 className="text-2xl font-semibold text-slate-900 mb-2">PII Detection Analysis</h2>
         <p className="text-slate-600">
-          {runPresidio && runLlm
-            ? 'Running Presidio and LLM analysis in parallel to detect PII entities across sampled records.'
-            : runPresidio
-              ? 'Running Presidio analysis to detect PII entities across sampled records.'
-              : runLlm
-                ? 'Running LLM analysis to detect PII entities across sampled records.'
-                : 'Using dataset-provided entities. No additional detection selected.'}
+          Automated PII detection engines will run here once implemented.
         </p>
       </div>
 
@@ -100,249 +48,54 @@ export function Anonymization() {
         </Alert>
       )}
 
-      {/* Important Notice */}
-      {runLlm && (
-      <Alert className="border-amber-200 bg-amber-50">
-        <AlertTriangle className="size-4 text-amber-600" />
-        <AlertDescription>
-          <div className="space-y-1">
-            <div className="font-medium text-amber-900">LLM is Assistive Only</div>
-            <div className="text-sm text-amber-800">
-              The AI Judge may miss entities or lack exact character spans. Its suggestions will be 
-              combined with Presidio results for human review - it does not have final authority.
-            </div>
-          </div>
-        </AlertDescription>
-      </Alert>
-      )}
-
-      {/* Side-by-Side Processing */}
-      {(runPresidio || runLlm) ? (
-      <div className={`grid grid-cols-1 ${runPresidio && runLlm ? 'md:grid-cols-2' : ''} gap-6`}>
-        {/* Presidio Processing */}
-        {runPresidio && (
-        <Card className="p-6">
+      {/* Side-by-Side Cards — greyed out / coming soon */}
+      <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
+        {/* Presidio Processing — not implemented */}
+        <Card className="p-6 opacity-50 pointer-events-none">
           <div className="space-y-6">
-            <div className="flex items-center gap-2">
-              <Shield className="size-6 text-blue-600" />
-              <div>
-                <h3 className="font-semibold text-slate-900">Presidio Anonymization</h3>
-                <p className="text-sm text-slate-600">Baseline configuration v1.2</p>
-              </div>
-            </div>
-
-            <div className="flex items-center justify-center py-8">
-              {!presidioComplete ? (
-                <Loader2 className="size-16 text-blue-600 animate-spin" />
-              ) : (
-                <CheckCircle className="size-16 text-green-600" />
-              )}
-            </div>
-
-            <div className="text-center space-y-2">
-              <p className="font-medium text-slate-900">
-                {!presidioComplete ? 'Processing Records...' : 'Complete'}
-              </p>
-            </div>
-
-            <div className="space-y-2">
-              <div className="flex justify-between text-sm text-slate-600">
-                <span>Progress</span>
-                <span>{presidioProgress}%</span>
+            <div className="flex items-center justify-between">
+              <div className="flex items-center gap-2">
+                <Shield className="size-6 text-slate-400" />
+                <div>
+                  <h3 className="font-semibold text-slate-400">Presidio Analysis</h3>
+                  <p className="text-sm text-slate-400">Baseline PII detection</p>
+                </div>
               </div>
-              <Progress value={presidioProgress} className="h-3" />
+              <span className="text-xs text-slate-400 bg-slate-100 px-2 py-1 rounded">Coming soon</span>
             </div>
 
-            {presidioComplete && (
-              <div className="space-y-3 pt-4 border-t">
-                <div className="grid grid-cols-2 gap-3">
-                  <div className="p-3 bg-blue-50 rounded-lg">
-                    <div className="text-xl font-semibold text-blue-900">500</div>
-                    <div className="text-xs text-blue-700">Records</div>
-                  </div>
-                  <div className="p-3 bg-blue-50 rounded-lg">
-                    <div className="text-xl font-semibold text-blue-900">1,247</div>
-                    <div className="text-xs text-blue-700">Entities</div>
-                  </div>
-                  <div className="p-3 bg-blue-50 rounded-lg">
-                    <div className="text-xl font-semibold text-blue-900">12</div>
-                    <div className="text-xs text-blue-700">Types</div>
-                  </div>
-                  <div className="p-3 bg-blue-50 rounded-lg">
-                    <div className="text-xl font-semibold text-blue-900">91%</div>
-                    <div className="text-xs text-blue-700">Avg. Conf.</div>
-                  </div>
-                </div>
-              </div>
-            )}
+            <p className="text-sm text-slate-400">
+              Run Presidio's rule-based and NLP detection to identify PII entities with precise character spans and confidence scores.
+            </p>
           </div>
         </Card>
-        )}
 
-        {/* LLM Processing */}
-        {runLlm && (
-        <Card className="p-6">
+        {/* LLM Processing — not implemented */}
+        <Card className="p-6 opacity-50 pointer-events-none">
           <div className="space-y-6">
-            <div className="flex items-center gap-2">
-              <Sparkles className="size-6 text-purple-600" />
-              <div>
-                <h3 className="font-semibold text-slate-900">LLM-based PII Judging</h3>
-                <p className="text-sm text-slate-600">Azure OpenAI - GPT-4</p>
-              </div>
-            </div>
-
-            <div className="flex items-center justify-center py-8">
-              {!llmComplete ? (
-                <Loader2 className="size-16 text-purple-600 animate-spin" />
-              ) : (
-                <CheckCircle className="size-16 text-green-600" />
-              )}
-            </div>
-
-            <div className="text-center space-y-2">
-              <p className="font-medium text-slate-900">
-                {!llmComplete ? 'AI Judge Analyzing...' : 'Complete'}
-              </p>
-            </div>
-
-            <div className="space-y-2">
-              <div className="flex justify-between text-sm text-slate-600">
-                <span>Progress</span>
-                <span>{llmProgress}%</span>
+            <div className="flex items-center justify-between">
+              <div className="flex items-center gap-2">
+                <Sparkles className="size-6 text-slate-400" />
+                <div>
+                  <h3 className="font-semibold text-slate-400">LLM Judge</h3>
+                  <p className="text-sm text-slate-400">AI-assisted entity detection</p>
+                </div>
               </div>
-              <Progress value={llmProgress} className="h-3" />
+              <span className="text-xs text-slate-400 bg-slate-100 px-2 py-1 rounded">Coming soon</span>
             </div>
 
-            {llmComplete && (
-              <div className="space-y-3 pt-4 border-t">
-                <div className="grid grid-cols-2 gap-3">
-                  <div className="p-3 bg-purple-50 rounded-lg">
-                    <div className="text-xl font-semibold text-purple-900">500</div>
-                    <div className="text-xs text-purple-700">Records</div>
-                  </div>
-                  <div className="p-3 bg-purple-50 rounded-lg">
-                    <div className="text-xl font-semibold text-purple-900">1,312</div>
-                    <div className="text-xs text-purple-700">Entities</div>
-                  </div>
-                  <div className="p-3 bg-purple-50 rounded-lg">
-                    <div className="text-xl font-semibold text-purple-900">65</div>
-                    <div className="text-xs text-purple-700">Additional</div>
-                  </div>
-                  <div className="p-3 bg-purple-50 rounded-lg">
-                    <div className="text-xl font-semibold text-purple-900">87%</div>
-                    <div className="text-xs text-purple-700">Avg. Conf.</div>
-                  </div>
-                </div>
-              </div>
-            )}
+            <p className="text-sm text-slate-400">
+              Use an LLM to suggest additional PII entities and validate detections. Results will be combined with Presidio output for human review.
+            </p>
           </div>
         </Card>
-        )}
       </div>
-      ) : (
-        <Card className="p-6 border-green-200 bg-green-50">
-          <div className="flex items-center gap-3">
-            <CheckCircle className="size-6 text-green-600" />
-            <div>
-              <h3 className="font-semibold text-green-900">No additional detection needed</h3>
-              <p className="text-sm text-green-800 mt-1">
-                Proceeding with dataset-provided entities only. Continue to human review.
-              </p>
-            </div>
-          </div>
-        </Card>
-      )}
-
-      {/* Combined Results */}
-      {isComplete && (
-        <>
-          <Card className="p-6 border-green-200 bg-green-50">
-            <div className="space-y-4">
-              <div className="flex items-center gap-2">
-                <CheckCircle className="size-5 text-green-700" />
-                <h3 className="font-semibold text-green-900">Analysis Complete - Ready for Human Review</h3>
-              </div>
-
-              <div className="space-y-2">
-                <div className="text-sm font-medium text-green-900">Comparison Summary:</div>
-                <div className="flex flex-wrap gap-2">
-                  <Badge variant="outline" className="bg-white text-green-800 border-green-300">
-                    ✓ 1,182 Matches
-                  </Badge>
-                  <Badge variant="outline" className="bg-amber-50 text-amber-800 border-amber-300">
-                    ⚠ 47 Conflicts
-                  </Badge>
-                  <Badge variant="outline" className="bg-blue-50 text-blue-800 border-blue-300">
-                    + 65 LLM-only
-                  </Badge>
-                  <Badge variant="outline" className="bg-purple-50 text-purple-800 border-purple-300">
-                    − 18 Presidio-only
-                  </Badge>
-                </div>
-              </div>
-            </div>
-          </Card>
-
-          <Card className="p-6">
-            <div className="space-y-4">
-              <h3 className="font-semibold text-slate-900">Detected Entity Types</h3>
-              <div className="flex flex-wrap gap-2">
-                {['PERSON', 'EMAIL', 'PHONE_NUMBER', 'SSN', 'CREDIT_CARD', 'DATE_OF_BIRTH', 
-                  'MEDICAL_RECORD', 'IP_ADDRESS', 'EMPLOYEE_ID', 'ADDRESS', 'ORGANIZATION', 'DATE'].map(type => (
-                  <Badge key={type} variant="secondary" className="bg-slate-100 text-slate-800">
-                    {type}
-                  </Badge>
-                ))}
-              </div>
-            </div>
-          </Card>
-
-          <Card className="p-6">
-            <div className="space-y-3">
-              <h3 className="font-semibold text-slate-900">Output Generated</h3>
-              <div className="grid md:grid-cols-2 gap-4 text-sm text-slate-700">
-                <div className="space-y-2">
-                  <div className="font-medium text-slate-900">Presidio Output:</div>
-                  <div className="flex items-start gap-2">
-                    <CheckCircle className="size-4 text-green-600 mt-0.5 flex-shrink-0" />
-                    <div>Detected entities with precise character positions</div>
-                  </div>
-                  <div className="flex items-start gap-2">
-                    <CheckCircle className="size-4 text-green-600 mt-0.5 flex-shrink-0" />
-                    <div>Anonymized text with PII replaced</div>
-                  </div>
-                  <div className="flex items-start gap-2">
-                    <CheckCircle className="size-4 text-green-600 mt-0.5 flex-shrink-0" />
-                    <div>Confidence scores for each detection</div>
-                  </div>
-                </div>
-                <div className="space-y-2">
-                  <div className="font-medium text-slate-900">LLM Output:</div>
-                  <div className="flex items-start gap-2">
-                    <Sparkles className="size-4 text-purple-600 mt-0.5 flex-shrink-0" />
-                    <div>Suggested entities and types</div>
-                  </div>
-                  <div className="flex items-start gap-2">
-                    <Sparkles className="size-4 text-purple-600 mt-0.5 flex-shrink-0" />
-                    <div>Additional detections for review</div>
-                  </div>
-                  <div className="flex items-start gap-2">
-                    <AlertTriangle className="size-4 text-amber-600 mt-0.5 flex-shrink-0" />
-                    <div>Approximate spans (may need correction)</div>
-                  </div>
-                </div>
-              </div>
-            </div>
-          </Card>
-        </>
-      )}
 
       {/* Actions */}
       <div className="flex justify-end gap-3 pt-4">
         <Button
           size="lg"
           onClick={handleContinue}
-          disabled={!isComplete}
         >
           Continue to Human Review
           <ArrowRight className="size-4 ml-2" />
diff --git a/evaluation/ai-assistant/src/app/pages/Sampling.tsx b/evaluation/ai-assistant/src/app/pages/Sampling.tsx
index 5581b39f5..4f70bdeb1 100644
--- a/evaluation/ai-assistant/src/app/pages/Sampling.tsx
+++ b/evaluation/ai-assistant/src/app/pages/Sampling.tsx
@@ -1,18 +1,73 @@
-import { useState } from 'react';
+import { useState, useMemo } from 'react';
 import { useNavigate } from 'react-router';
 import { Card } from '../components/ui/card';
 import { Button } from '../components/ui/button';
 import { Label } from '../components/ui/label';
 import { Alert, AlertDescription } from '../components/ui/alert';
 import { Slider } from '../components/ui/slider';
-import { ArrowRight, Layers, Info, RefreshCw } from 'lucide-react';
+import { RadioGroup, RadioGroupItem } from '../components/ui/radio-group';
+import { ArrowRight, Layers, Info, RefreshCw, Loader2, Shuffle, Ruler, Brain } from 'lucide-react';
+import { api } from '../lib/api';
+import type { SetupConfig } from '../types';
+
+type SamplingMethod = 'random' | 'length'; // keep in sync with the backend SamplingMethod enum
+
+const METHODS: { value: SamplingMethod; label: string; description: string; icon: typeof Shuffle }[] = [
+  {
+    value: 'random',
+    label: 'Random Sampling',
+    description: 'Uniformly random selection using pandas with a fixed seed for reproducibility.',
+    icon: Shuffle,
+  },
+  {
+    value: 'length',
+    label: 'Length-Based Sampling',
+    description: 'Stratified by text length (short / medium / long) so every length bucket is represented equally.',
+    icon: Ruler,
+  },
+];
 
 export function Sampling() {
   const navigate = useNavigate();
-  const [sampleSize, setSampleSize] = useState(500);
 
-  const handleContinue = () => {
-    navigate('/anonymization');
+  const setupConfig = useMemo<SetupConfig | null>(() => {
+    try {
+      const raw = sessionStorage.getItem('setupConfig');
+      return raw ? JSON.parse(raw) : null;
+    } catch {
+      return null;
+    }
+  }, []);
+
+  const datasetRecordCount = useMemo(() => {
+    try {
+      const parsed = parseInt(sessionStorage.getItem('datasetRecordCount') ?? '', 10);
+      return Number.isNaN(parsed) ? 1000 : parsed; // missing or non-numeric value -> default 1000
+    } catch {
+      return 1000;
+    }
+  }, []);
+
+  const maxSampleSize = Math.min(datasetRecordCount, 2000);
+  const defaultSize = Math.min(Math.round(maxSampleSize * 0.5), maxSampleSize);
+
+  const [sampleSize, setSampleSize] = useState(defaultSize);
+  const [samplingMethod, setSamplingMethod] = useState<SamplingMethod>('random');
+  const [loading, setLoading] = useState(false);
+
+  const handleContinue = async () => {
+    if (!setupConfig?.datasetId) return;
+    setLoading(true);
+    const timeout = setTimeout(() => navigate('/anonymization'), 8000); // fallback if the request hangs
+    try {
+      await api.sampling.configure(setupConfig.datasetId, sampleSize, samplingMethod);
+    } catch (err) {
+      console.error('Sampling request failed; continuing without a sample', err);
+    } finally {
+      clearTimeout(timeout); // also on failure, otherwise a stale timer navigates a second time
+      setLoading(false);
+      navigate('/anonymization');
+    }
   };
 
   return (
@@ -41,15 +96,15 @@ export function Sampling() {
             <Slider
               value={[sampleSize]}
               onValueChange={(val) => setSampleSize(val[0])}
-              min={100}
-              max={1000}
-              step={50}
+              min={1}
+              max={maxSampleSize}
+              step={Math.max(1, Math.round(maxSampleSize / 100))}
               className="py-4"
             />
 
             <div className="flex justify-between text-sm text-slate-600">
-              <span>100 records</span>
-              <span>1,000 records</span>
+              <span>1 record</span>
+              <span>{maxSampleSize.toLocaleString()} records</span>
             </div>
           </div>
 
@@ -57,8 +112,7 @@ export function Sampling() {
             <Info className="size-4" />
             <AlertDescription>
               <div className="text-sm">
-                Larger samples provide more accurate evaluation metrics but require more manual review time. 
-                We recommend 500-800 records for balanced accuracy and efficiency.
+                Larger samples provide more accurate evaluation metrics but require more manual review time. Choose a size that balances statistical confidence with your available review capacity.
               </div>
             </AlertDescription>
           </Alert>
@@ -73,11 +127,45 @@ export function Sampling() {
             <h3 className="font-semibold text-slate-900">Sampling Method</h3>
           </div>
 
-          <div className="p-4 bg-blue-50 rounded-lg border border-blue-200">
-            <div className="font-medium text-blue-900 mb-2">Stratified Random Sampling</div>
-            <div className="text-sm text-blue-800">
-              Records are randomly selected while maintaining proportional representation across data segments.
-              This ensures statistical validity and repeatability.
+          <RadioGroup
+            value={samplingMethod}
+            onValueChange={(v) => setSamplingMethod(v as SamplingMethod)}
+            className="space-y-3"
+          >
+            {METHODS.map(({ value, label, description, icon: Icon }) => (
+              <label
+                key={value}
+                htmlFor={`method-${value}`}
+                className={`flex items-start gap-4 p-4 rounded-lg border cursor-pointer transition-colors ${
+                  samplingMethod === value
+                    ? 'border-blue-400 bg-blue-50'
+                    : 'border-slate-200 hover:border-slate-300'
+                }`}
+              >
+                <RadioGroupItem value={value} id={`method-${value}`} className="mt-1" />
+                <div className="flex-1">
+                  <div className="flex items-center gap-2">
+                    <Icon className={`size-4 ${samplingMethod === value ? 'text-blue-600' : 'text-slate-500'}`} />
+                    <span className="font-medium text-slate-900">{label}</span>
+                  </div>
+                  <p className="text-sm text-slate-600 mt-1">{description}</p>
+                </div>
+              </label>
+            ))}
+          </RadioGroup>
+
+          {/* Semantic — coming soon */}
+          <div className="flex items-start gap-4 p-4 rounded-lg border border-slate-200 opacity-50 pointer-events-none">
+            <div className="size-4 mt-1 rounded-full border border-slate-300" />
+            <div className="flex-1">
+              <div className="flex items-center gap-2">
+                <Brain className="size-4 text-slate-400" />
+                <span className="font-medium text-slate-400">Semantic Diversity Sampling</span>
+                <span className="text-xs text-slate-400 bg-slate-100 px-2 py-0.5 rounded">Coming soon</span>
+              </div>
+              <p className="text-sm text-slate-400 mt-1">
+                TF-IDF vectorization + greedy max-min distance to maximise topical diversity in the sample.
+              </p>
             </div>
           </div>
         </div>
@@ -109,13 +197,17 @@ export function Sampling() {
         <div className="space-y-2">
           <div className="font-medium text-blue-900">Sample Summary</div>
           <div className="grid grid-cols-2 gap-4 text-sm">
+            <div>
+              <span className="text-blue-700">Dataset:</span>
+              <span className="font-medium text-blue-900 ml-2">{datasetRecordCount.toLocaleString()} total records</span>
+            </div>
             <div>
               <span className="text-blue-700">Sample Size:</span>
               <span className="font-medium text-blue-900 ml-2">{sampleSize} records</span>
             </div>
             <div>
               <span className="text-blue-700">Method:</span>
-              <span className="font-medium text-blue-900 ml-2">Stratified Random</span>
+              <span className="font-medium text-blue-900 ml-2">{METHODS.find(m => m.value === samplingMethod)?.label}</span>
             </div>
           </div>
         </div>
@@ -123,7 +215,8 @@ export function Sampling() {
 
       {/* Actions */}
       <div className="flex justify-end gap-3 pt-4">
-        <Button size="lg" onClick={handleContinue}>
+        <Button size="lg" onClick={handleContinue} disabled={loading}>
+          {loading && <Loader2 className="size-4 mr-2 animate-spin" />}
           Generate Sample & Continue
           <ArrowRight className="size-4 ml-2" />
         </Button>
diff --git a/evaluation/ai-assistant/src/app/pages/Setup.tsx b/evaluation/ai-assistant/src/app/pages/Setup.tsx
index cc0689c43..c752cc387 100644
--- a/evaluation/ai-assistant/src/app/pages/Setup.tsx
+++ b/evaluation/ai-assistant/src/app/pages/Setup.tsx
@@ -104,6 +104,7 @@ export function Setup() {
         hasDatasetEntities: selectedDataset.has_entities,
       };
       sessionStorage.setItem('setupConfig', JSON.stringify(config));
+      sessionStorage.setItem('datasetRecordCount', String(selectedDataset.record_count));
       navigate('/sampling');
     }
   };
@@ -277,42 +278,40 @@ export function Setup() {
                 </div>
               )}
 
-              {/* Detection Options — only when dataset has entities */}
+              {/* Detection Options — not implemented yet */}
               {selectedDataset.has_entities && (
-                <div className="p-4 bg-blue-50 border border-blue-200 rounded-lg space-y-3">
-                  <Label className="text-blue-900 font-medium">Detection Options</Label>
-                  <p className="text-sm text-blue-800">
-                    Your dataset includes pre-identified entities. Choose which additional detection to run:
+                <div className="p-4 bg-slate-50 border border-slate-200 rounded-lg space-y-3 opacity-50 pointer-events-none">
+                  <div className="flex items-center justify-between">
+                    <Label className="text-slate-400 font-medium">Detection Options</Label>
+                    <span className="text-xs text-slate-400 bg-slate-100 px-2 py-1 rounded">Coming soon</span>
+                  </div>
+                  <p className="text-sm text-slate-400">
+                    Your dataset includes pre-identified entities. Additional detection engines will be available soon:
                   </p>
                   <div className="space-y-2">
                     <div className="flex items-center space-x-3">
                       <Checkbox
                         id="run-presidio"
-                        checked={runPresidio}
-                        onCheckedChange={(checked) => setRunPresidio(checked === true)}
+                        checked={false}
+                        disabled
                       />
-                      <Label htmlFor="run-presidio" className="cursor-pointer">
-                        <span className="font-medium">Run Presidio detection</span>
-                        <span className="text-sm text-blue-700 ml-2">— compare against baseline PII detection</span>
+                      <Label htmlFor="run-presidio" className="cursor-default">
+                        <span className="font-medium text-slate-400">Run Presidio detection</span>
+                        <span className="text-sm text-slate-400 ml-2">— compare against baseline PII detection</span>
                       </Label>
                     </div>
                     <div className="flex items-center space-x-3">
                       <Checkbox
                         id="run-llm"
-                        checked={runLlm}
-                        onCheckedChange={(checked) => setRunLlm(checked === true)}
+                        checked={false}
+                        disabled
                       />
-                      <Label htmlFor="run-llm" className="cursor-pointer">
-                        <span className="font-medium">Run LLM detection</span>
-                        <span className="text-sm text-blue-700 ml-2">— AI-assisted entity detection</span>
+                      <Label htmlFor="run-llm" className="cursor-default">
+                        <span className="font-medium text-slate-400">Run LLM detection</span>
+                        <span className="text-sm text-slate-400 ml-2">— AI-assisted entity detection</span>
                       </Label>
                     </div>
                   </div>
-                  {!runPresidio && !runLlm && (
-                    <p className="text-xs text-blue-700">
-                      Only dataset-provided entities will be used for tagging.
-                    </p>
-                  )}
                 </div>
               )}
             </div>