Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion evaluation/ai-assistant/backend/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from routers import analysis, datasets, decision, evaluation, review, sampling
from routers import analysis, datasets, decision, evaluation, review, sampling, upload

app = FastAPI(title="Presidio Evaluation Flow API", version="0.1.0")

Expand All @@ -13,6 +13,7 @@
)

app.include_router(datasets.router)
app.include_router(upload.router)
app.include_router(sampling.router)
app.include_router(analysis.router)
app.include_router(review.router)
Expand Down
82 changes: 41 additions & 41 deletions evaluation/ai-assistant/backend/mock_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,32 +53,32 @@
"Contact: john.smith@email.com, Phone: 555-0123. SSN: 123-45-6789."
),
presidio_entities=[
Entity(text="John Smith", type="PERSON", start=8, end=18, score=0.95),
Entity(text="03/15/1985", type="DATE_OF_BIRTH", start=24, end=34, score=0.92),
Entity(text="john.smith@email.com", type="EMAIL", start=77, end=97, score=0.98),
Entity(text="555-0123", type="PHONE_NUMBER", start=106, end=114, score=0.89),
Entity(text="123-45-6789", type="US_SSN", start=121, end=132, score=0.99),
Entity(text="John Smith", entity_type="PERSON", start=8, end=18, score=0.95),
Entity(text="03/15/1985", entity_type="DATE_OF_BIRTH", start=24, end=34, score=0.92),
Entity(text="john.smith@email.com", entity_type="EMAIL", start=77, end=97, score=0.98),
Entity(text="555-0123", entity_type="PHONE_NUMBER", start=106, end=114, score=0.89),
Entity(text="123-45-6789", entity_type="US_SSN", start=121, end=132, score=0.99),
],
llm_entities=[
Entity(text="John Smith", type="PERSON", start=8, end=18, score=0.96),
Entity(text="03/15/1985", type="DATE_OF_BIRTH", start=24, end=34, score=0.94),
Entity(text="2025-01-10", type="DATE", start=52, end=62, score=0.88),
Entity(text="john.smith@email.com", type="EMAIL", start=77, end=97, score=0.97),
Entity(text="555-0123", type="PHONE_NUMBER", start=106, end=114, score=0.91),
Entity(text="123-45-6789", type="US_SSN", start=121, end=132, score=0.98),
Entity(text="John Smith", entity_type="PERSON", start=8, end=18, score=0.96),
Entity(text="03/15/1985", entity_type="DATE_OF_BIRTH", start=24, end=34, score=0.94),
Entity(text="2025-01-10", entity_type="DATE", start=52, end=62, score=0.88),
Entity(text="john.smith@email.com", entity_type="EMAIL", start=77, end=97, score=0.97),
Entity(text="555-0123", entity_type="PHONE_NUMBER", start=106, end=114, score=0.91),
Entity(text="123-45-6789", entity_type="US_SSN", start=121, end=132, score=0.98),
],
),
Record(
id="rec-002",
text="Dr. Sarah Johnson reviewed the case. Medical Record #MR-445521. Insurance Policy: POL-8821-USA.",
presidio_entities=[
Entity(text="Sarah Johnson", type="PERSON", start=4, end=17, score=0.93),
Entity(text="MR-445521", type="MEDICAL_RECORD", start=55, end=64, score=0.87),
Entity(text="Sarah Johnson", entity_type="PERSON", start=4, end=17, score=0.93),
Entity(text="MR-445521", entity_type="MEDICAL_RECORD", start=55, end=64, score=0.87),
],
llm_entities=[
Entity(text="Dr. Sarah Johnson", type="PERSON", start=0, end=17, score=0.95),
Entity(text="MR-445521", type="MEDICAL_RECORD", start=55, end=64, score=0.89),
Entity(text="POL-8821-USA", type="INSURANCE_POLICY", start=84, end=96, score=0.82),
Entity(text="Dr. Sarah Johnson", entity_type="PERSON", start=0, end=17, score=0.95),
Entity(text="MR-445521", entity_type="MEDICAL_RECORD", start=55, end=64, score=0.89),
Entity(text="POL-8821-USA", entity_type="INSURANCE_POLICY", start=84, end=96, score=0.82),
],
),
Record(
Expand All @@ -88,19 +88,19 @@
"Salary: $85,000. Emergency contact: Mike Doe at 555-9876."
),
presidio_entities=[
Entity(text="EMP-8821", type="EMPLOYEE_ID", start=13, end=21, score=0.91),
Entity(text="Jane Doe", type="PERSON", start=23, end=31, score=0.94),
Entity(text="2023-06-01", type="DATE", start=41, end=51, score=0.96),
Entity(text="Mike Doe", type="PERSON", start=89, end=97, score=0.92),
Entity(text="555-9876", type="PHONE_NUMBER", start=101, end=109, score=0.88),
Entity(text="EMP-8821", entity_type="EMPLOYEE_ID", start=13, end=21, score=0.91),
Entity(text="Jane Doe", entity_type="PERSON", start=23, end=31, score=0.94),
Entity(text="2023-06-01", entity_type="DATE", start=41, end=51, score=0.96),
Entity(text="Mike Doe", entity_type="PERSON", start=89, end=97, score=0.92),
Entity(text="555-9876", entity_type="PHONE_NUMBER", start=101, end=109, score=0.88),
],
llm_entities=[
Entity(text="EMP-8821", type="EMPLOYEE_ID", start=13, end=21, score=0.90),
Entity(text="Jane Doe", type="PERSON", start=23, end=31, score=0.96),
Entity(text="2023-06-01", type="DATE", start=41, end=51, score=0.94),
Entity(text="$85,000", type="SALARY", start=61, end=68, score=0.79),
Entity(text="Mike Doe", type="PERSON", start=89, end=97, score=0.93),
Entity(text="555-9876", type="PHONE_NUMBER", start=101, end=109, score=0.90),
Entity(text="EMP-8821", entity_type="EMPLOYEE_ID", start=13, end=21, score=0.90),
Entity(text="Jane Doe", entity_type="PERSON", start=23, end=31, score=0.96),
Entity(text="2023-06-01", entity_type="DATE", start=41, end=51, score=0.94),
Entity(text="$85,000", entity_type="SALARY", start=61, end=68, score=0.79),
Entity(text="Mike Doe", entity_type="PERSON", start=89, end=97, score=0.93),
Entity(text="555-9876", entity_type="PHONE_NUMBER", start=101, end=109, score=0.90),
],
),
Record(
Expand All @@ -110,13 +110,13 @@
"Customer: alice.wong@company.com. IP: 192.168.1.100"
),
presidio_entities=[
Entity(text="4532", type="CREDIT_CARD", start=22, end=26, score=0.65),
Entity(text="alice.wong@company.com", type="EMAIL", start=64, end=86, score=0.97),
Entity(text="192.168.1.100", type="IP_ADDRESS", start=92, end=105, score=0.99),
Entity(text="4532", entity_type="CREDIT_CARD", start=22, end=26, score=0.65),
Entity(text="alice.wong@company.com", entity_type="EMAIL", start=64, end=86, score=0.97),
Entity(text="192.168.1.100", entity_type="IP_ADDRESS", start=92, end=105, score=0.99),
],
llm_entities=[
Entity(text="alice.wong@company.com", type="EMAIL", start=64, end=86, score=0.98),
Entity(text="192.168.1.100", type="IP_ADDRESS", start=92, end=105, score=0.97),
Entity(text="alice.wong@company.com", entity_type="EMAIL", start=64, end=86, score=0.98),
Entity(text="192.168.1.100", entity_type="IP_ADDRESS", start=92, end=105, score=0.97),
],
),
Record(
Expand All @@ -126,13 +126,13 @@
"Doctor notes indicate history of diabetes."
),
presidio_entities=[
Entity(text="Robert Chen", type="PERSON", start=17, end=28, score=0.94),
Entity(text="ABC-123", type="MEDICATION_CODE", start=41, end=48, score=0.71),
Entity(text="Robert Chen", entity_type="PERSON", start=17, end=28, score=0.94),
Entity(text="ABC-123", entity_type="MEDICATION_CODE", start=41, end=48, score=0.71),
],
llm_entities=[
Entity(text="Robert Chen", type="PERSON", start=17, end=28, score=0.95),
Entity(text="ABC-123", type="MEDICATION_CODE", start=41, end=48, score=0.73),
Entity(text="diabetes", type="MEDICAL_CONDITION", start=97, end=105, score=0.86),
Entity(text="Robert Chen", entity_type="PERSON", start=17, end=28, score=0.95),
Entity(text="ABC-123", entity_type="MEDICATION_CODE", start=41, end=48, score=0.73),
Entity(text="diabetes", entity_type="MEDICAL_CONDITION", start=97, end=105, score=0.86),
],
),
]
Expand Down Expand Up @@ -189,15 +189,15 @@
EntityMiss(
record_id="rec-004",
record_text="Credit card ending in 4532 was used for transaction. Customer: alice.wong@company.com.",
missed_entity=Entity(text="4532", type="CREDIT_CARD", start=22, end=26, score=0.65),
missed_entity=Entity(text="4532", entity_type="CREDIT_CARD", start=22, end=26, score=0.65),
miss_type=MissType.false_negative,
entity_type="CREDIT_CARD",
risk_level=RiskLevel.high,
),
EntityMiss(
record_id="rec-002",
record_text="Dr. Sarah Johnson reviewed the case. Insurance Policy: POL-8821-USA.",
missed_entity=Entity(text="POL-8821-USA", type="INSURANCE_POLICY", start=56, end=68),
missed_entity=Entity(text="POL-8821-USA", entity_type="INSURANCE_POLICY", start=56, end=68),
miss_type=MissType.false_negative,
entity_type="INSURANCE_POLICY",
risk_level=RiskLevel.medium,
Expand All @@ -208,15 +208,15 @@
"Prescription for Robert Chen: Medication ABC-123, dosage 50mg. "
"Doctor notes indicate history of diabetes."
),
missed_entity=Entity(text="diabetes", type="MEDICAL_CONDITION", start=97, end=105),
missed_entity=Entity(text="diabetes", entity_type="MEDICAL_CONDITION", start=97, end=105),
miss_type=MissType.false_negative,
entity_type="MEDICAL_CONDITION",
risk_level=RiskLevel.high,
),
EntityMiss(
record_id="rec-003",
record_text="Employee ID: EMP-8821, Jane Doe, started 2023-06-01. Salary: $85,000.",
missed_entity=Entity(text="$85,000", type="SALARY", start=61, end=68),
missed_entity=Entity(text="$85,000", entity_type="SALARY", start=61, end=68),
miss_type=MissType.false_negative,
entity_type="SALARY",
risk_level=RiskLevel.medium,
Expand Down
25 changes: 22 additions & 3 deletions evaluation/ai-assistant/backend/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class Dataset(BaseModel):

# One labelled span of text: the matched string, its label, its position, and an optional score.
class Entity(BaseModel):
text: str
type: str
entity_type: str
# NOTE(review): start/end look like character offsets into the owning record's text — confirm with callers.
start: int
end: int
# Optional; stays None when the caller does not supply a score.
score: Optional[float] = None
Expand All @@ -38,8 +38,9 @@ class Entity(BaseModel):
# One text record together with the entity lists attached to it.
class Record(BaseModel):
id: str
text: str
presidio_entities: list[Entity]
llm_entities: list[Entity]
# The three lists below default to an empty list when omitted.
presidio_entities: list[Entity] = []
llm_entities: list[Entity] = []
dataset_entities: list[Entity] = []
# Optional; None when not supplied.
golden_entities: Optional[list[Entity]] = None


Expand Down Expand Up @@ -84,10 +85,28 @@ class EntityMiss(BaseModel):
# --- Request / Response models ---


class DatasetLoadRequest(BaseModel):
    # Request model naming a dataset source and how to read it.
    # NOTE(review): `path` is presumably a filesystem path — confirm with the
    # router that consumes this model.
    path: str
    format: str  # "csv" | "json"
    # Name of the column/key holding the record text; defaults to "text".
    text_column: str = "text"
    # Optional[...] instead of "str | None": the project declares support for
    # Python 3.9, where the PEP 604 union syntax is not available at runtime,
    # and the other models in this file already use Optional.
    # None means no entities column was specified.
    entities_column: Optional[str] = None


# Summary fields for an uploaded dataset.
# NOTE(review): presumably returned by the upload router — confirm against routers/upload.
class UploadedDataset(BaseModel):
id: str
filename: str
format: str # "csv" | "json"
record_count: int
# NOTE(review): presumably True when the upload carried entity annotations — TODO confirm.
has_entities: bool
columns: list[str]


# Setup options tied to a dataset id: compliance frameworks, cloud restriction, and two run flags.
class SetupConfig(BaseModel):
dataset_id: str
compliance_frameworks: list[ComplianceFramework]
cloud_restriction: str # "allowed" | "restricted"
# Both flags default to True.
# NOTE(review): presumably they toggle the Presidio and LLM analysis passes — confirm against the routers.
run_presidio: bool = True
run_llm: bool = True


class SamplingConfig(BaseModel):
Expand Down
1 change: 1 addition & 0 deletions evaluation/ai-assistant/backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ python = "^3.9"
fastapi = ">=0.115.0"
uvicorn = { version = ">=0.32.0", extras = ["standard"] }
pydantic = ">=2.0.0"
python-multipart = ">=0.0.9"

[build-system]
requires = ["poetry-core"]
Expand Down
Loading