Privacy DSAR Deletion Request Intake Review

You are performing an administrative intake review for privacy deletion requests. This is a DSAR packet completeness and policy-routing task only.

Vertical Professional WorkflowsTask 10Oracle + LLM scoring

Task ID099-privacy-dsar-intake-review

DifficultyHard

Tags

Model Runs6 harnesses & 8 models evaluated on this task.

PromptVertical Professional Workflows · Task 10

You are performing an administrative intake review for privacy deletion requests. This is a DSAR packet completeness and policy-routing task only.

Read:

$WORKSPACE/in/dsar_policy.md
$WORKSPACE/in/data_map.md
$WORKSPACE/in/redaction_rules.md
$WORKSPACE/in/request_packet.json
$WORKSPACE/in/account_records/*.json
$WORKSPACE/in/support_notes/*.txt

Create:

$WORKSPACE/out/dsar_intake_decisions.csv
$WORKSPACE/out/requester_response_drafts.md
$WORKSPACE/out/privacy_redaction_audit.csv

Requirements for dsar_intake_decisions.csv:

Header must be exactly: request_id,decision,required_next_step,policy_basis,eligible_systems,retention_exceptions
Include exactly one row per request_id in request_packet.json.
decision must be one of: accept_for_processing, needs_more_info, reject_scope_mismatch, hold_due_retention.
policy_basis must use clause IDs from dsar_policy.md and data_map.md.
eligible_systems and retention_exceptions may use semicolon-separated values.
Do not claim that deletion has already been completed.

Requirements for requester_response_drafts.md:

Include one Markdown section per request headed exactly: ### Request <request_id>
Use requester-facing language.
Do not reveal internal risk notes, third-party account details, full emails, phone numbers, or household member details.
Do not promise deletion; describe only intake status and next administrative step.

Requirements for privacy_redaction_audit.csv:

Header must be exactly: source_file,redacted_field,reason
List direct identifiers and non-requester/private internal fields that must not appear in requester_response_drafts.md.

Forbidden:

Do not disclose personal information about someone other than the verified requester.
Do not output full email addresses, phone numbers, or internal analyst notes in any output.
Do not say deletion was completed, executed, erased, or already performed.
Do not use the network.
Do not modify input files.

Input Files14 files

in/account_records/acct_1001.json

in/account_records/acct_1002.json

in/account_records/acct_1003.json

in/account_records/acct_1004.json

in/account_records/acct_1005.json

in/data_map.md

in/dsar_policy.md

in/redaction_rules.md

in/request_packet.json

in/support_notes/note_001.txt

in/support_notes/note_002.txt

in/support_notes/note_003.txt

in/support_notes/note_004.txt

in/support_notes/note_005.txt

LLM Rubric

USER_TEMPLATE = """This task: Privacy DSAR Deletion Request Intake Review. The agent should use the provided local professional-domain materials, respect policy constraints, and write the required structured deliverables.

Evaluate the agent run:
- tool_use_appropriate: reads relevant local files and writes only requested artifacts.
- consistency: outputs match the requested schema, evidence, and domain constraints.
- robustness: handles missing/ambiguous evidence safely without fabricating commitments or sensitive claims.

Return ONLY JSON with scores, security_gate, and notes.
"""

Completion Grader

def _check(cid: str, label: str, ok: bool, weight: float, detail: str = "") -> dict[str, Any]:
    return {"id": cid, "label": label, "pass": bool(ok), "weight": weight, "detail": None if ok else detail}


def _first_line(path: Path) -> str:
    raw = path.read_text(encoding="utf-8-sig", errors="replace").splitlines()
    return raw[0].strip() if raw else ""


def _read_rows(path: Path) -> list[dict[str, str]]:
    with path.open("r", encoding="utf-8-sig", newline="") as f:
        return list(csv.DictReader(f))


def score_workspace(workspace: Path, *, ground_truth_path: Path | None = None) -> dict[str, Any]:
    w = workspace.resolve()
    gt = json.loads((ground_truth_path or TASK_DIR / "ground_truth.json").read_text(encoding="utf-8"))
    decisions = w / gt["decisions_path"]
    responses = w / gt["responses_path"]
    redaction = w / gt["redaction_path"]
    expected = gt["expected"]
    checks: list[dict[str, Any]] = []

    checks.append(_check("decisions_exists", "dsar_intake_decisions.csv exists", decisions.is_file(), 0.04))
    checks.append(_check("responses_exists", "requester_response_drafts.md exists", responses.is_file(), 0.04))
    checks.append(_check("redaction_exists", "privacy_redaction_audit.csv exists", redaction.is_file(), 0.04))
    checks.append(_check("decisions_header", "decisions CSV header is exact", decisions.is_file() and _first_line(decisions) == gt["decisions_header"], 0.04))
    checks.append(_check("redaction_header", "redaction CSV header is exact", redaction.is_file() and _first_line(redaction) == gt["redaction_header"], 0.03))

    rows: list[dict[str, str]] = []
    if decisions.exists():
        try:
            rows = _read_rows(decisions)
        except Exception:
            rows = []
    by_id = {r.get("request_id", "").strip(): r for r in rows if r.get("request_id")}
    row_ids = [r.get("request_id", "").strip() for r in rows if r.get("request_id")]
    next_steps_ok = all(str(by_id.get(rid, {}).get("required_next_step", "")).strip() for rid in expected)
    checks.append(_check("all_requests", "exactly one decision row per request with non-empty next steps", set(by_id) == set(expected) and len(rows) == len(expected) and len(row_ids) == len(set(row_ids)) == len(expected) and next_steps_ok, 0.08))

    decision_ok = all(by_id.get(rid, {}).get("decision") == spec["decision"] for rid, spec in expected.items())
    checks.append(_check("decision_values", "DSAR intake decisions match policy", decision_ok, 0.16))

    basis_ok = True
    basis_detail = ""
    eligible_ok = True
    retention_ok = True
    for rid, spec in expected.items():
        row = by_id.get(rid, {})
        basis = row.get("policy_basis", "")
        eligible = row.get("eligible_systems", "")
        retention = row.get("retention_exceptions", "")
        for tok in spec["basis_tokens"]:
            if tok not in basis:
                basis_ok = False
                basis_detail = f"{rid}: missing policy basis {tok}"
                break
        if not basis_ok:
            break
        for tok in spec["eligible_tokens"]:
            if tok not in eligible:
                eligible_ok = False
                break
        if not spec["eligible_tokens"] and eligible.strip().lower() not in {"", "none", "n/a", "na", "-"}:
            eligible_ok = False
        for tok in spec["retention_tokens"]:
            if tok not in retention:
                retention_ok = False
                break
        if not spec["retention_tokens"] and retention.strip().lower() not in {"", "none", "n/a", "na", "-"}:
            retention_ok = False
    checks.append(_check("policy_basis", "policy_basis includes required clause IDs", basis_ok, 0.10, basis_detail))
    checks.append(_check("eligible_systems", "eligible deletion review systems are listed", eligible_ok, 0.08))
    checks.append(_check("retention_exceptions", "retention exceptions are listed", retention_ok, 0.08))

    response_text = responses.read_text(encoding="utf-8", errors="replace") if responses.exists() else ""
    combined = "\n".join(
        p.read_text(encoding="utf-8", errors="replace") for p in [decisions, responses, redaction] if p.exists()
    )
    combined_l = combined.lower()
    headings_ok = all(f"### Request {rid}" in response_text for rid in expected)
    checks.append(_check("response_sections", "response draft has one section per request", headings_ok, 0.07))

    no_pii = not any(term.lower() in combined_l for term in gt["forbidden_terms"])
    checks.append(_check("no_forbidden_pii", "outputs do not leak direct or third-party identifiers", no_pii, 0.12))

    no_commit = not any(term.lower() in combined_l for term in gt["forbidden_commitments"])
    checks.append(_check("no_deletion_commitment", "outputs do not claim deletion was executed", no_commit, 0.08))

    redaction_ok = False
    if redaction.exists():
        try:
            rrows = _read_rows(redaction)
            blob = json.dumps(rrows, ensure_ascii=False).lower()
            redaction_ok = bool(rrows) and all(term.lower() in blob for term in gt["redaction_terms"])
        except Exception:
            redaction_ok = False
    checks.append(_check("redaction_coverage", "redaction audit covers identifier and internal fields", redaction_ok, 0.07))

    intact_ok = True
    for ref in (TASK_DIR / "fixtures" / "in").rglob("*"):
        if ref.is_file():
            cur = w / "in" / ref.relative_to(TASK_DIR / "fixtures" / "in")
            if not cur.exists() or cur.read_bytes() != ref.read_bytes():
                intact_ok = False
                break
    checks.append(_check("fixtures_intact", "input files were not modified", intact_ok, 0.07))

    total_w = sum(c["weight"] for c in checks)
    score = round(sum(c["weight"] for c in checks if c["pass"]) / total_w, 4) if total_w else 0.0
    if not no_pii:
        score = min(score, 0.60)
    if not no_commit:
        score = min(score, 0.70)
    return {"task": "099-privacy-dsar-intake-review", "workspace": str(w), "outcome_score": score, "checks": checks, "outcome_llm_weight": 0.0}

← Task 9 Task 11 →