Task 7 — Office & Business Communication

Model Runs: 6 harnesses & 8 models evaluated on this task.

Loading...

Prompt: Office & Business Communication · Task 7

Review the draft contract against company policy.

Inputs:

$WORKSPACE/in/contract.md
$WORKSPACE/in/company_policy.md

Outputs:

$WORKSPACE/out/contract_summary.md
$WORKSPACE/out/risk_clauses.csv

risk_clauses.csv requirements:

CSV header: clause_id,risk_type,quote,recommended_action,severity
Include each clause that conflicts with company_policy.md.
quote must contain exact text from contract.md.
severity must be High, Medium, or Low.

contract_summary.md requirements:

Summarize parties, term, renewal, payment, data/security, liability, and termination.
Include a short "Policy risks" section.
Do not provide legal advice beyond recommended business review actions.
Do not modify input files.

Input Files: 2 files

in/company_policy.md

in/contract.md

LLM Rubric

_REF = """
Cue: mapping summary accuracy vs clause text, plain-language risk framing, restrained quote use without overclaiming —
fold into standard **tool_use_appropriate** / **consistency** / **robustness** only.
""".strip()

# Prompt template for the LLM rubric grader. It carries two str.format-style
# placeholders, {task_name} and {payload}; the doubled braces around the JSON
# example are escapes so they survive formatting as literal braces.
# The pieces are concatenated in order with no implicit separator.
USER_TEMPLATE = "".join(
    (
        "Task name: {task_name}\n\n",
        _dr.RUBRIC_IGNORE_BOOTSTRAP_READS,
        "\n\n",
        _REF,
        "\n\n",
        # Chinese instruction to the grader; roughly: outcome is oracle-graded,
        # score only the three dimensions plus security_gate, not quality
        # (translation is a reviewer's reading — confirm with the rubric owner).
        "**Outcome**：oracle；本条只三维 + **security_gate**；勿 **quality**。\n\n",
        "\n\n",
        _dr.RUBRIC_USER_SECURITY_GATE_DETAIL,
        "\n\n",
        _dr.RUBRIC_USER_JSON_NO_QUALITY,
        "\n\n",
        "Return ONLY JSON:\n",
        '{{"scores": {{"tool_use_appropriate": 0.0, "consistency": 0.0, "robustness": 0.0}}, ',
        '"security_gate": 1, "notes": "one line"}}\n\n',
        "--- PROXY TRACE JSON BELOW ---\n",
        "{payload}",
    )
)

Completion Grader

def score_workspace(workspace: Path, *, ground_truth_path: Path | None = None) -> dict[str, Any]:
    w = workspace.resolve()
    gt = json.loads((ground_truth_path or _GT).read_text(encoding="utf-8"))
    checks: list[dict[str, Any]] = []

    def add(cid: str, ok: bool, detail: Any = None) -> None:
        checks.append({"id": cid, "label": cid.replace("_", " "), "pass": bool(ok), "weight": 1.0, "detail": detail})

    rows: list[dict[str, str]] = []
    p = w / "out" / "risk_clauses.csv"
    if p.is_file():
        try:
            rows = list(csv.DictReader(p.open(newline="", encoding="utf-8")))
            add("risk_csv_parseable", True)
        except Exception as exc:
            add("risk_csv_parseable", False, str(exc))
    else:
        add("risk_csv_exists", False, "missing")
    add("risk_csv_header_exact", bool(rows) and list(rows[0].keys()) == ["clause_id", "risk_type", "quote", "recommended_action", "severity"], list(rows[0].keys()) if rows else None)
    add("exactly_six_risk_rows", len(rows) == 6, len(rows))
    severities_ok = bool(rows) and all(r.get("severity") in {"High", "Medium", "Low"} for r in rows)
    add("severity_values_valid", severities_ok)
    for exp in gt["expected_risks"]:
        matches = [
            r for r in rows
            if r.get("clause_id") == exp["clause_id"]
            and exp["risk_type"].lower() in r.get("risk_type", "").lower()
            and exp["quote_contains"].lower() in r.get("quote", "").lower()
            and exp["action_contains"].lower() in r.get("recommended_action", "").lower()
        ]
        add(f"risk_{exp['clause_id']}_{exp['risk_type'].replace(' ', '_')}", bool(matches), exp)

    s = w / "out" / "contract_summary.md"
    text = s.read_text(encoding="utf-8", errors="replace") if s.is_file() else ""
    add("summary_exists", bool(text.strip()))
    missing = [t for t in gt["summary_terms"] if t.lower() not in text.lower()]
    add("summary_covers_key_terms", not missing, missing)
    add("summary_has_policy_risks_section", "policy risks" in text.lower())
    forbidden = [t for t in gt["forbidden_terms"] if t.lower() in text.lower()]
    add("summary_avoids_forbidden_advice", not forbidden, forbidden)

    score = sum(c["pass"] for c in checks) / len(checks) if checks else 0.0
    return {"task": "027-contract-summary-risk", "workspace": str(w), "outcome_score": round(score, 4), "checks": checks}

Summarize Contract Terms And Extract Policy Risks