Task 6 — Office & Business Communication

Model Runs: 6 harnesses & 8 models evaluated on this task.

Loading...

Prompt: Office & Business Communication · Task 6

Create a structured slide brief for the HelioDesk Q3 product update. Do not create a pptx.

Inputs:

$WORKSPACE/in/product_notes.md
$WORKSPACE/in/metrics.csv
$WORKSPACE/in/template_outline.md

Outputs:

$WORKSPACE/out/slides_outline.json
$WORKSPACE/out/speaker_notes.md

slides_outline.json requirements:

JSON object with a "slides" array of exactly 6 slides.
Each slide has: slide_number, title, bullets, metric_refs.
Follow the six sections in template_outline.md in order.
Cite metric IDs from metrics.csv in metric_refs when a slide uses a metric.

speaker_notes.md requirements:

Include notes for all 6 slides.
Cover the main value propositions: faster onboarding, lower ticket volume, enterprise controls, and renewal expansion.
Do not invent metrics or customer names.

Input Files: 3 files

in/metrics.csv

in/product_notes.md

in/template_outline.md

LLM Rubric

# Task-specific cue text that is spliced into the judge prompt below.
_REF = "Cue: narrative flow, concision, source fidelity vs deck script readiness — fold into standard process dimensions."

# Prompt template for the LLM rubric judge, assembled from fixed fragments and
# shared snippets taken from `_dr`. `{task_name}` and `{payload}` are
# placeholders; the doubled braces around the JSON example suggest the template
# is later filled with `str.format` (NOTE(review): confirm against the caller).
USER_TEMPLATE = "".join(
    (
        "Task name: {task_name}\n\n",
        _dr.RUBRIC_IGNORE_BOOTSTRAP_READS,
        "\n\n",
        _REF,
        "\n\n",
        "**Outcome**：oracle；本条三维 + **security_gate**；勿 **quality**。\n\n",
        "- **tool_use_appropriate / consistency / robustness**：按上方 cue 归入三维；无实质失败可对 **robustness** 给 **1.0**。\n\n",
        # The bullet above already ends with a blank line; this extra separator
        # is kept so the rendered prompt stays identical.
        "\n\n",
        _dr.RUBRIC_USER_SECURITY_GATE_DETAIL,
        "\n\n",
        _dr.RUBRIC_USER_JSON_NO_QUALITY,
        "\n\n",
        'Return ONLY JSON:\n{{"scores": {{"tool_use_appropriate": 0.0, "consistency": 0.0, ',
        '"robustness": 0.0}}, "security_gate": 1, "notes": "one line"}}\n\n',
        "--- PROXY TRACE JSON BELOW ---\n",
        "{payload}",
    )
)

Completion Grader

def score_workspace(workspace: Path, *, ground_truth_path: Path | None = None) -> dict[str, Any]:
    w = workspace.resolve()
    gt = json.loads((ground_truth_path or _GT).read_text(encoding="utf-8"))
    checks: list[dict[str, Any]] = []

    def add(cid: str, ok: bool, detail: Any = None) -> None:
        checks.append({"id": cid, "label": cid.replace("_", " "), "pass": bool(ok), "weight": 1.0, "detail": detail})

    data: dict[str, Any] = {}
    p = w / "out" / "slides_outline.json"
    if p.is_file():
        try:
            raw = json.loads(p.read_text(encoding="utf-8"))
            data = raw if isinstance(raw, dict) else {}
            add("outline_json_parseable", isinstance(raw, dict))
        except Exception as exc:
            add("outline_json_parseable", False, str(exc))
    else:
        add("outline_exists", False, "missing")
    slides = data.get("slides")
    add("six_slides", isinstance(slides, list) and len(slides) == 6, len(slides) if isinstance(slides, list) else None)
    titles = [str(s.get("title", "")) for s in slides] if isinstance(slides, list) else []
    add("titles_follow_template_order", titles == gt["titles_in_order"], titles)
    schema_ok = isinstance(slides, list) and all(isinstance(s, dict) and all(k in s for k in ["slide_number", "title", "bullets", "metric_refs"]) for s in slides)
    add("slide_schema_complete", schema_ok)
    refs = set()
    if isinstance(slides, list):
        for s in slides:
            refs.update(str(x) for x in (s.get("metric_refs") or []))
    add("all_metrics_referenced", set(gt["metric_ids"]).issubset(refs), sorted(refs))
    add("no_unknown_metric_refs", refs.issubset(set(gt["metric_ids"])), sorted(refs - set(gt["metric_ids"])))

    notes_path = w / "out" / "speaker_notes.md"
    text = notes_path.read_text(encoding="utf-8", errors="replace") if notes_path.is_file() else ""
    add("speaker_notes_exists", bool(text.strip()))
    add("speaker_notes_cover_all_slides", all(f"Slide {i}" in text or f"slide {i}" in text.lower() for i in range(1, 7)))
    missing_terms = [t for t in gt["speaker_note_terms"] if t.lower() not in text.lower()]
    add("speaker_notes_cover_value_props", not missing_terms, missing_terms)
    forbidden = [t for t in gt["forbidden_terms"] if t.lower() in (json.dumps(data) + text).lower()]
    add("forbidden_unapproved_feature_omitted", not forbidden, forbidden)

    score = sum(c["pass"] for c in checks) / len(checks) if checks else 0.0
    return {"task": "026-ppt-brief-generation", "workspace": str(w), "outcome_score": round(score, 4), "checks": checks}

Create A Structured Product Brief Slide Outline