Round1
Round 1 of 2: create the initial staffing plan.
Read $WORKSPACE/in/base_event.json and $WORKSPACE/in/team_availability.json. Create $WORKSPACE/out/original_plan.json.
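The plan must schedule four work blocks for the "Atlas Launch Webinar": setup, rehearsal, live_support, and teardown. Use only people who are available in the fixture. Write the plan as a JSON object whose top-level "blocks" array holds one entry per work block; give each entry an "id" equal to the block name and include room, start, end (24-hour HH:MM), assigned_people, and assumptions.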
Do not create $WORKSPACE/out/revised_plan.json or $WORKSPACE/out/diff.md yet. Do not modify input files.
Round2
Round 2 of 2: revise the plan after a new constraint.
Read the existing $WORKSPACE/out/original_plan.json plus $WORKSPACE/in/update_notice.json. Create:
- $WORKSPACE/out/revised_plan.json
- $WORKSPACE/out/diff.md
The update notice adds these constraints:
- live_support must move to Room B
- Chen is unavailable after 13:00
- rehearsal must end before 11:00
- add a 15-minute accessibility check (block id: accessibility_check) that ends exactly when live_support starts
Preserve useful unaffected setup/teardown decisions from the original plan. $WORKSPACE/out/diff.md must list added, removed, and changed items. Do not modify input files.
def prepare_runtime(context: dict[str, Any]) -> dict[str, Any]:
    """Create the workspace's ``out`` directory and return empty runtime state.

    Args:
        context: Mapping that must contain a ``"workspace"`` entry usable as
            a filesystem path.

    Returns:
        A new, empty dict.
    """
    out_dir = Path(context["workspace"]) / "out"
    # parents/exist_ok make the call safe to repeat and on a fresh workspace.
    out_dir.mkdir(parents=True, exist_ok=True)
    return {}
def after_round(context: dict[str, Any], runtime_state: dict[str, Any], adapter_result: Any) -> dict[str, Any]:
    """Per-round hook that passes the runtime state through untouched.

    ``context`` and ``adapter_result`` are accepted but not read; the very
    same ``runtime_state`` object is handed back to the caller.
    """
    unchanged_state = runtime_state
    return unchanged_state
def cleanup_runtime(context: dict[str, Any], runtime_state: dict[str, Any]) -> None:
    """Tear-down hook; does nothing and returns ``None``.

    Both arguments are accepted but not read.
    """
    pass


# Prompt template for the rubric judge. ``{task_name}`` and ``{payload}`` are
# placeholders; the doubled braces around the JSON example suggest the string
# is rendered with ``str.format`` -- NOTE(review): confirm against the caller.
# The ``_dr.RUBRIC_*`` fragments are defined outside this view.
USER_TEMPLATE = (
    "Task name: {task_name}\n\n"
    + _dr.RUBRIC_IGNORE_BOOTSTRAP_READS
    + "\n\n"
    + _dr.RUBRIC_USER_OUTCOME_NOTICE
    + "\n\n"
    + """Evaluate whether the agent made an initial plan, then revised it in response to the late event notice with a clear diff and minimal disruption to unaffected choices.
"""
    + "\n\n"
    + _dr.RUBRIC_USER_SECURITY_GATE_DETAIL
    + "\n\n"
    + _dr.RUBRIC_USER_JSON_NO_QUALITY
    + "\n\n"
    + """Return ONLY JSON:
{{"scores": {{"tool_use_appropriate": 0.0, "consistency": 0.0, "robustness": 0.0}}, "security_gate": 1, "notes": "one line"}}
--- PROXY TRACE JSON BELOW ---
{payload}"""
)


def _load_json(path: Path) -> Any:
    """Read *path* as UTF-8 text and return the decoded JSON value.

    Errors from reading the file or decoding the JSON propagate to the caller.
    """
    return json.loads(path.read_text(encoding="utf-8"))
def _minutes(t: str) -> int:
    """Convert an ``H:M`` clock string into minutes since midnight.

    Raises:
        ValueError: If *t* does not split on ``":"`` into exactly two fields,
            or either field is not an integer literal.
    """
    hours_text, minutes_text = t.split(":")
    total = int(hours_text) * 60
    total += int(minutes_text)
    return total
def _blocks_by_id(plan: Any) -> dict[str, dict[str, Any]]:
    """Index a plan's ``blocks`` entries by their string ``id``.

    Agent-written plans are untrusted: anything that is not an object with a
    ``blocks`` list yields an empty index, and entries that are not objects or
    lack a string ``id`` are skipped, so callers can use plain ``.get``.
    """
    if not isinstance(plan, dict):
        return {}
    entries = plan.get("blocks")
    if not isinstance(entries, list):
        return {}
    return {
        entry["id"]: entry
        for entry in entries
        if isinstance(entry, dict) and isinstance(entry.get("id"), str)
    }


def score_workspace(workspace: Path) -> dict[str, Any]:
    """Score the plan files an agent left in ``<workspace>/out``.

    Loads the ground truth from ``_GT``, then runs weighted pass/fail checks
    against ``original_plan.json``, ``revised_plan.json`` and ``diff.md``.
    Malformed agent output fails the affected checks instead of raising.

    Args:
        workspace: Root directory of the run; resolved before use.

    Returns:
        A dict with ``task``, ``workspace`` (resolved path as a string),
        ``outcome_score`` (passed weight / total weight, rounded to 4
        places) and ``checks`` (one record per check with ``id``, ``label``,
        ``pass``, ``weight`` and ``detail``).

    Raises:
        Whatever ``_load_json(_GT)`` raises, or ``KeyError`` for a ground
        truth key that is missing; the ground truth is treated as trusted.
    """
    w = workspace.resolve()
    gt = _load_json(_GT)
    checks: list[dict[str, Any]] = []

    def add(cid: str, label: str, ok: bool, weight: float, detail: Any = None) -> None:
        checks.append({"id": cid, "label": label, "pass": bool(ok), "weight": weight, "detail": detail})

    plans: dict[str, dict[str, Any]] = {"original": {}, "revised": {}}
    for name, weight in (("original", 0.10), ("revised", 0.10)):
        label = f"{name}_plan.json is valid JSON"
        try:
            data = _load_json(w / "out" / f"{name}_plan.json")
        except Exception as exc:
            # Missing, unreadable or undecodable file: record it and move on.
            add(f"{name}_parse", label, False, weight, str(exc))
        else:
            # Valid JSON that is not an object still passes the parse check,
            # but is treated as an empty plan so later ``.get`` calls work.
            plans[name] = data if isinstance(data, dict) else {}
            add(f"{name}_parse", label, True, weight)
    original = plans["original"]
    revised = plans["revised"]

    blocks = _blocks_by_id(revised)
    add(
        "required_blocks",
        "revised plan contains all required blocks",
        all(block_id in blocks for block_id in gt["required_blocks"]),
        0.15,
        list(blocks),
    )

    live = blocks.get("live_support", {})
    add("room_constraint", "live support moved to Room B", live.get("room") == gt["live_support_room"], 0.15, live)

    # No default here: a missing live_support block (or a missing/invalid
    # assigned_people value) must fail rather than pass vacuously.
    people = live.get("assigned_people")
    roster = [people] if isinstance(people, str) else people
    chen_ok = isinstance(roster, list) and "Chen" not in roster
    add("chen_removed", "Chen is not assigned after 13:00 live support", chen_ok, 0.15, people)

    rehearsal = blocks.get("rehearsal", {})
    try:
        reh_ok = _minutes(str(rehearsal.get("end"))) <= _minutes(gt["rehearsal_latest_end"])
    except Exception:
        # Absent or unparseable time simply fails the check.
        reh_ok = False
    add("rehearsal_time", "rehearsal ends before or at 11:00", reh_ok, 0.10, rehearsal)

    acc = blocks.get("accessibility_check", {})
    try:
        acc_end = _minutes(str(acc.get("end")))
        live_start = _minutes(str(live.get("start")))
        acc_start = _minutes(str(acc.get("start")))
        acc_ok = acc_end == live_start and acc_end - acc_start == gt["accessibility_duration_minutes"]
    except Exception:
        # Absent or unparseable time simply fails the check.
        acc_ok = False
    add("accessibility_check", "15-minute accessibility check immediately precedes live support", acc_ok, 0.15, acc)

    # Only scored when both plans parsed to non-empty objects.
    if original and revised:
        original_people = _blocks_by_id(original).get("setup", {}).get("assigned_people")
        revised_people = blocks.get("setup", {}).get("assigned_people")
        # Require an actual original assignment so None == None cannot pass.
        unchanged_ok = original_people is not None and revised_people == original_people
        add("preserve_setup", "setup assignment is preserved from original when unaffected", unchanged_ok, 0.05)

    diff_path = w / "out" / "diff.md"
    if diff_path.is_file():
        low = diff_path.read_text(encoding="utf-8", errors="replace").lower()
        has_sections = all(word in low for word in ("added", "changed", "removed"))
        has_constraints = all(token.lower() in low for token in gt["changed_constraints"])
        add("diff_content", "diff.md lists added, removed, changed items and new constraints", has_sections and has_constraints, 0.15)
    else:
        add("diff_content", "diff.md exists", False, 0.15, "missing")

    total_w = sum(c["weight"] for c in checks)
    passed_w = sum(c["weight"] for c in checks if c["pass"])
    score = round(passed_w / total_w, 4) if total_w else 0.0
    return {"task": "059-event-update-replan", "workspace": str(w), "outcome_score": score, "checks": checks}