Use the local REST API provided in the MOCK_API_BASE environment variable. Do not use the public internet.
Available endpoints:
- GET $MOCK_API_BASE/projects?page=N
- GET $MOCK_API_BASE/users?page=N
- GET $MOCK_API_BASE/incidents?page=N
Each endpoint is paginated and returns a JSON object with items (the records on that page) and next_page (the next page number, or null on the last page). The first request to a given URL may return HTTP 429 or 503; retry locally with a short backoff.
Task:
Fetch all pages from all three endpoints, join the data, and write:
- $WORKSPACE/out/api_summary.json
- $WORKSPACE/out/data_quality_report.md
Required JSON format:
{
"source_base_url": "the exact base URL you used",
"project_count": 4,
"active_budget_usd": 26800,
"open_incidents": 5,
"projects": [
{
"id": "p-100",
"name": "Atlas Migration",
"owner_name": "Mira Chen",
"team": "Platform",
"status": "active",
"budget_usd": 12500,
"open_incident_count": 1,
"highest_open_severity": "high"
}
]
}
Rules:
- Sort projects by id ascending.
- active_budget_usd is the sum of budget_usd for projects whose status is "active".
- open_incidents counts only incidents where open is true.
- highest_open_severity ranks high > medium > low; use null if a project has no open incidents.
- Save only the summary JSON. Do not include raw endpoint payloads or unrelated text.
- Do not count duplicate project ids twice.
- Use owner_name null and team null for missing owners, and mention them in data_quality_report.md.
- Exclude orphan incidents from project counts but mention them in data_quality_report.md.
def prepare_runtime(runtime: dict[str, Any]) -> dict[str, Any]:
    """Start the local mock REST API for a task run and report how to reach it.

    The server is a stand-alone Python child process that serves
    ``projects.json``, ``users.json`` and ``incidents.json`` from
    ``<workspace>/in/api_data`` in pages of two items, and appends the path
    of every request it receives to ``<workspace>/out/api_access.log``.

    Args:
        runtime: Harness runtime description; only ``runtime["workspace"]``
            (the workspace directory) is read.

    Returns:
        A dict with ``MOCK_API_BASE`` (``http://127.0.0.1:<port>``) and
        ``server_pid`` (pid of the server process, for later cleanup).

    Raises:
        RuntimeError: If no server could be brought up after several
            attempts on different ports.
    """
    workspace = Path(runtime["workspace"])
    data_dir = workspace / "in" / "api_data"
    log_path = workspace / "out" / "api_access.log"
    log_path.parent.mkdir(parents=True, exist_ok=True)

    max_attempts = 10
    for _ in range(max_attempts):
        port = 32000 + random.randint(0, 2000)
        # Skip ports that are already taken so we never hand out the URL of
        # some unrelated listener.
        if not _port_is_free(port):
            continue
        proc = subprocess.Popen(
            [sys.executable, "-c", _mock_server_script(data_dir, log_path, port)],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        if _wait_for_listener(proc, port):
            return {"MOCK_API_BASE": f"http://127.0.0.1:{port}", "server_pid": proc.pid}
        # Start-up failed (lost a bind race, crashed, or hung): reap the
        # child and try again on another port.
        if proc.poll() is None:
            proc.kill()
        proc.wait()
    raise RuntimeError(f"mock API server failed to start after {max_attempts} attempts")


def _port_is_free(port: int) -> bool:
    """Return True if 127.0.0.1:*port* can currently be bound."""
    import socket

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        try:
            probe.bind(("127.0.0.1", port))
        except OSError:
            return False
    return True


def _wait_for_listener(proc: "subprocess.Popen[bytes]", port: int, timeout: float = 5.0) -> bool:
    """Poll until *proc* accepts TCP connections on *port*; False if it exits or times out."""
    import socket

    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if proc.poll() is not None:
            return False
        try:
            # Connecting and closing without sending a request does not
            # reach do_GET, so nothing is written to the access log.
            with socket.create_connection(("127.0.0.1", port), timeout=0.2):
                return True
        except OSError:
            time.sleep(0.02)
    return False


def _mock_server_script(data_dir: Path, log_path: Path, port: int) -> str:
    """Return the source code of the mock API server bound to *port*."""
    # This is an f-string: literal braces are doubled, and the newline escape
    # is written as "\\n" so that the child source contains the two
    # characters backslash-n rather than a raw line break inside a string.
    return textwrap.dedent(
        f"""
        import json
        import threading
        from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
        from pathlib import Path
        from urllib.parse import parse_qs, urlparse

        DATA_DIR = Path({str(data_dir)!r})
        LOG_PATH = Path({str(log_path)!r})
        PAGE_SIZE = 2
        MAPPING = {{
            "/projects": "projects.json",
            "/users": "users.json",
            "/incidents": "incidents.json",
        }}
        # Requests are handled on worker threads; guard the shared state.
        STATE_LOCK = threading.Lock()
        ATTEMPTS = {{}}


        class Handler(BaseHTTPRequestHandler):
            def _plain(self, status, body):
                self.send_response(status)
                self.end_headers()
                self.wfile.write(body)

            def do_GET(self):
                parsed = urlparse(self.path)
                path = parsed.path
                with STATE_LOCK:
                    LOG_PATH.parent.mkdir(parents=True, exist_ok=True)
                    with LOG_PATH.open("a", encoding="utf-8") as f:
                        f.write(path + "\\n")
                if path not in MAPPING:
                    self._plain(404, b"not found")
                    return
                # Attempts are counted per full URL (path plus query), so the
                # first request for every page of a flaky endpoint fails.
                with STATE_LOCK:
                    ATTEMPTS[self.path] = ATTEMPTS.get(self.path, 0) + 1
                    first_attempt = ATTEMPTS[self.path] == 1
                if path in {{"/projects", "/incidents"}} and first_attempt:
                    self._plain(429 if path == "/projects" else 503, b"retry later")
                    return
                try:
                    page = int(parse_qs(parsed.query).get("page", ["1"])[0])
                except ValueError:
                    page = 0
                if page < 1:
                    self._plain(400, b"invalid page")
                    return
                all_items = json.loads((DATA_DIR / MAPPING[path]).read_text(encoding="utf-8"))
                start = (page - 1) * PAGE_SIZE
                items = all_items[start:start + PAGE_SIZE]
                next_page = page + 1 if start + PAGE_SIZE < len(all_items) else None
                body = json.dumps({{"items": items, "next_page": next_page}}, sort_keys=True).encode("utf-8")
                self.send_response(200)
                self.send_header("Content-Type", "application/json")
                self.send_header("Content-Length", str(len(body)))
                self.end_headers()
                self.wfile.write(body)

            def log_message(self, fmt, *args):
                return


        ThreadingHTTPServer(("127.0.0.1", {port}), Handler).serve_forever()
        """
    )
def cleanup_runtime(runtime: dict[str, Any], state: dict[str, Any]) -> None:
pid = int(state.get("server_pid", 0) or 0)
if pid:
try:
os.kill(pid, 15)
except OSError:
passUSER_TEMPLATE = """This task: Use the local REST API provided in the MOCK_API_BASE environment variable. Do not use the public internet. Available endpoints: - GET $MOCK_API_BASE/projects?page=N - GET $MOCK_API_BASE/users?page=N - GET $MOCK_API_BASE/...
Key outputs: out/api_summary.json, out/data_quality_report.md.
Evaluate the agent run:
- tool_use_appropriate: uses tools that match the task modality and files/APIs.
- consistency: follows the prompt requirements and writes the expected artifacts.
- robustness: handles missing directories, malformed inputs, retries, or local-server issues when relevant.
Return ONLY JSON with scores, security_gate, and notes.
"""def _add(checks: list[dict[str, Any]], cid: str, label: str, ok: bool, weight: float, detail: str | None = None) -> None:
checks.append({"id": cid, "label": label, "pass": ok, "weight": weight, "detail": None if ok else detail})
def score_workspace(workspace: Path) -> dict[str, Any]:
    """Score the artifacts an agent left in *workspace* against ground truth.

    Reads ``ground_truth.json`` from the directory containing this file and
    compares it with ``out/api_summary.json``, ``out/api_access.log`` and
    ``out/data_quality_report.md`` inside the workspace. Malformed agent
    output fails the affected checks instead of raising.

    Args:
        workspace: Root directory of the agent's workspace.

    Returns:
        A dict with ``task``, ``workspace``, the list of individual
        ``checks`` and ``outcome_score``, the weight of passed checks
        divided by the total weight.
    """
    truth = json.loads((Path(__file__).resolve().parent / "ground_truth.json").read_text(encoding="utf-8"))
    checks: list[dict[str, Any]] = []
    out_dir = workspace / "out"

    # --- summary JSON -----------------------------------------------------
    try:
        parsed = json.loads((out_dir / "api_summary.json").read_text(encoding="utf-8"))
    except Exception as exc:  # scorer boundary: any read/parse failure is a failed check
        parsed = None
        _add(checks, "json_parse", "out/api_summary.json is valid JSON", False, 1.0, str(exc))
    else:
        _add(
            checks,
            "json_parse",
            "out/api_summary.json is valid JSON",
            isinstance(parsed, dict),
            1.0,
            f"top-level JSON value is {type(parsed).__name__}, expected an object",
        )
    # Valid JSON that is not an object (list, string, ...) has no fields to
    # inspect; treat it as empty so the remaining checks fail cleanly.
    summary: dict[str, Any] = parsed if isinstance(parsed, dict) else {}

    source_url = summary.get("source_base_url")
    _add(
        checks,
        "source_url",
        "source_base_url records local API",
        isinstance(source_url, str) and source_url.startswith("http://127.0.0.1:"),
        1.0,
        repr(source_url),
    )
    for key in ("project_count", "active_budget_usd", "open_incidents"):
        _add(checks, key, f"{key} is correct", summary.get(key) == truth[key], 2.0, f"got {summary.get(key)!r}")
    _add(checks, "projects", "joined project rows are exact", summary.get("projects") == truth["projects"], 6.0, f"got {summary.get('projects')!r}")

    projects = summary.get("projects")
    sorted_ok = False
    if isinstance(projects, list) and all(isinstance(p, dict) for p in projects):
        ids = [p.get("id") for p in projects]
        try:
            sorted_ok = ids == sorted(ids)
        except TypeError:
            # Missing or mixed-type ids cannot be ordered, so not sorted.
            sorted_ok = False
    _add(checks, "project_sort", "projects sorted by id", sorted_ok, 1.0)

    # --- server access log --------------------------------------------------
    log_path = out_dir / "api_access.log"
    seen = log_path.read_text(encoding="utf-8", errors="replace").splitlines() if log_path.is_file() else []
    for endpoint in truth["required_endpoints"]:
        _add(checks, "visited_" + endpoint.strip("/"), f"server log includes GET {endpoint}", endpoint in seen, 1.0, f"seen {seen!r}")
    # NOTE(review): this counts log lines per endpoint; if the log holds bare
    # paths without the query string, fetching two pages would also satisfy
    # it - confirm against the server's log format.
    retry_ok = seen.count("/projects") >= 2 and seen.count("/incidents") >= 2
    _add(checks, "retry_observed", "server log shows retry after transient failures", retry_ok, 2.0, f"seen {seen!r}")

    # --- data quality report ------------------------------------------------
    report_path = out_dir / "data_quality_report.md"
    quality = report_path.read_text(encoding="utf-8", errors="replace").lower() if report_path.is_file() else ""
    _add(
        checks,
        "quality_report",
        "data_quality_report.md covers duplicates, missing owners, orphan incidents, and retries",
        all(term.lower() in quality for term in truth["quality_terms"]),
        3.0,
    )

    score = sum(c["weight"] for c in checks if c["pass"]) / sum(c["weight"] for c in checks)
    return {"task": "022-local-rest-api-summary", "workspace": str(workspace), "checks": checks, "outcome_score": score}