API Schema Migration Compatibility

The project in $WORKSPACE/in/schema_migration converts legacy order payloads to a new API shape. The schemas have changed and client.py is incomplete.

Software Engineering & Codebase Maintenance · Task 9 · Oracle + LLM scoring

Task ID: 042-api-schema-migration

Difficulty: Hard

Tags

Model Runs: 6 harnesses & 8 models evaluated on this task.

Prompt: Software Engineering & Codebase Maintenance · Task 9

The project in $WORKSPACE/in/schema_migration converts legacy order payloads to a new API shape. The schemas have changed and client.py is incomplete.

Before editing, read migration_notes.md, pii_policy.md, and sample_payloads.jsonl; they define version-specific field mappings, PII exclusions, and dirty sample records.

Your job:

Update $WORKSPACE/in/schema_migration/client.py.
Do not edit old_schema.json, new_schema.json, migration_notes.md, pii_policy.md, sample_payloads.jsonl, or files under tests/.
Make python -m pytest $WORKSPACE/in/schema_migration/tests pass.
Preserve backward compatibility with legacy payloads and produce the new schema fields exactly.
Support both legacy v1 payloads and already-v2 payloads.
Support legacy v1.1 and v1.2 payloads described in migration_notes.md, including nested customer, lines, shipTo, order_ref, and unit_price_cents.
Preserve unknown v2 metadata fields under metadata.unknownFields.
For legacy payloads, preserve unmapped extra fields under metadata.unknownFields, except obvious PII-like keys such as ssn or credit_card.
Default blank or null shipping methods to "standard".
Add error aggregation via convert_many(payloads), returning at least (converted, errors) without stopping on the first bad payload. Each error must include index, path, and error.
If warnings are generated, return them as a third item or include them in a compatible result object. Each warning must include index, path, and warning.
Write $WORKSPACE/in/schema_migration/conversion_audit.json when convert_many is called, summarizing converted_count, error_count, warning_count, pii_dropped_count, and unknown_fields_count.
Already-v2 payloads must be idempotent: do not drop metadata.unknownFields or rewrite valid line items.
Add CLI support so python -m client input.jsonl output.json reads JSONL payloads, writes the converted records as JSON, and writes the audit file next to client.py.
Hidden checks include multiple line items, alternate legacy shipping postal fields, and error records with useful paths such as customer_id or items[0].qty.

Do not call external APIs. Do not replace the converter with hard-coded fixture-only output.

Input Files: 7 files

in/schema_migration/client.py

in/schema_migration/migration_notes.md

in/schema_migration/new_schema.json

in/schema_migration/old_schema.json

in/schema_migration/pii_policy.md

in/schema_migration/sample_payloads.jsonl

in/schema_migration/tests/test_client.py

LLM Rubric

# Task-specific focus statement that USER_TEMPLATE splices into the judge prompt.
_REF = (
    "Assess safe edit strategy, backward-compat awareness, and trace clarity"
    " — three standard process dims."
)

# Judge prompt template. Sections are separated by one blank line; the empty
# entry after _REF reproduces the wider gap present in the prompt layout.
# `{task_name}` and `{payload}` are single-brace placeholders (presumably
# filled with str.format by the caller — verify); the doubled braces keep the
# JSON reply example literal under that formatting.
USER_TEMPLATE = "\n\n".join(
    [
        "Task name: {task_name}",
        _dr.RUBRIC_IGNORE_BOOTSTRAP_READS,
        _REF,
        "",
        _dr.RUBRIC_USER_SECURITY_GATE_DETAIL,
        _dr.RUBRIC_USER_JSON_NO_QUALITY,
        "Return ONLY JSON:\n"
        '{{"scores": {{"tool_use_appropriate": 0.0, "consistency": 0.0, "robustness": 0.0}}, '
        '"security_gate": 1, "notes": "one line"}}',
        "--- PROXY TRACE JSON BELOW ---\n"
        "{payload}",
    ]
)

Completion Grader

def _md5(path: Path) -> str:
    """Return the hexadecimal MD5 digest of the bytes stored at *path*.

    The whole file is read into memory in one call. Any error raised while
    reading the file (for example when it does not exist) propagates to the
    caller.
    """
    hasher = hashlib.md5()
    hasher.update(path.read_bytes())
    return hasher.hexdigest()


def score_workspace(workspace: Path) -> dict[str, Any]:
    """Grade a finished workspace for task 042-api-schema-migration.

    Three weighted components are combined into one outcome score:

    * ``pytest`` (0.25) - the project's own test suite must exit with 0.
    * ``direct_mapping`` (0.60) - ``client.py`` is imported and probed with
      hand-built payloads (``convert_order``, ``convert_many``, the audit
      file and the ``python -m client`` CLI), blended 85/15 with the share
      of ``_GT["required_terms"]`` found in the source text.
    * ``fixture_integrity`` (0.15) - every file listed in ``_HASHES`` must
      still match its recorded MD5 digest.

    The total is then capped: 0.74 when the direct score is below 0.85,
    0.58 when it is below 0.60, and 0.70 when pytest fails or any fixture
    was modified.

    Args:
        workspace: Root of the workspace. The project is looked up at
            ``<workspace>/in/schema_migration`` and, failing that, at
            ``<workspace>/schema_migration``.

    Returns:
        A dict with ``task``, ``outcome_score`` (rounded to 4 places),
        ``level`` (``excellent``/``good``/``pass``/``fail`` according to
        ``_GT["scoring"]["thresholds"]``) and the list of ``checks``.
    """
    w = Path(workspace).resolve()
    project = w / "in" / "schema_migration"
    if not project.exists():
        project = w / "schema_migration"
    checks: list[dict[str, Any]] = []

    def add(cid: str, ok: bool, weight: float, detail: Any = None) -> None:
        """Append one named check result to the report."""
        checks.append({"id": cid, "pass": bool(ok), "weight": weight, "detail": detail})

    def fixture_intact(rel: str, digest: str) -> bool:
        """Return True when the fixture at *rel* still has the expected digest."""
        try:
            return _md5(project / rel) == digest
        except OSError:
            # A deleted or unreadable fixture counts as tampered rather than
            # aborting the whole grading run.
            return False

    env = os.environ.copy()
    env["PYTHONPATH"] = str(project)
    try:
        result = subprocess.run(
            ["python3", "-m", "pytest", "tests"],
            cwd=project,
            env=env,
            capture_output=True,
            text=True,
            timeout=20,
        )
    except (subprocess.TimeoutExpired, OSError) as exc:
        # A hanging test suite, a missing interpreter or a missing project
        # directory is a failed check, not a grader crash.
        pytest_ok = False
        pytest_detail: Any = f"pytest did not complete: {exc}"
    else:
        pytest_ok = result.returncode == 0
        pytest_detail = result.stdout[-1000:] + result.stderr[-1000:]
    pytest_score = 1.0 if pytest_ok else 0.0
    add("pytest", pytest_ok, 0.25, pytest_detail)

    direct_score = 0.0
    try:
        # NOTE(review): this executes the submitted client.py inside the
        # grader process; run the grader in a sandbox if submissions are
        # untrusted.
        spec = importlib.util.spec_from_file_location("client_under_test", project / "client.py")
        assert spec and spec.loader
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)

        # --- legacy v1 payload: exact match against the ground-truth output ---
        sample = {
            "id": "A100",
            "customer_id": "C-7",
            "customer_name": "Mina Park",
            "items": [{"sku": "PEN", "qty": 3, "price_cents": 129}],
            "ship_to": {"country": "US", "postal": "02139"},
            "shipping_method": "standard",
            "campaign": "spring",
            "ssn": "000-00-0000",
        }
        converted_sample = mod.convert_order(sample)
        # Deep copy via JSON so the shared ground truth is never mutated.
        expected = json.loads(json.dumps(_GT["required_output"]))
        expected["metadata"]["unknownFields"] = {"campaign": "spring"}
        base_ok = converted_sample == expected

        # --- multiple line items, string numerics, null shipping method ---
        multi = {
            "id": "C300",
            "customer_id": "C-9",
            "customer_name": "Ava",
            "items": [
                {"sku": "INK", "qty": "2", "price_cents": "250"},
                {"sku": "PAD", "qty": 1, "price_cents": 99},
            ],
            "ship_to": {"country": "CA", "postalCode": "H2X"},
            "shipping_method": None,
            "gift_note": "front desk",
            "credit_card": "4111",
        }
        converted_multi = mod.convert_order(multi)
        multi_ok = (
            converted_multi["lineItems"] == [
                {"sku": "INK", "quantity": 2, "unitPriceCents": 250},
                {"sku": "PAD", "quantity": 1, "unitPriceCents": 99},
            ]
            and converted_multi["shipping"]["method"] == "standard"
            and converted_multi["shipping"]["address"]["postalCode"] == "H2X"
            and converted_multi["metadata"].get("unknownFields") == {"gift_note": "front desk"}
        )

        # --- PII-like extras must be dropped, harmless extras preserved ---
        sensitive = {
            "id": "D400",
            "customer_id": "C-10",
            "customer_name": "Noor",
            "items": [{"sku": "BAG", "qty": 1, "price_cents": 1200}],
            "ship_to": {"country": "GB", "postal": "SW1A"},
            "shipping": "express",
            "channel": "kiosk",
            "loyalty_id": "L-88",
            "phone_number": "+44-0000",
            "passport_number": "X123",
        }
        converted_sensitive = mod.convert_order(sensitive)
        sensitive_ok = (
            converted_sensitive["shipping"]["method"] == "express"
            and converted_sensitive["metadata"].get("unknownFields") == {"channel": "kiosk", "loyalty_id": "L-88"}
        )

        # --- already-v2 payload keeps metadata.unknownFields ---
        v2 = {
            "orderId": "B200",
            "buyer": {"id": "C-8", "displayName": "Owen"},
            "lineItems": [{"sku": "BOX", "quantity": 1, "unitPriceCents": 500}],
            "shipping": {"method": None, "address": {"country": "US", "postalCode": "10001"}},
            "metadata": {"source": "public-v2", "unknownFields": {"campaign": "spring"}},
        }
        idempotent = mod.convert_order(v2)["metadata"]["unknownFields"] == {"campaign": "spring"}

        # --- legacy v1.2 nested payload ---
        v12 = {
            "order_ref": "E500",
            "customer": {"id": "C-11", "name": "Iris"},
            "lines": [{"sku": "MUG", "qty": "4", "unit_price_cents": "325"}],
            "shipTo": {"country": "US", "postal_code": "94105"},
            "shipping_method": "",
            "routing_tag": "beta",
            "card_number": "4111",
        }
        converted_v12 = mod.convert_order(v12)
        v12_ok = (
            converted_v12["orderId"] == "E500"
            and converted_v12["buyer"] == {"id": "C-11", "displayName": "Iris"}
            and converted_v12["lineItems"] == [{"sku": "MUG", "quantity": 4, "unitPriceCents": 325}]
            and converted_v12["shipping"]["method"] == "standard"
            and converted_v12["shipping"]["address"]["postalCode"] == "94105"
            and converted_v12["metadata"].get("unknownFields") == {"routing_tag": "beta"}
        )

        # --- batch conversion with error aggregation ---
        audit_path = project / "conversion_audit.json"
        batch_result = mod.convert_many([sample, {"id": "bad"}])
        converted, errors = batch_result[0], batch_result[1]

        # Read the audit NOW: it must describe the batch above (1 converted,
        # 1 error). The next convert_many call rewrites the file, so reading
        # it any later would compare the wrong batch's counts.
        try:
            loaded_audit = json.loads(audit_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Missing, unreadable or malformed audit fails only the audit check.
            loaded_audit = None
        audit_data = loaded_audit if isinstance(loaded_audit, dict) else {}
        audit_ok = (
            audit_data.get("converted_count") == 1
            and audit_data.get("error_count") == 1
            and "pii_dropped_count" in audit_data
            and "unknown_fields_count" in audit_data
        )

        bad_batch = [{"id": "bad", "customer_id": "C", "customer_name": "Bad", "items": [{"sku": "X", "qty": 0, "price_cents": 1}], "ship_to": {}}]
        path_result = mod.convert_many(bad_batch)
        path_errors = path_result[1]
        path_text = json.dumps(path_errors, ensure_ascii=False).lower()
        batch_ok = (
            len(converted) == 1 and len(errors) == 1 and isinstance(errors[0].get("error"), str)
            and "index" in errors[0] and "path" in errors[0]
            and bool(path_errors) and ("items" in path_text or "qty" in path_text)
        )

        # --- CLI: JSONL in, JSON list out ---
        cli_ok = False
        with tempfile.TemporaryDirectory() as tmp:
            input_path = Path(tmp) / "input.jsonl"
            output_path = Path(tmp) / "output.json"
            input_path.write_text(json.dumps(sample) + "\n" + json.dumps(v12) + "\n", encoding="utf-8")
            try:
                cli = subprocess.run(
                    ["python3", "-m", "client", str(input_path), str(output_path)],
                    cwd=project,
                    env=env,
                    capture_output=True,
                    text=True,
                    timeout=20,
                )
                if cli.returncode == 0 and output_path.is_file():
                    out_data = json.loads(output_path.read_text(encoding="utf-8"))
                    cli_ok = (
                        isinstance(out_data, list)
                        and len(out_data) == 2
                        and isinstance(out_data[1], dict)
                        and out_data[1].get("orderId") == "E500"
                    )
            except (subprocess.TimeoutExpired, OSError, ValueError):
                # A hanging CLI or unparsable output fails this one check
                # without discarding the results gathered above.
                cli_ok = False

        behavior_checks: dict[str, bool] = {
            "base_mapping": bool(base_ok),
            "multi_item_and_defaults": bool(multi_ok),
            "pii_filtering": bool(sensitive_ok),
            "v2_idempotent_unknown_fields": bool(idempotent),
            "v12_nested_payload": bool(v12_ok),
            "convert_many_errors": bool(batch_ok),
            "audit_written_in_project": bool(audit_ok),
            "cli_jsonl_conversion": bool(cli_ok),
        }
        behavior_weights = {
            "base_mapping": 0.14,
            "multi_item_and_defaults": 0.14,
            "pii_filtering": 0.12,
            "v2_idempotent_unknown_fields": 0.12,
            "v12_nested_payload": 0.14,
            "convert_many_errors": 0.18,
            "audit_written_in_project": 0.08,
            "cli_jsonl_conversion": 0.08,
        }
        behavior_score = sum(behavior_weights[key] for key, ok in behavior_checks.items() if ok)
        source = (project / "client.py").read_text(encoding="utf-8", errors="replace")
        term_score = sum(term in source for term in _GT["required_terms"]) / len(_GT["required_terms"])
        direct_score = 0.85 * behavior_score + 0.15 * term_score
        add("direct_mapping", direct_score >= 0.85, 0.60, {"score": round(direct_score, 4), "checks": behavior_checks, "term_score": round(term_score, 4)})
    except Exception as exc:
        # Boundary around submitted code: any failure while importing or
        # probing client.py scores the direct component as zero.
        add("direct_mapping", False, 0.60, str(exc))

    intact = [fixture_intact(rel, digest) for rel, digest in _HASHES.items()]
    # With no tracked fixtures nothing can have been tampered with.
    integrity = sum(intact) / len(intact) if intact else 1.0
    add("fixture_integrity", integrity == 1.0, 0.15, {"score": integrity})

    total = pytest_score * 0.25 + direct_score * 0.60 + integrity * 0.15
    caps = []
    if direct_score < 0.85:
        caps.append(0.74)
    if direct_score < 0.60:
        caps.append(0.58)
    if pytest_score < 1.0:
        caps.append(0.70)
    if integrity < 1.0:
        caps.append(0.70)
    if caps:
        # The strictest applicable cap wins.
        total = min(total, min(caps))
    thresholds = _GT["scoring"]["thresholds"]
    level = "excellent" if total >= thresholds["excellent"] else "good" if total >= thresholds["good"] else "pass" if total >= thresholds["pass"] else "fail"
    return {"task": "042-api-schema-migration", "outcome_score": round(total, 4), "level": level, "checks": checks}

← Task 8 Task 10 →