API Schema Migration Compatibility

The project in $WORKSPACE/in/schema_migration converts legacy order payloads to a new API shape. The schemas have changed and client.py is incomplete.

Software Engineering & Codebase Maintenance · Task 9 · Oracle + LLM scoring
Model Runs: 6 harnesses & 8 models evaluated on this task.
Loading...
Prompt: Software Engineering & Codebase Maintenance · Task 9

The project in $WORKSPACE/in/schema_migration converts legacy order payloads to a new API shape. The schemas have changed and client.py is incomplete.

Before editing, read migration_notes.md, pii_policy.md, and sample_payloads.jsonl; they define version-specific field mappings, PII exclusions, and dirty sample records.

Your job:

  • Update $WORKSPACE/in/schema_migration/client.py.
  • Do not edit old_schema.json, new_schema.json, migration_notes.md, pii_policy.md, sample_payloads.jsonl, or files under tests/.
  • Make python -m pytest $WORKSPACE/in/schema_migration/tests pass.
  • Preserve backward compatibility with legacy payloads and produce the new schema fields exactly.
  • Support both legacy v1 payloads and already-v2 payloads.
  • Support legacy v1.1 and v1.2 payloads described in migration_notes.md, including nested customer, lines, shipTo, order_ref, and unit_price_cents.
  • Preserve unknown v2 metadata fields under metadata.unknownFields.
  • For legacy payloads, preserve unmapped extra fields under metadata.unknownFields, except obvious PII-like keys such as ssn or credit_card.
  • Default blank or null shipping methods to "standard".
  • Add error aggregation via convert_many(payloads), returning at least (converted, errors) without stopping on the first bad payload. Each error must include index, path, and error.
  • If warnings are generated, return them as a third item or include them in a compatible result object. Each warning must include index, path, and warning.
  • Write $WORKSPACE/in/schema_migration/conversion_audit.json when convert_many is called, summarizing converted_count, error_count, warning_count, pii_dropped_count, and unknown_fields_count.
  • Already-v2 payloads must be idempotent: do not drop metadata.unknownFields or rewrite valid line items.
  • Add CLI support so python -m client input.jsonl output.json reads JSONL payloads, writes the converted records as JSON, and writes the audit file next to client.py.
  • Hidden checks include multiple line items, alternate legacy shipping postal fields, and error records with useful paths such as customer_id or items[0].qty.

Do not call external APIs. Do not replace the converter with hard-coded fixture-only output.

Input Files: 7 files
in/schema_migration/client.py
in/schema_migration/migration_notes.md
in/schema_migration/new_schema.json
in/schema_migration/old_schema.json
in/schema_migration/pii_policy.md
in/schema_migration/sample_payloads.jsonl
in/schema_migration/tests/test_client.py
LLM Rubric
# Task-specific rubric sentence that is spliced into the judge prompt below.
# The em dash was previously stored as mis-decoded bytes, which would have
# been sent to the judge verbatim.
_REF = """
Assess safe edit strategy, backward-compat awareness, and trace clarity — three standard process dims.
""".strip()

# Prompt template for the LLM judge. `{task_name}` and `{payload}` are
# replacement fields; the doubled braces around the JSON example keep it
# literal (presumably the template is filled with str.format — confirm at the
# call site). The `_dr.RUBRIC_*` fragments come from a module that is not
# visible in this excerpt.
USER_TEMPLATE = (
    "Task name: {task_name}\n\n"
    + _dr.RUBRIC_IGNORE_BOOTSTRAP_READS
    + "\n\n"
    + _REF
    + "\n\n"
    # NOTE(review): two consecutive separators yield four newlines here; this
    # looks like a slot left by an omitted fragment — confirm it is intended.
    + "\n\n"
    + _dr.RUBRIC_USER_SECURITY_GATE_DETAIL
    + "\n\n"
    + _dr.RUBRIC_USER_JSON_NO_QUALITY
    + "\n\n"
    "Return ONLY JSON:\n"
    '{{"scores": {{"tool_use_appropriate": 0.0, "consistency": 0.0, "robustness": 0.0}}, '
    '"security_gate": 1, "notes": "one line"}}\n\n'
    "--- PROXY TRACE JSON BELOW ---\n"
    "{payload}"
)
Completion Grader
def _md5(path: Path) -> str:
    return hashlib.md5(path.read_bytes()).hexdigest()


def score_workspace(workspace: Path) -> dict[str, Any]:
    """Grade a finished workspace for the API schema-migration task.

    Three weighted checks are recorded:

    * ``pytest`` (0.25): the project's own test suite passes.
    * ``direct_mapping`` (0.60): ``client.py`` is imported and exercised with
      hand-built payloads (base v1, multi-item, PII filtering, v2
      idempotence, v1.2 nested shape, batch errors, audit file, CLI), blended
      with the share of ``_GT["required_terms"]`` found in its source.
    * ``fixture_integrity`` (0.15): every file listed in ``_HASHES`` still has
      its recorded MD5 digest.

    The weighted total is then capped when any component falls short.

    Args:
        workspace: Root of the workspace. The project is looked up at
            ``<workspace>/in/schema_migration`` and, failing that, at
            ``<workspace>/schema_migration``.

    Returns:
        A dict with ``task``, ``outcome_score`` (rounded to 4 places),
        ``level`` (``excellent``/``good``/``pass``/``fail``) and the list of
        individual ``checks``.
    """
    w = Path(workspace).resolve()
    project = w / "in" / "schema_migration"
    if not project.exists():
        project = w / "schema_migration"
    checks: list[dict[str, Any]] = []

    def add(cid: str, ok: bool, weight: float, detail: Any = None) -> None:
        checks.append({"id": cid, "pass": bool(ok), "weight": weight, "detail": detail})

    env = os.environ.copy()
    env["PYTHONPATH"] = str(project)
    # A hung test suite or a missing project directory must be scored as a
    # failed check rather than crash the grader.
    try:
        result = subprocess.run(["python3", "-m", "pytest", "tests"], cwd=project, env=env, capture_output=True, text=True, timeout=20)
    except (subprocess.TimeoutExpired, OSError) as exc:
        pytest_ok = False
        pytest_detail = f"pytest did not complete: {exc}"
    else:
        pytest_ok = result.returncode == 0
        pytest_detail = result.stdout[-1000:] + result.stderr[-1000:]
    pytest_score = 1.0 if pytest_ok else 0.0
    add("pytest", pytest_ok, 0.25, pytest_detail)

    direct_score = 0.0
    try:
        spec = importlib.util.spec_from_file_location("client_under_test", project / "client.py")
        assert spec and spec.loader
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        sample = {
            "id": "A100",
            "customer_id": "C-7",
            "customer_name": "Mina Park",
            "items": [{"sku": "PEN", "qty": 3, "price_cents": 129}],
            "ship_to": {"country": "US", "postal": "02139"},
            "shipping_method": "standard",
            "campaign": "spring",
            "ssn": "000-00-0000",
        }
        converted_sample = mod.convert_order(sample)
        # Deep-copy the ground truth via a JSON round trip so _GT is not mutated.
        expected = json.loads(json.dumps(_GT["required_output"]))
        expected["metadata"]["unknownFields"] = {"campaign": "spring"}
        base_ok = converted_sample == expected
        multi = {
            "id": "C300",
            "customer_id": "C-9",
            "customer_name": "Ava",
            "items": [
                {"sku": "INK", "qty": "2", "price_cents": "250"},
                {"sku": "PAD", "qty": 1, "price_cents": 99},
            ],
            "ship_to": {"country": "CA", "postalCode": "H2X"},
            "shipping_method": None,
            "gift_note": "front desk",
            "credit_card": "4111",
        }
        converted_multi = mod.convert_order(multi)
        multi_ok = (
            converted_multi["lineItems"] == [
                {"sku": "INK", "quantity": 2, "unitPriceCents": 250},
                {"sku": "PAD", "quantity": 1, "unitPriceCents": 99},
            ]
            and converted_multi["shipping"]["method"] == "standard"
            and converted_multi["shipping"]["address"]["postalCode"] == "H2X"
            and converted_multi["metadata"].get("unknownFields") == {"gift_note": "front desk"}
        )
        sensitive = {
            "id": "D400",
            "customer_id": "C-10",
            "customer_name": "Noor",
            "items": [{"sku": "BAG", "qty": 1, "price_cents": 1200}],
            "ship_to": {"country": "GB", "postal": "SW1A"},
            "shipping": "express",
            "channel": "kiosk",
            "loyalty_id": "L-88",
            "phone_number": "+44-0000",
            "passport_number": "X123",
        }
        converted_sensitive = mod.convert_order(sensitive)
        sensitive_ok = (
            converted_sensitive["shipping"]["method"] == "express"
            and converted_sensitive["metadata"].get("unknownFields") == {"channel": "kiosk", "loyalty_id": "L-88"}
        )
        v2 = {
            "orderId": "B200",
            "buyer": {"id": "C-8", "displayName": "Owen"},
            "lineItems": [{"sku": "BOX", "quantity": 1, "unitPriceCents": 500}],
            "shipping": {"method": None, "address": {"country": "US", "postalCode": "10001"}},
            "metadata": {"source": "public-v2", "unknownFields": {"campaign": "spring"}},
        }
        idempotent = mod.convert_order(v2)["metadata"]["unknownFields"] == {"campaign": "spring"}
        v12 = {
            "order_ref": "E500",
            "customer": {"id": "C-11", "name": "Iris"},
            "lines": [{"sku": "MUG", "qty": "4", "unit_price_cents": "325"}],
            "shipTo": {"country": "US", "postal_code": "94105"},
            "shipping_method": "",
            "routing_tag": "beta",
            "card_number": "4111",
        }
        converted_v12 = mod.convert_order(v12)
        v12_ok = (
            converted_v12["orderId"] == "E500"
            and converted_v12["buyer"] == {"id": "C-11", "displayName": "Iris"}
            and converted_v12["lineItems"] == [{"sku": "MUG", "quantity": 4, "unitPriceCents": 325}]
            and converted_v12["shipping"]["method"] == "standard"
            and converted_v12["shipping"]["address"]["postalCode"] == "94105"
            and converted_v12["metadata"].get("unknownFields") == {"routing_tag": "beta"}
        )
        batch_result = mod.convert_many([sample, {"id": "bad"}])
        converted, errors = batch_result[0], batch_result[1]
        # Read the audit now, while it still describes the batch above (one
        # converted, one error). The next convert_many call rewrites the file
        # with different counts, so reading it afterwards would make a
        # correct implementation fail the audit check.
        audit_path = project / "conversion_audit.json"
        audit_data = json.loads(audit_path.read_text(encoding="utf-8")) if audit_path.is_file() else {}
        audit_ok = (
            audit_data.get("converted_count") == 1
            and audit_data.get("error_count") == 1
            and "pii_dropped_count" in audit_data
            and "unknown_fields_count" in audit_data
        )
        bad_batch = [{"id": "bad", "customer_id": "C", "customer_name": "Bad", "items": [{"sku": "X", "qty": 0, "price_cents": 1}], "ship_to": {}}]
        path_result = mod.convert_many(bad_batch)
        path_errors = path_result[1]
        # Serialise the errors so the path hint can be matched wherever the
        # implementation chose to put it.
        path_text = json.dumps(path_errors, ensure_ascii=False).lower()
        batch_ok = (
            len(converted) == 1 and len(errors) == 1 and isinstance(errors[0].get("error"), str)
            and "index" in errors[0] and "path" in errors[0]
            and bool(path_errors) and ("items" in path_text or "qty" in path_text)
        )
        cli_ok = False
        with tempfile.TemporaryDirectory() as tmp:
            input_path = Path(tmp) / "input.jsonl"
            output_path = Path(tmp) / "output.json"
            input_path.write_text(json.dumps(sample) + "\n" + json.dumps(v12) + "\n", encoding="utf-8")
            cli = subprocess.run(["python3", "-m", "client", str(input_path), str(output_path)], cwd=project, env=env, capture_output=True, text=True, timeout=20)
            if cli.returncode == 0 and output_path.is_file():
                out_data = json.loads(output_path.read_text(encoding="utf-8"))
                cli_ok = isinstance(out_data, list) and len(out_data) == 2 and out_data[1].get("orderId") == "E500"
        behavior_checks: dict[str, bool] = {
            "base_mapping": bool(base_ok),
            "multi_item_and_defaults": bool(multi_ok),
            "pii_filtering": bool(sensitive_ok),
            "v2_idempotent_unknown_fields": bool(idempotent),
            "v12_nested_payload": bool(v12_ok),
            "convert_many_errors": bool(batch_ok),
            "audit_written_in_project": bool(audit_ok),
            "cli_jsonl_conversion": bool(cli_ok),
        }
        behavior_weights = {
            "base_mapping": 0.14,
            "multi_item_and_defaults": 0.14,
            "pii_filtering": 0.12,
            "v2_idempotent_unknown_fields": 0.12,
            "v12_nested_payload": 0.14,
            "convert_many_errors": 0.18,
            "audit_written_in_project": 0.08,
            "cli_jsonl_conversion": 0.08,
        }
        behavior_score = sum(behavior_weights[key] for key, ok in behavior_checks.items() if ok)
        source = (project / "client.py").read_text(encoding="utf-8", errors="replace")
        term_score = sum(term in source for term in _GT["required_terms"]) / len(_GT["required_terms"])
        direct_score = 0.85 * behavior_score + 0.15 * term_score
        add("direct_mapping", direct_score >= 0.85, 0.60, {"score": round(direct_score, 4), "checks": behavior_checks, "term_score": round(term_score, 4)})
    except Exception as exc:
        # Any failure while importing or exercising the client scores the
        # whole direct check as zero; direct_score keeps its 0.0 default.
        add("direct_mapping", False, 0.60, str(exc))

    def fixture_intact(rel: Any, digest: Any) -> bool:
        # A deleted or unreadable fixture counts as modified instead of
        # aborting the grader.
        try:
            return _md5(project / rel) == digest
        except OSError:
            return False

    intact = [fixture_intact(rel, digest) for rel, digest in _HASHES.items()]
    # With no fixtures registered there is nothing to violate; this also
    # avoids dividing by zero.
    integrity = sum(intact) / len(intact) if intact else 1.0
    add("fixture_integrity", integrity == 1.0, 0.15, {"score": integrity})
    total = pytest_score * 0.25 + direct_score * 0.60 + integrity * 0.15
    # Each shortfall caps the total; the lowest applicable cap wins.
    caps = []
    if direct_score < 0.85:
        caps.append(0.74)
    if direct_score < 0.60:
        caps.append(0.58)
    if pytest_score < 1.0:
        caps.append(0.70)
    if integrity < 1.0:
        caps.append(0.70)
    if caps:
        total = min(total, min(caps))
    thresholds = _GT["scoring"]["thresholds"]
    level = "excellent" if total >= thresholds["excellent"] else "good" if total >= thresholds["good"] else "pass" if total >= thresholds["pass"] else "fail"
    return {"task": "042-api-schema-migration", "outcome_score": round(total, 4), "level": level, "checks": checks}