Add SQL Migration Preflight Checks and Rollback Safety

The SQLite billing migration in $WORKSPACE/in/billingdb/migration.sql is unsafe and incomplete.

Software Engineering & Codebase MaintenanceTask 20Oracle + LLM scoring
Model Runs6 harnesses & 8 models evaluated on this task.
Loading...
PromptSoftware Engineering & Codebase Maintenance ยท Task 20

The SQLite billing migration in $WORKSPACE/in/billingdb/migration.sql is unsafe and incomplete.

Read $WORKSPACE/in/billingdb/migration_policy.md, then repair the migration package.

Your job:

  • Edit $WORKSPACE/in/billingdb/migration.sql.
  • Create $WORKSPACE/in/billingdb/preflight_report.md.
  • Create $WORKSPACE/in/billingdb/rollback.sql.
  • Create $WORKSPACE/in/billingdb/postcheck.sql.
  • Create $WORKSPACE/in/billingdb/migration_report.md.

Required behavior:

  • Preserve all invoices and payments from schema.sql.
  • Add non-null status to invoices with default open.
  • Preserve historical invoice created_at.
  • Preserve orphan payments by moving them to payment_orphans before enforcing referential safety.
  • Enforce future payments.invoice_id references to valid invoices.
  • The migration must run inside an explicit transaction.
  • The migration must be idempotent: running it a second time must not duplicate or corrupt data.
  • Rollback must execute after the migration and restore the old table shape: invoices(id, customer_id, total_cents, created_at) and payments(id, invoice_id, amount_cents, created_at).

Constraints:

  • Do not modify schema.sql or migration_policy.md.
  • Do not delete orphan payment p4; preserve it in payment_orphans.
  • Do not disable foreign keys without restoring them.
  • Do not hard-code a solution that only works by deleting existing rows.
  • Do not use external services or non-SQLite features.
Input Files4 files
in/billingdb/migration.sql
in/billingdb/migration_policy.md
in/billingdb/schema.sql
in/billingdb/seed_notes.md
LLM Rubric
USER_TEMPLATE = """This task: Add SQL Migration Preflight Checks and Rollback Safety. The agent should follow the prompt, modify only the required local project files, and produce the requested artifacts.

Evaluate the agent run:
- tool_use_appropriate: uses code inspection, local tests, and file edits suited to the task.
- consistency: changes match the requested repair or report and preserve constraints.
- robustness: handles test failures, missing context, and verification steps without unrelated churn.

Return ONLY JSON with scores, security_gate, and notes.
"""
Completion Grader
def _md5(path: Path) -> str:
    return hashlib.md5(path.read_bytes()).hexdigest()


def _strip_comments(sql: str) -> str:
    sql = re.sub(r"--.*?$", "", sql, flags=re.MULTILINE)
    return re.sub(r"/\*.*?\*/", "", sql, flags=re.DOTALL)


def _orphans_match(rows: list[tuple[Any, ...]]) -> bool:
    if len(rows) != len(EXPECTED_ORPHANS):
        return False
    for row, expected in zip(rows, EXPECTED_ORPHANS):
        if tuple(row[:4]) != expected[:4]:
            return False
        reason = str(row[4] if len(row) > 4 else "").lower()
        if not (("missing" in reason or "non-existent" in reason or "nonexistent" in reason) and "invoice" in reason):
            return False
    return True


def score_workspace(workspace: Path) -> dict[str, Any]:
    db = Path(workspace).resolve() / "in" / "billingdb"
    checks: list[dict[str, Any]] = []

    def add(cid: str, ok: bool, weight: float, detail: Any = None) -> None:
        checks.append({"id": cid, "pass": bool(ok), "weight": weight, "detail": detail})

    migration = (db / "migration.sql").read_text(encoding="utf-8", errors="replace")
    conn = sqlite3.connect(":memory:")
    exec_score = data_score = constraint_score = idempotent_score = 0.0
    try:
        conn.executescript((db / "schema.sql").read_text(encoding="utf-8"))
        conn.executescript(migration)
        exec_score = 1.0
        add("migration_executes", True, 0.15)
        invoices = conn.execute("select id, customer_id, total_cents, created_at, status from invoices order by id").fetchall()
        payments = conn.execute("select id, invoice_id, amount_cents, created_at from payments order by id").fetchall()
        orphans = conn.execute("select id, invoice_id, amount_cents, created_at, reason from payment_orphans order by id").fetchall()
        data_score = sum([
            invoices == EXPECTED_INVOICES,
            payments == EXPECTED_PAYMENTS,
            _orphans_match(orphans),
        ]) / 3
        add("data_preservation", data_score == 1.0, 0.25, {"invoices": invoices, "payments": payments, "orphans": orphans})
        cols = conn.execute("pragma table_info(invoices)").fetchall()
        status_not_null = any(col[1] == "status" and col[3] == 1 for col in cols)
        fk_list = conn.execute("pragma foreign_key_list(payments)").fetchall()
        fk_exists = any(row[2] == "invoices" for row in fk_list)
        bad_insert_rejected = False
        try:
            conn.execute("insert into payments(id, invoice_id, amount_cents, created_at) values ('px','nope',1,'2024')")
        except sqlite3.IntegrityError:
            bad_insert_rejected = True
        constraint_score = sum([status_not_null, fk_exists, bad_insert_rejected]) / 3
        add("constraints", constraint_score == 1.0, 0.20, {"status_not_null": status_not_null, "fk_exists": fk_exists, "bad_insert_rejected": bad_insert_rejected})
        conn.executescript(migration)
        invoices2 = conn.execute("select id, customer_id, total_cents, created_at, status from invoices order by id").fetchall()
        payments2 = conn.execute("select id, invoice_id, amount_cents, created_at from payments order by id").fetchall()
        orphans2 = conn.execute("select id, invoice_id, amount_cents, created_at, reason from payment_orphans order by id").fetchall()
        idempotent_score = 1.0 if (invoices2 == EXPECTED_INVOICES and payments2 == EXPECTED_PAYMENTS and _orphans_match(orphans2)) else 0.0
        add("idempotent", idempotent_score == 1.0, 0.10)
    except Exception as exc:
        add("migration_executes", False, 0.15, str(exc))

    sql = _strip_comments(migration.lower())
    terms = ["begin", "commit", "payment_orphans", "foreign key", "status"]
    sql_quality = 0.8 * (sum(term in sql for term in terms) / len(terms)) + 0.2 * ("delete from payments" not in sql)
    add("sql_quality", sql_quality >= 0.60, 0.08, {"score": round(sql_quality, 4)})

    rollback_score = 0.0
    try:
        rconn = sqlite3.connect(":memory:")
        rconn.executescript((db / "schema.sql").read_text(encoding="utf-8"))
        rconn.executescript(migration)
        rconn.executescript((db / "rollback.sql").read_text(encoding="utf-8"))
        inv_cols = [row[1] for row in rconn.execute("pragma table_info(invoices)").fetchall()]
        pay_cols = [row[1] for row in rconn.execute("pragma table_info(payments)").fetchall()]
        inv_rows = rconn.execute("select id, customer_id, total_cents, created_at from invoices order by id").fetchall()
        rollback_score = 1.0 if inv_cols == ["id", "customer_id", "total_cents", "created_at"] and pay_cols == ["id", "invoice_id", "amount_cents", "created_at"] and len(inv_rows) == 3 else 0.0
    except Exception:
        rollback_score = 0.0
    add("rollback_executes", rollback_score == 1.0, 0.08)

    postcheck_score = 0.0
    try:
        pconn = sqlite3.connect(":memory:")
        pconn.executescript((db / "schema.sql").read_text(encoding="utf-8"))
        pconn.executescript(migration)
        pconn.executescript((db / "postcheck.sql").read_text(encoding="utf-8"))
        post_text = (db / "postcheck.sql").read_text(encoding="utf-8", errors="replace").lower()
        postcheck_score = sum(term in post_text for term in ["count", "payment_orphans", "foreign_key_check", "status"]) / 4
    except Exception:
        postcheck_score = 0.0
    add("postcheck_sql", postcheck_score >= 0.75, 0.06, {"score": postcheck_score})

    preflight = (db / "preflight_report.md").read_text(encoding="utf-8", errors="replace").lower() if (db / "preflight_report.md").is_file() else ""
    report = (db / "migration_report.md").read_text(encoding="utf-8", errors="replace").lower() if (db / "migration_report.md").is_file() else ""
    doc_score = (
        sum(term in preflight for term in ["orphan", "p4", "missing-invoice", "row count"]) / 4 * 0.45
        + sum(term in report for term in ["idempotent", "rollback", "postcheck", "payment_orphans", "foreign key"]) / 5 * 0.55
    )
    add("documentation", doc_score >= 0.75, 0.05, {"score": round(doc_score, 4)})

    integrity = ((db / "schema.sql").is_file() and _md5(db / "schema.sql") == SCHEMA_HASH) and ((db / "migration_policy.md").is_file() and _md5(db / "migration_policy.md") == POLICY_HASH)
    add("fixture_integrity", integrity, 0.03)

    total = exec_score * 0.15 + data_score * 0.25 + constraint_score * 0.20 + idempotent_score * 0.10 + sql_quality * 0.08 + rollback_score * 0.08 + postcheck_score * 0.06 + doc_score * 0.05 + (1.0 if integrity else 0.0) * 0.03
    if data_score < 1.0 or constraint_score < 1.0:
        total = min(total, 0.70)
    if not integrity:
        total = min(total, 0.60)
    level = "excellent" if total >= 0.9 else "good" if total >= 0.75 else "pass" if total >= 0.6 else "fail"
    return {"task": "086-sql-migration-preflight-rollback", "outcome_score": round(total, 4), "level": level, "checks": checks}