Fix a CLI Parsing Bug and Add Regression Tests

The CSV filtering CLI in $WORKSPACE/in/csvtool has parsing bugs around quoted fields, empty results, and descending sort.

Software Engineering & Codebase Maintenance · Task 21 · Oracle + LLM scoring
Model Runs: 6 harnesses & 8 models evaluated on this task.
Loading...
Prompt: Software Engineering & Codebase Maintenance · Task 21

The CSV filtering CLI in $WORKSPACE/in/csvtool has parsing bugs around quoted fields, empty results, and descending sort.

Your job:

  • Run python -m pytest tests from $WORKSPACE/in/csvtool.
  • Fix the CLI implementation.
  • Add focused regression tests for the bug. A good place is $WORKSPACE/in/csvtool/tests/test_cli_regression.py.
  • Verify the full test suite passes.

Required CLI behavior:

  • python -m csvtool.cli samples/orders.csv --where status=paid --select id,total --sort -created_at
  • Repeating --where applies all predicates with AND semantics, for example --where status=paid --where total=750.
  • CSV parsing and writing must preserve quoted commas correctly.
  • Empty result sets should still print the selected header and exit 0.
  • --sort -field means descending; --sort field means ascending. Numeric-looking values should sort numerically.
  • Bad --where expressions should exit non-zero with a clear message.
  • Selecting, sorting, or filtering on a missing field should exit non-zero with a clear message.

Constraints:

  • Do not modify existing tests in tests/test_cli_existing.py.
  • Do not hard-code sample rows or fixture file paths.
  • Do not introduce pandas, click, or other external dependencies.
  • Do not parse CSV by splitting lines on commas.
  • Do not remove CLI options or bypass filtering.
Input Files: 6 files
in/csvtool/README.md
in/csvtool/csvtool/__init__.py
in/csvtool/csvtool/cli.py
in/csvtool/csvtool/filtering.py
in/csvtool/samples/orders.csv
in/csvtool/tests/test_cli_existing.py
LLM Rubric
# Prompt text for the LLM rubric. It names the task, lists the three criteria
# to evaluate (tool_use_appropriate, consistency, robustness) and instructs the
# judge to reply with JSON only, containing scores, security_gate, and notes.
USER_TEMPLATE = """This task: Fix a CLI Parsing Bug and Add Regression Tests. The agent should follow the prompt, modify only the required local project files, and produce the requested artifacts.

Evaluate the agent run:
- tool_use_appropriate: uses code inspection, local tests, and file edits suited to the task.
- consistency: changes match the requested repair or report and preserve constraints.
- robustness: handles test failures, missing context, and verification steps without unrelated churn.

Return ONLY JSON with scores, security_gate, and notes.
"""
Completion Grader
def _md5(path: Path) -> str:
    """Return the hexadecimal MD5 digest of the bytes stored at *path*."""
    hasher = hashlib.md5()
    hasher.update(path.read_bytes())
    return hasher.hexdigest()


def score_workspace(workspace: Path) -> dict[str, Any]:
    """Grade the csvtool project located at ``<workspace>/in/csvtool``.

    Five weighted checks are recorded:

    * ``pytest`` (0.25): ``python -m pytest tests`` exits with status 0.
    * ``hidden_cli_behavior`` (0.35): six ``csvtool.cli`` invocations against
      ``samples/orders.csv`` (descending sort, empty result, repeated
      ``--where``, numeric sort, malformed ``--where``, missing field); the
      check passes only when all six succeed.
    * ``existing_test_integrity`` (0.15): ``tests/test_cli_existing.py``
      exists and its MD5 equals ``EXISTING_TEST_HASH``.
    * ``regression_tests_added`` (0.10): fraction of seven keywords found in
      ``tests/test_cli_regression.py`` (passes at 0.6 or more).
    * ``implementation_quality`` (0.10): substring heuristics over
      ``csvtool/cli.py`` and ``csvtool/filtering.py`` (passes at 0.75 or more).

    A pytest run that times out or cannot be started, and source files that
    are absent, are recorded as failed checks instead of aborting the grader.

    Args:
        workspace: Directory that contains ``in/csvtool``.

    Returns:
        A dict with the keys ``task``, ``outcome_score`` (rounded to four
        decimals), ``level`` (``excellent``/``good``/``pass``/``fail``) and
        ``checks`` (one entry per check, in the order listed above).
    """
    project = Path(workspace).resolve() / "in" / "csvtool"
    checks: list[dict[str, Any]] = []

    def add(cid: str, ok: bool, weight: float, detail: Any = None) -> None:
        """Append one check result to the report."""
        checks.append({"id": cid, "pass": bool(ok), "weight": weight, "detail": detail})

    env = os.environ.copy()
    env["PYTHONPATH"] = str(project)

    def run_cli(*cli_args: str) -> "subprocess.CompletedProcess[str]":
        """Run ``python -m csvtool.cli samples/orders.csv <cli_args>`` in the project."""
        cmd = [sys.executable, "-m", "csvtool.cli", "samples/orders.csv", *cli_args]
        return subprocess.run(cmd, cwd=project, env=env, capture_output=True, text=True, timeout=10)

    def parse_rows(stdout: str) -> list[dict[str, Any]]:
        """Parse CLI output as CSV with a header row; blank output yields no rows."""
        return list(csv.DictReader(stdout.splitlines())) if stdout.strip() else []

    # --- pytest -----------------------------------------------------------
    # A hanging test suite or a missing project directory must count as a
    # failed check rather than crash the grader.
    try:
        result = subprocess.run(
            [sys.executable, "-m", "pytest", "tests"],
            cwd=project,
            env=env,
            capture_output=True,
            text=True,
            timeout=20,
        )
    except subprocess.TimeoutExpired as exc:
        pytest_ok = False
        pytest_detail = f"pytest timed out after {exc.timeout} seconds"
    except OSError as exc:
        pytest_ok = False
        pytest_detail = f"pytest could not be started: {exc}"
    else:
        pytest_ok = result.returncode == 0
        pytest_detail = result.stdout[-1000:] + result.stderr[-1000:]
    pytest_score = 1.0 if pytest_ok else 0.0
    add("pytest", pytest_ok, 0.25, pytest_detail)

    # --- hidden CLI behaviour --------------------------------------------
    hidden_score = 0.0
    try:
        proc = run_cli("--where", "status=paid", "--select", "id,customer,total", "--sort", "-created_at")
        desc_ok = parse_rows(proc.stdout) == [
            {"id": "o1", "customer": "Ava, Inc", "total": "1200"},
            {"id": "o3", "customer": "Core, Labs", "total": "750"},
        ]
        empty = run_cli("--where", "status=missing", "--select", "id,total")
        multi = run_cli("--where", "status=paid", "--where", "total=750", "--select", "id,total")
        multi_rows = parse_rows(multi.stdout)
        numeric = run_cli("--select", "id,total", "--sort", "total")
        numeric_rows = parse_rows(numeric.stdout)
        bad = run_cli("--where", "status")
        missing = run_cli("--select", "id,missing")
        outcomes = [
            proc.returncode == 0 and desc_ok,
            empty.returncode == 0 and empty.stdout.strip() == "id,total",
            multi.returncode == 0 and multi_rows == [{"id": "o3", "total": "750"}],
            numeric.returncode == 0 and [r.get("id") for r in numeric_rows] == ["o2", "o3", "o1"],
            bad.returncode != 0 and "where" in bad.stderr.lower(),
            missing.returncode != 0 and "field" in missing.stderr.lower(),
        ]
        hidden_score = sum(outcomes) / len(outcomes)
        add(
            "hidden_cli_behavior",
            hidden_score >= 1.0,
            0.35,
            {"score": hidden_score, "stdout": proc.stdout, "bad_stderr": bad.stderr},
        )
    except Exception as exc:
        # Deliberate best-effort boundary: any failure while probing the CLI
        # (timeout, launch error, malformed output) scores this check as 0.
        add("hidden_cli_behavior", False, 0.35, str(exc))

    # --- existing tests untouched ----------------------------------------
    existing_test = project / "tests" / "test_cli_existing.py"
    test_ok = existing_test.is_file() and _md5(existing_test) == EXISTING_TEST_HASH
    add("existing_test_integrity", test_ok, 0.15)

    # --- regression tests present ----------------------------------------
    regression = project / "tests" / "test_cli_regression.py"
    reg_text = regression.read_text(encoding="utf-8", errors="replace").lower() if regression.is_file() else ""
    reg_terms = ["quoted", "empty", "-created_at", "bad", "where", "missing", "numeric"]
    reg_score = sum(term in reg_text for term in reg_terms) / len(reg_terms)
    add("regression_tests_added", reg_score >= 0.6, 0.10, {"score": reg_score})

    # --- implementation heuristics ---------------------------------------
    # Guard the reads like the test files above so that a moved or deleted
    # source file lowers the score instead of raising FileNotFoundError.
    source_files = [project / "csvtool" / "cli.py", project / "csvtool" / "filtering.py"]
    missing_sources = [path.name for path in source_files if not path.is_file()]
    source = "".join(
        path.read_text(encoding="utf-8", errors="replace") for path in source_files if path.is_file()
    )
    if source:
        quality = (
            0.35 * ("import csv" in source)
            + 0.25 * ("split(',')" not in source and ".split(\",\")" not in source)
            + 0.20 * ("reverse" in source or "startswith(\"-\")" in source or "startswith('-')" in source)
            # 'action="append"' contains "append", so one substring test covers both.
            + 0.10 * ("append" in source)
            + 0.10 * ("pandas" not in source and "click" not in source)
        )
    else:
        # Nothing to inspect: give no credit for the "absence of X" heuristics.
        quality = 0.0
    quality_detail: dict[str, Any] = {"score": round(quality, 4)}
    if missing_sources:
        quality_detail["missing_files"] = missing_sources
    add("implementation_quality", quality >= 0.75, 0.10, quality_detail)

    # --- aggregate --------------------------------------------------------
    # NOTE(review): the five weights add up to 0.95, so that is the highest
    # attainable total -- confirm this is intended.
    total = pytest_score * 0.25 + hidden_score * 0.35 + (1.0 if test_ok else 0.0) * 0.15 + reg_score * 0.10 + quality * 0.10
    if hidden_score < 1.0:
        total = min(total, 0.65)
    if not test_ok:
        total = min(total, 0.55)
    level = "excellent" if total >= 0.9 else "good" if total >= 0.75 else "pass" if total >= 0.6 else "fail"
    return {"task": "087-cli-parser-bug-tests", "outcome_score": round(total, 4), "level": level, "checks": checks}