Fix a CLI Parsing Bug and Add Regression Tests

The CSV filtering CLI in $WORKSPACE/in/csvtool has parsing bugs around quoted fields, empty results, and descending sort.

Software Engineering & Codebase Maintenance · Task 21 · Oracle + LLM scoring

Task ID: 087-cli-parser-bug-tests

Difficulty: Medium-hard

Tags

Model Runs: 6 harnesses & 8 models evaluated on this task.

Prompt: Software Engineering & Codebase Maintenance · Task 21

The CSV filtering CLI in $WORKSPACE/in/csvtool has parsing bugs around quoted fields, empty results, and descending sort.

Your job:

Run python -m pytest tests from $WORKSPACE/in/csvtool.
Fix the CLI implementation.
Add focused regression tests for the bug. A good place is $WORKSPACE/in/csvtool/tests/test_cli_regression.py.
Verify the full test suite passes.

Required CLI behavior:

python -m csvtool.cli samples/orders.csv --where status=paid --select id,total --sort -created_at
Repeating --where applies all predicates with AND semantics, for example --where status=paid --where total=750.
CSV parsing and writing must preserve quoted commas correctly.
Empty result sets should still print the selected header and exit 0.
--sort -field means descending; --sort field means ascending. Numeric-looking values should sort numerically.
Bad --where expressions should exit non-zero with a clear message.
Selecting, sorting, or filtering on a missing field should exit non-zero with a clear message.

Constraints:

Do not modify existing tests in tests/test_cli_existing.py.
Do not hard-code sample rows or fixture file paths.
Do not introduce pandas, click, or other external dependencies.
Do not parse CSV by splitting lines on commas.
Do not remove CLI options or bypass filtering.

Input Files: 6 files

in/csvtool/README.md

in/csvtool/csvtool/__init__.py

in/csvtool/csvtool/cli.py

in/csvtool/csvtool/filtering.py

in/csvtool/samples/orders.csv

in/csvtool/tests/test_cli_existing.py

LLM Rubric

# Rubric prompt for the LLM-scored half of this task. It names three criteria
# (tool_use_appropriate, consistency, robustness) and asks for a JSON-only
# reply carrying scores, security_gate, and notes.
# NOTE(review): the name suggests this is the user-turn message sent to a
# judge model; the call site is not visible here, so confirm before relying on it.
USER_TEMPLATE = """This task: Fix a CLI Parsing Bug and Add Regression Tests. The agent should follow the prompt, modify only the required local project files, and produce the requested artifacts.

Evaluate the agent run:
- tool_use_appropriate: uses code inspection, local tests, and file edits suited to the task.
- consistency: changes match the requested repair or report and preserve constraints.
- robustness: handles test failures, missing context, and verification steps without unrelated churn.

Return ONLY JSON with scores, security_gate, and notes.
"""

Completion Grader

def _md5(path: Path) -> str:
    """Return the hexadecimal MD5 digest of the bytes stored at *path*.

    The whole file is loaded into memory before hashing.
    """
    hasher = hashlib.md5()
    hasher.update(path.read_bytes())
    return hasher.hexdigest()


def _csv_rows(stdout: str) -> list[dict[str, Any]]:
    """Parse captured CLI stdout as header-led CSV; blank output yields no rows."""
    if not stdout.strip():
        return []
    return list(csv.DictReader(stdout.splitlines()))


def _read_text_or_empty(path: Path) -> str:
    """Return the text of *path*, or "" when the file is missing or unreadable."""
    try:
        return path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return ""


def score_workspace(workspace: Path) -> dict[str, Any]:
    """Grade the csvtool project found under ``<workspace>/in/csvtool``.

    Five weighted checks are recorded:

    * ``pytest`` (0.25) - ``python -m pytest tests`` exits 0.
    * ``hidden_cli_behavior`` (0.35) - six CLI invocations against
      ``samples/orders.csv`` behave as required (descending sort, empty
      result header, repeated ``--where``, numeric sort, bad ``--where``,
      missing field).
    * ``existing_test_integrity`` (0.15) - ``tests/test_cli_existing.py``
      still hashes to ``EXISTING_TEST_HASH``.
    * ``regression_tests_added`` (0.10) - keyword coverage of
      ``tests/test_cli_regression.py``.
    * ``implementation_quality`` (0.10) - substring heuristics over
      ``csvtool/cli.py`` and ``csvtool/filtering.py``.

    The total is capped at 0.65 when any hidden CLI check fails and at 0.55
    when the existing test file was altered or removed.

    Args:
        workspace: Root of the agent workspace.

    Returns:
        A dict with keys ``task``, ``outcome_score`` (rounded to 4 places),
        ``level`` (``excellent``/``good``/``pass``/``fail``) and ``checks``
        (one entry per check with ``id``, ``pass``, ``weight``, ``detail``).

    A broken or missing project never raises out of the pytest, CLI, or
    source-reading steps; the affected checks simply fail.
    """
    project = Path(workspace).resolve() / "in" / "csvtool"
    checks: list[dict[str, Any]] = []

    def add(cid: str, ok: bool, weight: float, detail: Any = None) -> None:
        checks.append({"id": cid, "pass": bool(ok), "weight": weight, "detail": detail})

    env = os.environ.copy()
    env["PYTHONPATH"] = str(project)

    def run_cli(*args: str) -> "subprocess.CompletedProcess[str]":
        """Run ``python -m csvtool.cli samples/orders.csv <args>`` in the project."""
        return subprocess.run(
            [sys.executable, "-m", "csvtool.cli", "samples/orders.csv", *args],
            cwd=project,
            env=env,
            capture_output=True,
            text=True,
            timeout=10,
        )

    # A hung test suite or a missing project directory must fail this check,
    # not abort the whole grading run.
    try:
        result = subprocess.run(
            [sys.executable, "-m", "pytest", "tests"],
            cwd=project,
            env=env,
            capture_output=True,
            text=True,
            timeout=20,
        )
    except (OSError, subprocess.TimeoutExpired) as exc:
        pytest_ok = False
        pytest_detail: Any = f"pytest did not complete: {exc}"
    else:
        pytest_ok = result.returncode == 0
        pytest_detail = result.stdout[-1000:] + result.stderr[-1000:]
    pytest_score = 1.0 if pytest_ok else 0.0
    add("pytest", pytest_ok, 0.25, pytest_detail)

    hidden_score = 0.0
    try:
        proc = run_cli("--where", "status=paid", "--select", "id,customer,total", "--sort", "-created_at")
        desc_ok = _csv_rows(proc.stdout) == [
            {"id": "o1", "customer": "Ava, Inc", "total": "1200"},
            {"id": "o3", "customer": "Core, Labs", "total": "750"},
        ]
        empty = run_cli("--where", "status=missing", "--select", "id,total")
        multi = run_cli("--where", "status=paid", "--where", "total=750", "--select", "id,total")
        multi_rows = _csv_rows(multi.stdout)
        numeric = run_cli("--select", "id,total", "--sort", "total")
        numeric_rows = _csv_rows(numeric.stdout)
        bad = run_cli("--where", "status")
        missing = run_cli("--select", "id,missing")
        outcomes = [
            proc.returncode == 0 and desc_ok,
            empty.returncode == 0 and empty.stdout.strip() == "id,total",
            multi.returncode == 0 and multi_rows == [{"id": "o3", "total": "750"}],
            numeric.returncode == 0 and [r.get("id") for r in numeric_rows] == ["o2", "o3", "o1"],
            bad.returncode != 0 and "where" in bad.stderr.lower(),
            missing.returncode != 0 and "field" in missing.stderr.lower(),
        ]
        hidden_score = sum(outcomes) / len(outcomes)
        add(
            "hidden_cli_behavior",
            hidden_score >= 1.0,
            0.35,
            {"score": hidden_score, "stdout": proc.stdout, "bad_stderr": bad.stderr},
        )
    except Exception as exc:  # grader boundary: any failure here scores zero
        add("hidden_cli_behavior", False, 0.35, str(exc))

    existing_test = project / "tests" / "test_cli_existing.py"
    test_ok = existing_test.is_file() and _md5(existing_test) == EXISTING_TEST_HASH
    add("existing_test_integrity", test_ok, 0.15)

    reg_text = _read_text_or_empty(project / "tests" / "test_cli_regression.py").lower()
    reg_terms = ["quoted", "empty", "-created_at", "bad", "where", "missing", "numeric"]
    reg_score = sum(term in reg_text for term in reg_terms) / len(reg_terms)
    add("regression_tests_added", reg_score >= 0.6, 0.10, {"score": reg_score})

    # A deleted or renamed source file is scored as empty text rather than
    # crashing the grader with FileNotFoundError.
    source = _read_text_or_empty(project / "csvtool" / "cli.py") + _read_text_or_empty(
        project / "csvtool" / "filtering.py"
    )
    quality = (
        0.35 * ("import csv" in source)
        + 0.25 * ("split(',')" not in source and ".split(\",\")" not in source)
        + 0.20 * ("reverse" in source or "startswith(\"-\")" in source or "startswith('-')" in source)
        # "append" already matches action="append", so one substring test suffices.
        + 0.10 * ("append" in source)
        + 0.10 * ("pandas" not in source and "click" not in source)
    )
    add("implementation_quality", quality >= 0.75, 0.10, {"score": round(quality, 4)})

    total = (
        pytest_score * 0.25
        + hidden_score * 0.35
        + (1.0 if test_ok else 0.0) * 0.15
        + reg_score * 0.10
        + quality * 0.10
    )
    if hidden_score < 1.0:
        total = min(total, 0.65)
    if not test_ok:
        total = min(total, 0.55)
    level = "excellent" if total >= 0.9 else "good" if total >= 0.75 else "pass" if total >= 0.6 else "fail"
    return {"task": "087-cli-parser-bug-tests", "outcome_score": round(total, 4), "level": level, "checks": checks}

← Task 20 Task 22 →