SWE-bench Single
SWE-bench Single run script
minisweagent.run.benchmarks.swebench_single
Run the agent on a single SWE-bench instance.
DEFAULT_OUTPUT_FILE
module-attribute
DEFAULT_OUTPUT_FILE = (
global_config_dir / "last_swebench_single_run.traj.json"
)
DEFAULT_CONFIG_FILE
module-attribute
DEFAULT_CONFIG_FILE = (
builtin_config_dir / "benchmarks" / "swebench.yaml"
)
app
module-attribute
app = Typer(rich_markup_mode='rich', add_completion=False)
main
main(
subset: str = Option(
"lite",
"--subset",
help="SWEBench subset to use or path to a dataset",
rich_help_panel="Data selection",
),
split: str = Option(
"dev",
"--split",
help="Dataset split",
rich_help_panel="Data selection",
),
instance_spec: str = Option(
0,
"-i",
"--instance",
help="SWE-Bench instance ID or index",
rich_help_panel="Data selection",
),
model_name: str | None = Option(
None,
"-m",
"--model",
help="Model to use",
rich_help_panel="Basic",
),
model_class: str | None = Option(
None,
"--model-class",
help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')",
rich_help_panel="Advanced",
),
agent_class: str | None = Option(
None,
"--agent-class",
help="Agent class to use (e.g., 'interactive' or 'minisweagent.agents.interactive.InteractiveAgent')",
rich_help_panel="Advanced",
),
environment_class: str | None = Option(
None,
"--environment-class",
help="Environment class to use (e.g., 'docker' or 'minisweagent.environments.docker.DockerEnvironment')",
rich_help_panel="Advanced",
),
yolo: bool = Option(
False,
"-y",
"--yolo",
help="Run without confirmation",
),
cost_limit: float | None = Option(
None,
"-l",
"--cost-limit",
help="Cost limit. Set to 0 to disable.",
),
config_spec: list[str] = Option(
[str(DEFAULT_CONFIG_FILE)],
"-c",
"--config",
help=_CONFIG_SPEC_HELP_TEXT,
rich_help_panel="Basic",
),
exit_immediately: bool = Option(
False,
"--exit-immediately",
help="Exit immediately when the agent wants to finish instead of prompting.",
rich_help_panel="Advanced",
),
output: Path | None = Option(
DEFAULT_OUTPUT_FILE,
"-o",
"--output",
help="Output trajectory file",
rich_help_panel="Basic",
),
) -> None
Run the agent on a single SWE-bench instance.
Source code in src/minisweagent/run/benchmarks/swebench_single.py (lines 42–96)