Skip to content

Test Models

Test Models class

Full source code
import json
import logging
import time
from typing import Any

from pydantic import BaseModel

from minisweagent.models import GLOBAL_MODEL_STATS
from minisweagent.models.utils.actions_text import format_observation_messages
from minisweagent.models.utils.actions_toolcall import format_toolcall_observation_messages
from minisweagent.models.utils.actions_toolcall_response import (
    format_toolcall_observation_messages as format_response_api_observation_messages,
)
from minisweagent.models.utils.openai_multimodal import expand_multimodal_content


def make_output(content: str, actions: list[dict], cost: float = 1.0) -> dict:
    """Build one scripted assistant message for DeterministicModel.

    Args:
        content: Text of the assistant reply.
        actions: Action dicts stored under ``extra["actions"]``,
            e.g., [{"command": "echo hello"}]. The list is stored as-is, not copied.
        cost: Value stored under ``extra["cost"]`` (default 1.0).

    Returns:
        A dict with ``role``, ``content`` and ``extra``; ``extra`` also carries
        the creation time as ``timestamp``.
    """
    metadata = {"actions": actions, "cost": cost, "timestamp": time.time()}
    return {"role": "assistant", "content": content, "extra": metadata}


def make_toolcall_output(content: str | None, tool_calls: list[dict], actions: list[dict]) -> dict:
    """Helper to create a toolcall output dict for DeterministicToolcallModel.

    Args:
        content: Optional text content (can be None for tool-only responses)
        tool_calls: List of tool call dicts in OpenAI format
        actions: List of parsed action dicts, e.g., [{"command": "echo hello", "tool_call_id": "call_123"}]
    """
    return {
        "role": "assistant",
        "content": content,
        "tool_calls": tool_calls,
        "extra": {"actions": actions, "cost": 1.0, "timestamp": time.time()},
    }


def make_response_api_output(content: str | None, actions: list[dict]) -> dict:
    """Helper to create an output dict for DeterministicResponseAPIToolcallModel.

    Args:
        content: Optional text content (can be None for tool-only responses)
        actions: List of action dicts with 'command' and 'tool_call_id' keys
    """
    output_items = []
    if content:
        output_items.append(
            {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": content}]}
        )
    for action in actions:
        output_items.append(
            {
                "type": "function_call",
                "call_id": action["tool_call_id"],
                "name": "bash",
                "arguments": f'{{"command": "{action["command"]}"}}',
            }
        )
    return {
        "object": "response",
        "output": output_items,
        "extra": {"actions": actions, "cost": 1.0, "timestamp": time.time()},
    }


def _process_test_actions(actions: list[dict]) -> bool:
    """Process special test actions. Returns True if the query should be retried."""
    for action in actions:
        if "raise" in action:
            raise action["raise"]
        cmd = action.get("command", "")
        if cmd.startswith("/sleep "):
            time.sleep(float(cmd.split("/sleep ")[1]))
            return True
        if cmd.startswith("/warning"):
            logging.warning(cmd.split("/warning")[1])
            return True
    return False


class DeterministicModelConfig(BaseModel):
    """Settings accepted by DeterministicModel (its constructor kwargs are passed here)."""

    outputs: list[dict]
    """List of exact output messages to return in sequence. Each dict should have 'role', 'content', and 'extra' (with 'actions')."""
    model_name: str = "deterministic"
    # Amount reported to GLOBAL_MODEL_STATS for every output returned by query().
    cost_per_call: float = 1.0
    observation_template: str = (
        "{% if output.exception_info %}<exception>{{output.exception_info}}</exception>\n{% endif %}"
        "<returncode>{{output.returncode}}</returncode>\n<output>\n{{output.output}}</output>"
    )
    """Template used to render the observation after executing an action."""
    multimodal_regex: str = ""
    """Regex to extract multimodal content. Empty string disables multimodal processing."""


class DeterministicModel:
    """Test model that replays a fixed list of outputs in order."""

    def __init__(self, **kwargs):
        """Build the config from ``kwargs`` and place the cursor before the first output."""
        self.current_index = -1
        self.config = DeterministicModelConfig(**kwargs)

    def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
        """Return the next scripted output.

        ``messages`` and ``kwargs`` are not inspected. An output whose actions
        trigger a retry directive (see ``_process_test_actions``) is consumed
        without being returned or charged, and the next output is tried.
        ``cost_per_call`` is added to ``GLOBAL_MODEL_STATS`` once per returned
        output. Running past the end of ``config.outputs`` raises ``IndexError``.
        """
        while True:
            self.current_index += 1
            candidate = self.config.outputs[self.current_index]
            scripted_actions = candidate.get("extra", {}).get("actions", [])
            if not _process_test_actions(scripted_actions):
                break
        GLOBAL_MODEL_STATS.add(self.config.cost_per_call)
        return candidate

    def format_message(self, **kwargs) -> dict:
        """Pass the message fields through ``expand_multimodal_content`` with the configured regex."""
        regex = self.config.multimodal_regex
        return expand_multimodal_content(kwargs, pattern=regex)

    def format_observation_messages(
        self, message: dict, outputs: list[dict], template_vars: dict | None = None
    ) -> list[dict]:
        """Format execution outputs into observation messages (``message`` is unused here)."""
        config = self.config
        return format_observation_messages(
            outputs,
            observation_template=config.observation_template,
            template_vars=template_vars,
            multimodal_regex=config.multimodal_regex,
        )

    def get_template_vars(self, **kwargs) -> dict[str, Any]:
        """Expose all config fields as template variables; ``kwargs`` are ignored."""
        config_fields = self.config.model_dump()
        return config_fields

    def serialize(self) -> dict:
        """Return the JSON-mode config dump and the dotted class path under ``info.config``."""
        cls = self.__class__
        config_section = {
            "model": self.config.model_dump(mode="json"),
            "model_type": f"{cls.__module__}.{cls.__name__}",
        }
        return {"info": {"config": config_section}}


class DeterministicToolcallModelConfig(BaseModel):
    """Settings accepted by DeterministicToolcallModel (its constructor kwargs are passed here)."""

    outputs: list[dict]
    """List of exact output messages with tool_calls to return in sequence."""
    model_name: str = "deterministic_toolcall"
    # Amount reported to GLOBAL_MODEL_STATS for every output returned by query().
    cost_per_call: float = 1.0
    observation_template: str = (
        "{% if output.exception_info %}<exception>{{output.exception_info}}</exception>\n{% endif %}"
        "<returncode>{{output.returncode}}</returncode>\n<output>\n{{output.output}}</output>"
    )
    """Template used to render the observation after executing an action."""
    multimodal_regex: str = ""
    """Regex to extract multimodal content. Empty string disables multimodal processing."""


class DeterministicToolcallModel:
    """Test model that replays a fixed list of tool-call outputs in order."""

    def __init__(self, **kwargs):
        """Build the config from ``kwargs`` and place the cursor before the first output."""
        self.current_index = -1
        self.config = DeterministicToolcallModelConfig(**kwargs)

    def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
        """Return the next scripted output.

        ``messages`` and ``kwargs`` are not inspected. An output whose actions
        trigger a retry directive (see ``_process_test_actions``) is consumed
        without being returned or charged, and the next output is tried.
        ``cost_per_call`` is added to ``GLOBAL_MODEL_STATS`` once per returned
        output. Running past the end of ``config.outputs`` raises ``IndexError``.
        """
        while True:
            self.current_index += 1
            candidate = self.config.outputs[self.current_index]
            scripted_actions = candidate.get("extra", {}).get("actions", [])
            if not _process_test_actions(scripted_actions):
                break
        GLOBAL_MODEL_STATS.add(self.config.cost_per_call)
        return candidate

    def format_message(self, **kwargs) -> dict:
        """Pass the message fields through ``expand_multimodal_content`` with the configured regex."""
        regex = self.config.multimodal_regex
        return expand_multimodal_content(kwargs, pattern=regex)

    def format_observation_messages(
        self, message: dict, outputs: list[dict], template_vars: dict | None = None
    ) -> list[dict]:
        """Format execution outputs into tool result messages.

        The actions are read from ``message["extra"]["actions"]`` (empty list
        when absent) and handed to the formatter together with ``outputs``.
        """
        config = self.config
        return format_toolcall_observation_messages(
            actions=message.get("extra", {}).get("actions", []),
            outputs=outputs,
            observation_template=config.observation_template,
            template_vars=template_vars,
            multimodal_regex=config.multimodal_regex,
        )

    def get_template_vars(self, **kwargs) -> dict[str, Any]:
        """Expose all config fields as template variables; ``kwargs`` are ignored."""
        config_fields = self.config.model_dump()
        return config_fields

    def serialize(self) -> dict:
        """Return the JSON-mode config dump and the dotted class path under ``info.config``."""
        cls = self.__class__
        config_section = {
            "model": self.config.model_dump(mode="json"),
            "model_type": f"{cls.__module__}.{cls.__name__}",
        }
        return {"info": {"config": config_section}}


class DeterministicResponseAPIToolcallModelConfig(BaseModel):
    """Settings accepted by DeterministicResponseAPIToolcallModel (its constructor kwargs are passed here)."""

    outputs: list[dict]
    """List of exact Response API output messages to return in sequence."""
    model_name: str = "deterministic_response_api_toolcall"
    # Amount reported to GLOBAL_MODEL_STATS for every output returned by query().
    cost_per_call: float = 1.0
    observation_template: str = (
        "{% if output.exception_info %}<exception>{{output.exception_info}}</exception>\n{% endif %}"
        "<returncode>{{output.returncode}}</returncode>\n<output>\n{{output.output}}</output>"
    )
    """Template used to render the observation after executing an action."""
    multimodal_regex: str = ""
    """Regex to extract multimodal content. Empty string disables multimodal processing."""


class DeterministicResponseAPIToolcallModel:
    """Deterministic test model using OpenAI Responses API format."""

    def __init__(self, **kwargs):
        """Build the config from ``kwargs`` and place the cursor before the first output."""
        self.current_index = -1
        self.config = DeterministicResponseAPIToolcallModelConfig(**kwargs)

    def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
        """Return the next scripted output.

        ``messages`` and ``kwargs`` are not inspected. An output whose actions
        trigger a retry directive (see ``_process_test_actions``) is consumed
        without being returned or charged, and the next output is tried.
        ``cost_per_call`` is added to ``GLOBAL_MODEL_STATS`` once per returned
        output. Running past the end of ``config.outputs`` raises ``IndexError``.
        """
        while True:
            self.current_index += 1
            candidate = self.config.outputs[self.current_index]
            scripted_actions = candidate.get("extra", {}).get("actions", [])
            if not _process_test_actions(scripted_actions):
                break
        GLOBAL_MODEL_STATS.add(self.config.cost_per_call)
        return candidate

    def format_message(self, **kwargs) -> dict:
        """Format message in Responses API format.

        Reads ``role`` (default "user"), ``content`` (default "") and ``extra``
        from ``kwargs``. String content is wrapped in a single ``input_text``
        item; any other content is used unchanged. ``extra`` is attached only
        when truthy.
        """
        content = kwargs.get("content", "")
        if isinstance(content, str):
            content = [{"type": "input_text", "text": content}]
        formatted: dict = {"type": "message", "role": kwargs.get("role", "user"), "content": content}
        extra = kwargs.get("extra")
        if extra:
            formatted["extra"] = extra
        return formatted

    def format_observation_messages(
        self, message: dict, outputs: list[dict], template_vars: dict | None = None
    ) -> list[dict]:
        """Format execution outputs into function_call_output messages.

        The actions are read from ``message["extra"]["actions"]`` (empty list
        when absent) and handed to the formatter together with ``outputs``.
        """
        config = self.config
        return format_response_api_observation_messages(
            actions=message.get("extra", {}).get("actions", []),
            outputs=outputs,
            observation_template=config.observation_template,
            template_vars=template_vars,
            multimodal_regex=config.multimodal_regex,
        )

    def get_template_vars(self, **kwargs) -> dict[str, Any]:
        """Expose all config fields as template variables; ``kwargs`` are ignored."""
        config_fields = self.config.model_dump()
        return config_fields

    def serialize(self) -> dict:
        """Return the JSON-mode config dump and the dotted class path under ``info.config``."""
        cls = self.__class__
        config_section = {
            "model": self.config.model_dump(mode="json"),
            "model_type": f"{cls.__module__}.{cls.__name__}",
        }
        return {"info": {"config": config_section}}

minisweagent.models.test_models

DeterministicModelConfig

Bases: BaseModel

outputs instance-attribute

outputs: list[dict]

List of exact output messages to return in sequence. Each dict should have 'role', 'content', and 'extra' (with 'actions').

model_name class-attribute instance-attribute

model_name: str = 'deterministic'

cost_per_call class-attribute instance-attribute

cost_per_call: float = 1.0

observation_template class-attribute instance-attribute

observation_template: str = "{% if output.exception_info %}<exception>{{output.exception_info}}</exception>\n{% endif %}<returncode>{{output.returncode}}</returncode>\n<output>\n{{output.output}}</output>"

Template used to render the observation after executing an action.

multimodal_regex class-attribute instance-attribute

multimodal_regex: str = ''

Regex to extract multimodal content. Empty string disables multimodal processing.

DeterministicModel

DeterministicModel(**kwargs)

Initialize with a list of output messages to return in sequence.

Source code in src/minisweagent/models/test_models.py
105
106
107
108
def __init__(self, **kwargs):
    """Build the config from ``kwargs`` and place the cursor before the first output."""
    self.current_index = -1
    self.config = DeterministicModelConfig(**kwargs)

config instance-attribute

config = DeterministicModelConfig(**kwargs)

current_index instance-attribute

current_index = -1

query

query(messages: list[dict[str, str]], **kwargs) -> dict
Source code in src/minisweagent/models/test_models.py
110
111
112
113
114
115
116
def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
    """Return the next scripted output, skipping outputs that only trigger a retry directive.

    ``messages`` and ``kwargs`` are not inspected. ``cost_per_call`` is added to
    ``GLOBAL_MODEL_STATS`` once per returned output.
    """
    while True:
        self.current_index += 1
        candidate = self.config.outputs[self.current_index]
        scripted_actions = candidate.get("extra", {}).get("actions", [])
        if not _process_test_actions(scripted_actions):
            break
    GLOBAL_MODEL_STATS.add(self.config.cost_per_call)
    return candidate

format_message

format_message(**kwargs) -> dict
Source code in src/minisweagent/models/test_models.py
118
119
def format_message(self, **kwargs) -> dict:
    """Pass the message fields through ``expand_multimodal_content`` with the configured regex."""
    regex = self.config.multimodal_regex
    return expand_multimodal_content(kwargs, pattern=regex)

format_observation_messages

format_observation_messages(
    message: dict,
    outputs: list[dict],
    template_vars: dict | None = None,
) -> list[dict]

Format execution outputs into observation messages.

Source code in src/minisweagent/models/test_models.py
121
122
123
124
125
126
127
128
129
130
def format_observation_messages(
    self, message: dict, outputs: list[dict], template_vars: dict | None = None
) -> list[dict]:
    """Format execution outputs into observation messages (``message`` is unused here)."""
    config = self.config
    return format_observation_messages(
        outputs,
        observation_template=config.observation_template,
        template_vars=template_vars,
        multimodal_regex=config.multimodal_regex,
    )

get_template_vars

get_template_vars(**kwargs) -> dict[str, Any]
Source code in src/minisweagent/models/test_models.py
132
133
def get_template_vars(self, **kwargs) -> dict[str, Any]:
    """Expose all config fields as template variables; ``kwargs`` are ignored."""
    config_fields = self.config.model_dump()
    return config_fields

serialize

serialize() -> dict
Source code in src/minisweagent/models/test_models.py
135
136
137
138
139
140
141
142
143
def serialize(self) -> dict:
    """Return the JSON-mode config dump and the dotted class path under ``info.config``."""
    cls = self.__class__
    config_section = {
        "model": self.config.model_dump(mode="json"),
        "model_type": f"{cls.__module__}.{cls.__name__}",
    }
    return {"info": {"config": config_section}}

DeterministicToolcallModelConfig

Bases: BaseModel

outputs instance-attribute

outputs: list[dict]

List of exact output messages with tool_calls to return in sequence.

model_name class-attribute instance-attribute

model_name: str = 'deterministic_toolcall'

cost_per_call class-attribute instance-attribute

cost_per_call: float = 1.0

observation_template class-attribute instance-attribute

observation_template: str = "{% if output.exception_info %}<exception>{{output.exception_info}}</exception>\n{% endif %}<returncode>{{output.returncode}}</returncode>\n<output>\n{{output.output}}</output>"

Template used to render the observation after executing an action.

multimodal_regex class-attribute instance-attribute

multimodal_regex: str = ''

Regex to extract multimodal content. Empty string disables multimodal processing.

DeterministicToolcallModel

DeterministicToolcallModel(**kwargs)

Initialize with a list of toolcall output messages to return in sequence.

Source code in src/minisweagent/models/test_models.py
161
162
163
164
def __init__(self, **kwargs):
    """Build the toolcall config from ``kwargs`` and place the cursor before the first output."""
    self.current_index = -1
    self.config = DeterministicToolcallModelConfig(**kwargs)

config instance-attribute

config = DeterministicToolcallModelConfig(**kwargs)

current_index instance-attribute

current_index = -1

query

query(messages: list[dict[str, str]], **kwargs) -> dict
Source code in src/minisweagent/models/test_models.py
166
167
168
169
170
171
172
def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
    """Return the next scripted output, skipping outputs that only trigger a retry directive.

    ``messages`` and ``kwargs`` are not inspected. ``cost_per_call`` is added to
    ``GLOBAL_MODEL_STATS`` once per returned output.
    """
    while True:
        self.current_index += 1
        candidate = self.config.outputs[self.current_index]
        scripted_actions = candidate.get("extra", {}).get("actions", [])
        if not _process_test_actions(scripted_actions):
            break
    GLOBAL_MODEL_STATS.add(self.config.cost_per_call)
    return candidate

format_message

format_message(**kwargs) -> dict
Source code in src/minisweagent/models/test_models.py
174
175
def format_message(self, **kwargs) -> dict:
    """Pass the message fields through ``expand_multimodal_content`` with the configured regex."""
    regex = self.config.multimodal_regex
    return expand_multimodal_content(kwargs, pattern=regex)

format_observation_messages

format_observation_messages(
    message: dict,
    outputs: list[dict],
    template_vars: dict | None = None,
) -> list[dict]

Format execution outputs into tool result messages.

Source code in src/minisweagent/models/test_models.py
177
178
179
180
181
182
183
184
185
186
187
188
def format_observation_messages(
    self, message: dict, outputs: list[dict], template_vars: dict | None = None
) -> list[dict]:
    """Format execution outputs into tool result messages.

    The actions are read from ``message["extra"]["actions"]`` (empty list when absent).
    """
    config = self.config
    return format_toolcall_observation_messages(
        actions=message.get("extra", {}).get("actions", []),
        outputs=outputs,
        observation_template=config.observation_template,
        template_vars=template_vars,
        multimodal_regex=config.multimodal_regex,
    )

get_template_vars

get_template_vars(**kwargs) -> dict[str, Any]
Source code in src/minisweagent/models/test_models.py
190
191
def get_template_vars(self, **kwargs) -> dict[str, Any]:
    """Expose all config fields as template variables; ``kwargs`` are ignored."""
    config_fields = self.config.model_dump()
    return config_fields

serialize

serialize() -> dict
Source code in src/minisweagent/models/test_models.py
193
194
195
196
197
198
199
200
201
def serialize(self) -> dict:
    """Return the JSON-mode config dump and the dotted class path under ``info.config``."""
    cls = self.__class__
    config_section = {
        "model": self.config.model_dump(mode="json"),
        "model_type": f"{cls.__module__}.{cls.__name__}",
    }
    return {"info": {"config": config_section}}

DeterministicResponseAPIToolcallModelConfig

Bases: BaseModel

outputs instance-attribute

outputs: list[dict]

List of exact Response API output messages to return in sequence.

model_name class-attribute instance-attribute

model_name: str = 'deterministic_response_api_toolcall'

cost_per_call class-attribute instance-attribute

cost_per_call: float = 1.0

observation_template class-attribute instance-attribute

observation_template: str = "{% if output.exception_info %}<exception>{{output.exception_info}}</exception>\n{% endif %}<returncode>{{output.returncode}}</returncode>\n<output>\n{{output.output}}</output>"

Template used to render the observation after executing an action.

multimodal_regex class-attribute instance-attribute

multimodal_regex: str = ''

Regex to extract multimodal content. Empty string disables multimodal processing.

DeterministicResponseAPIToolcallModel

DeterministicResponseAPIToolcallModel(**kwargs)

Deterministic test model using OpenAI Responses API format.

Initialize with a list of Response API output messages to return in sequence.

Source code in src/minisweagent/models/test_models.py
221
222
223
224
def __init__(self, **kwargs):
    """Build the Response API config from ``kwargs`` and place the cursor before the first output."""
    self.current_index = -1
    self.config = DeterministicResponseAPIToolcallModelConfig(**kwargs)

config instance-attribute

config = DeterministicResponseAPIToolcallModelConfig(**kwargs)

current_index instance-attribute

current_index = -1

query

query(messages: list[dict[str, str]], **kwargs) -> dict
Source code in src/minisweagent/models/test_models.py
226
227
228
229
230
231
232
def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
    """Return the next scripted output, skipping outputs that only trigger a retry directive.

    ``messages`` and ``kwargs`` are not inspected. ``cost_per_call`` is added to
    ``GLOBAL_MODEL_STATS`` once per returned output.
    """
    while True:
        self.current_index += 1
        candidate = self.config.outputs[self.current_index]
        scripted_actions = candidate.get("extra", {}).get("actions", [])
        if not _process_test_actions(scripted_actions):
            break
    GLOBAL_MODEL_STATS.add(self.config.cost_per_call)
    return candidate

format_message

format_message(**kwargs) -> dict

Format message in Responses API format.

Source code in src/minisweagent/models/test_models.py
234
235
236
237
238
239
240
241
242
243
def format_message(self, **kwargs) -> dict:
    """Format message in Responses API format.

    Reads ``role`` (default "user"), ``content`` (default "") and ``extra`` from
    ``kwargs``. String content is wrapped in a single ``input_text`` item; any
    other content is used unchanged. ``extra`` is attached only when truthy.
    """
    content = kwargs.get("content", "")
    if isinstance(content, str):
        content = [{"type": "input_text", "text": content}]
    formatted: dict = {"type": "message", "role": kwargs.get("role", "user"), "content": content}
    extra = kwargs.get("extra")
    if extra:
        formatted["extra"] = extra
    return formatted

format_observation_messages

format_observation_messages(
    message: dict,
    outputs: list[dict],
    template_vars: dict | None = None,
) -> list[dict]

Format execution outputs into function_call_output messages.

Source code in src/minisweagent/models/test_models.py
245
246
247
248
249
250
251
252
253
254
255
256
def format_observation_messages(
    self, message: dict, outputs: list[dict], template_vars: dict | None = None
) -> list[dict]:
    """Format execution outputs into function_call_output messages.

    The actions are read from ``message["extra"]["actions"]`` (empty list when absent).
    """
    config = self.config
    return format_response_api_observation_messages(
        actions=message.get("extra", {}).get("actions", []),
        outputs=outputs,
        observation_template=config.observation_template,
        template_vars=template_vars,
        multimodal_regex=config.multimodal_regex,
    )

get_template_vars

get_template_vars(**kwargs) -> dict[str, Any]
Source code in src/minisweagent/models/test_models.py
258
259
def get_template_vars(self, **kwargs) -> dict[str, Any]:
    """Expose all config fields as template variables; ``kwargs`` are ignored."""
    config_fields = self.config.model_dump()
    return config_fields

serialize

serialize() -> dict
Source code in src/minisweagent/models/test_models.py
261
262
263
264
265
266
267
268
269
def serialize(self) -> dict:
    """Return the JSON-mode config dump and the dotted class path under ``info.config``."""
    cls = self.__class__
    config_section = {
        "model": self.config.model_dump(mode="json"),
        "model_type": f"{cls.__module__}.{cls.__name__}",
    }
    return {"info": {"config": config_section}}

make_output

make_output(
    content: str, actions: list[dict], cost: float = 1.0
) -> dict

Helper to create an output dict for DeterministicModel.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| content | str | The response content string | required |
| actions | list[dict] | List of action dicts, e.g., [{"command": "echo hello"}] | required |
| cost | float | Cost to report for this output (default 1.0) | 1.0 |
Source code in src/minisweagent/models/test_models.py
16
17
18
19
20
21
22
23
24
25
26
27
28
def make_output(content: str, actions: list[dict], cost: float = 1.0) -> dict:
    """Build one scripted assistant message for DeterministicModel.

    Args:
        content: Text of the assistant reply.
        actions: Action dicts stored under ``extra["actions"]``,
            e.g., [{"command": "echo hello"}]. The list is stored as-is, not copied.
        cost: Value stored under ``extra["cost"]`` (default 1.0).

    Returns:
        A dict with ``role``, ``content`` and ``extra``; ``extra`` also carries
        the creation time as ``timestamp``.
    """
    metadata = {"actions": actions, "cost": cost, "timestamp": time.time()}
    return {"role": "assistant", "content": content, "extra": metadata}

make_toolcall_output

make_toolcall_output(
    content: str | None,
    tool_calls: list[dict],
    actions: list[dict],
) -> dict

Helper to create a toolcall output dict for DeterministicToolcallModel.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| content | str \| None | Optional text content (can be None for tool-only responses) | required |
| tool_calls | list[dict] | List of tool call dicts in OpenAI format | required |
| actions | list[dict] | List of parsed action dicts, e.g., [{"command": "echo hello", "tool_call_id": "call_123"}] | required |
Source code in src/minisweagent/models/test_models.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def make_toolcall_output(content: str | None, tool_calls: list[dict], actions: list[dict]) -> dict:
    """Helper to create a toolcall output dict for DeterministicToolcallModel.

    Args:
        content: Optional text content (can be None for tool-only responses)
        tool_calls: List of tool call dicts in OpenAI format
        actions: List of parsed action dicts, e.g., [{"command": "echo hello", "tool_call_id": "call_123"}]
    """
    return {
        "role": "assistant",
        "content": content,
        "tool_calls": tool_calls,
        "extra": {"actions": actions, "cost": 1.0, "timestamp": time.time()},
    }

make_response_api_output

make_response_api_output(
    content: str | None, actions: list[dict]
) -> dict

Helper to create an output dict for DeterministicResponseAPIToolcallModel.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| content | str \| None | Optional text content (can be None for tool-only responses) | required |
| actions | list[dict] | List of action dicts with 'command' and 'tool_call_id' keys | required |
Source code in src/minisweagent/models/test_models.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def make_response_api_output(content: str | None, actions: list[dict]) -> dict:
    """Helper to create an output dict for DeterministicResponseAPIToolcallModel.

    Args:
        content: Optional text content (can be None for tool-only responses)
        actions: List of action dicts with 'command' and 'tool_call_id' keys
    """
    output_items = []
    if content:
        output_items.append(
            {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": content}]}
        )
    for action in actions:
        output_items.append(
            {
                "type": "function_call",
                "call_id": action["tool_call_id"],
                "name": "bash",
                "arguments": f'{{"command": "{action["command"]}"}}',
            }
        )
    return {
        "object": "response",
        "output": output_items,
        "extra": {"actions": actions, "cost": 1.0, "timestamp": time.time()},
    }