Litellm Response Model

LiteLLM Response API Model class

Read on GitHub

Full source code

import logging
import time
from collections.abc import Callable

import litellm

from minisweagent.exceptions import FormatError
from minisweagent.models import GLOBAL_MODEL_STATS
from minisweagent.models.litellm_model import LitellmModel, LitellmModelConfig
from minisweagent.models.utils.actions_toolcall_response import (
    BASH_TOOL_RESPONSE_API,
    finish_reason_from_responses_api,
    format_toolcall_observation_messages,
    parse_toolcall_actions_response,
)
from minisweagent.models.utils.retry import retry

logger = logging.getLogger("litellm_response_model")


class LitellmResponseModelConfig(LitellmModelConfig):
    """Configuration for ``LitellmResponseModel``; adds no fields beyond ``LitellmModelConfig``."""


class LitellmResponseModel(LitellmModel):
    """LiteLLM model that queries ``litellm.responses`` with a bash tool attached.

    Calls are stateless: response objects stored earlier in the conversation
    are flattened into their output items and re-sent as ``input`` each time.
    """

    def __init__(self, *, config_class: Callable = LitellmResponseModelConfig, **kwargs):
        """Initialize via the parent constructor, defaulting ``config_class`` to the response config."""
        super().__init__(config_class=config_class, **kwargs)

    @staticmethod
    def _response_as_dict(response, **dump_kwargs) -> dict:
        """Return ``response`` as a plain dict.

        ``litellm.responses()`` returns a pydantic object, but tests may inject
        a plain dict; objects without ``model_dump`` therefore go through
        ``dict()``. ``dump_kwargs`` are forwarded to ``model_dump`` only.
        """
        if hasattr(response, "model_dump"):
            return response.model_dump(**dump_kwargs)
        return dict(response)

    def _prepare_messages_for_api(self, messages: list[dict]) -> list[dict]:
        """Flatten response objects into their output items for stateless API calls.

        Messages whose ``"object"`` is ``"response"`` are replaced by the items
        of their ``"output"`` list; every other message is passed through. The
        ``"extra"`` key is dropped from each emitted dict.
        """
        result = []
        for msg in messages:
            if msg.get("object") == "response":
                # `or []` tolerates a stored response whose "output" is None.
                items = msg.get("output") or []
            else:
                items = [msg]
            result.extend({k: v for k, v in item.items() if k != "extra"} for item in items)
        return result

    def _query(self, messages: list[dict[str, str]], **kwargs):
        """Call ``litellm.responses`` once with the bash tool.

        Per-call ``kwargs`` override ``self.config.model_kwargs`` on key clashes.

        Raises:
            litellm.exceptions.AuthenticationError: re-raised with a hint on
                how to store the API key appended to its message.
        """
        try:
            return litellm.responses(
                model=self.config.model_name,
                input=messages,
                tools=[BASH_TOOL_RESPONSE_API],
                **(self.config.model_kwargs | kwargs),
            )
        except litellm.exceptions.AuthenticationError as e:
            e.message += " You can permanently set your API key with `mini-extra config set KEY VALUE`."
            raise

    def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
        """Query the model and return the response as a dict with an ``"extra"`` entry.

        The returned dict is the dumped response plus ``"extra"``, which holds
        the parsed ``actions``, the cost output and a ``timestamp``.

        Raises:
            FormatError: when action parsing fails. Before re-raising, the cost
                output and a serialized copy of the response are attached to
                the ``"extra"`` dict of the error's first message.
        """
        for attempt in retry(logger=logger, abort_exceptions=self.abort_exceptions):
            with attempt:
                response = self._query(self._prepare_messages_for_api(messages), **kwargs)
        cost_output = self._calculate_cost(response)
        GLOBAL_MODEL_STATS.add(cost_output["cost"])
        try:
            actions = self._parse_actions(response)
        except FormatError as e:
            # Guard the bookkeeping so that a missing "extra" key or an empty
            # message list can never mask the FormatError being propagated.
            if e.messages:
                extra = e.messages[0].setdefault("extra", {})
                extra.update(cost_output)
                # Inner try: if serialization itself fails, repr() guarantees the key is always set.
                try:
                    extra["response"] = self._response_as_dict(response, mode="json")
                except Exception:
                    extra["response"] = repr(response)
            raise
        message = self._response_as_dict(response)
        message["extra"] = {
            "actions": actions,
            **cost_output,
            "timestamp": time.time(),
        }
        return message

    def _parse_actions(self, response) -> list[dict]:
        """Parse tool-call actions from the response's ``output`` attribute."""
        # NOTE(review): getattr() on a plain dict response yields [] here, so a dict's
        # "output" key is never read — confirm that is intended for dict responses.
        return parse_toolcall_actions_response(
            getattr(response, "output", []),
            format_error_template=self.config.format_error_template,
            template_kwargs={"finish_reason": finish_reason_from_responses_api(response)},
        )

    def format_observation_messages(
        self, message: dict, outputs: list[dict], template_vars: dict | None = None
    ) -> list[dict]:
        """Format execution outputs into tool result messages.

        The actions are read from ``message["extra"]["actions"]``; a missing
        key at either level yields an empty action list.
        """
        actions = message.get("extra", {}).get("actions", [])
        return format_toolcall_observation_messages(
            actions=actions,
            outputs=outputs,
            observation_template=self.config.observation_template,
            template_vars=template_vars,
            multimodal_regex=self.config.multimodal_regex,
        )

When to use this model

Use this model class when you want to use OpenAI's Responses API with native tool calling.
This is particularly useful for models like GPT-5 that benefit from the extended thinking/reasoning capabilities provided by the Responses API.
This model is stateless: on every turn it re-sends the full conversation, flattening earlier responses into their output items, rather than relying on previous_response_id.

Usage

To use the Response API model, specify model_class: "litellm_response" in your agent config:

model:
  model_class: "litellm_response"
  model_name: "openai/gpt-5.2"
  model_kwargs:
    drop_params: true
    reasoning:
      effort: "high"

Or via command line:

mini -m "openai/gpt-5.2" --model-class litellm_response

minisweagent.models.litellm_response_model

logger `module-attribute`

logger = logging.getLogger('litellm_response_model')

LitellmResponseModelConfig

Bases: LitellmModelConfig

LitellmResponseModel

LitellmResponseModel(
    *,
    config_class: Callable = LitellmResponseModelConfig,
    **kwargs,
)

Bases: LitellmModel

Source code in src/minisweagent/models/litellm_response_model.py

def __init__(
    self,
    *,
    config_class: Callable = LitellmResponseModelConfig,
    **kwargs,
):
    """Initialize via the parent constructor, defaulting ``config_class`` to the response config."""
    super().__init__(config_class=config_class, **kwargs)

query

query(messages: list[dict[str, str]], **kwargs) -> dict

Source code in src/minisweagent/models/litellm_response_model.py

def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
    """Query the model and return the response as a dict with an ``"extra"`` entry.

    The returned dict is the dumped response plus ``"extra"``, which holds the
    parsed ``actions``, the cost output and a ``timestamp``.

    Raises:
        FormatError: when action parsing fails. Before re-raising, the cost
            output and a serialized copy of the response are attached to the
            ``"extra"`` dict of the error's first message.
    """
    for attempt in retry(logger=logger, abort_exceptions=self.abort_exceptions):
        with attempt:
            response = self._query(self._prepare_messages_for_api(messages), **kwargs)
    cost_output = self._calculate_cost(response)
    GLOBAL_MODEL_STATS.add(cost_output["cost"])
    try:
        actions = self._parse_actions(response)
    except FormatError as e:
        # Guard the bookkeeping so that a missing "extra" key or an empty
        # message list can never mask the FormatError being propagated.
        if e.messages:
            extra = e.messages[0].setdefault("extra", {})
            extra.update(cost_output)
            # hasattr guard: litellm.responses() returns a pydantic object, but tests
            # may inject a plain dict; dict(response) is the correct fallback in that case.
            # Inner try: if serialization itself fails, repr() guarantees the key is always set.
            try:
                extra["response"] = (
                    response.model_dump(mode="json") if hasattr(response, "model_dump") else dict(response)
                )
            except Exception:
                extra["response"] = repr(response)
        raise
    message = response.model_dump() if hasattr(response, "model_dump") else dict(response)
    message["extra"] = {
        "actions": actions,
        **cost_output,
        "timestamp": time.time(),
    }
    return message

format_observation_messages

format_observation_messages(
    message: dict,
    outputs: list[dict],
    template_vars: dict | None = None,
) -> list[dict]

Format execution outputs into tool result messages.

Source code in src/minisweagent/models/litellm_response_model.py

def format_observation_messages(
    self, message: dict, outputs: list[dict], template_vars: dict | None = None
) -> list[dict]:
    """Format execution outputs into tool result messages.

    The actions are read from ``message["extra"]["actions"]``; a missing key
    at either level yields an empty action list.
    """
    extra = message.get("extra", {})
    return format_toolcall_observation_messages(
        actions=extra.get("actions", []),
        outputs=outputs,
        observation_template=self.config.observation_template,
        template_vars=template_vars,
        multimodal_regex=self.config.multimodal_regex,
    )

Something broken/unclear?

Open an issue on GitHub!

Open-ended discussions

Join our Slack!

Our projects

Litellm Response Model

Usage

minisweagent.models.litellm_response_model

logger module-attribute

LitellmResponseModelConfig

LitellmResponseModel

query

format_observation_messages

logger `module-attribute`