LiteLLM Response API Model

LiteLLM Response API Model class

Full source code
import logging
from collections.abc import Callable
from dataclasses import dataclass

import litellm
from tenacity import (
    before_sleep_log,
    retry,
    retry_if_not_exception_type,
    stop_after_attempt,
    wait_exponential,
)

from minisweagent.models.litellm_model import LitellmModel, LitellmModelConfig
from minisweagent.models.utils.openai_utils import coerce_responses_text

logger = logging.getLogger("litellm_response_api_model")


@dataclass
class LitellmResponseAPIModelConfig(LitellmModelConfig):
    pass


class LitellmResponseAPIModel(LitellmModel):
    def __init__(self, *, config_class: Callable = LitellmResponseAPIModelConfig, **kwargs):
        super().__init__(config_class=config_class, **kwargs)
        self._previous_response_id: str | None = None

    @retry(
        stop=stop_after_attempt(10),
        wait=wait_exponential(multiplier=1, min=4, max=60),
        before_sleep=before_sleep_log(logger, logging.WARNING),
        retry=retry_if_not_exception_type(
            (
                litellm.exceptions.UnsupportedParamsError,
                litellm.exceptions.NotFoundError,
                litellm.exceptions.PermissionDeniedError,
                litellm.exceptions.ContextWindowExceededError,
                litellm.exceptions.APIError,
                litellm.exceptions.AuthenticationError,
                KeyboardInterrupt,
            )
        ),
    )
    def _query(self, messages: list[dict[str, str]], **kwargs):
        try:
            resp = litellm.responses(
                model=self.config.model_name,
                # After the first call, send only the newest message; earlier
                # context is carried server-side via previous_response_id.
                input=messages if self._previous_response_id is None else messages[-1:],
                previous_response_id=self._previous_response_id,
                **(self.config.model_kwargs | kwargs),
            )
            self._previous_response_id = getattr(resp, "id", None)
            return resp
        except litellm.exceptions.AuthenticationError as e:
            e.message += " You can permanently set your API key with `mini-extra config set KEY VALUE`."
            raise e

    def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
        response = self._query(messages, **kwargs)
        text = coerce_responses_text(response)
        try:
            cost = litellm.cost_calculator.completion_cost(response, model=self.config.model_name)
        except Exception as e:
            logger.critical(
                f"Error calculating cost for model {self.config.model_name}: {e}. "
                "Please check the 'Updating the model registry' section in the documentation. "
                "http://bit.ly/4p31bi4 Still stuck? Please open a github issue for help!"
            )
            raise
        self.n_calls += 1
        self.cost += cost
        from minisweagent.models import GLOBAL_MODEL_STATS

        GLOBAL_MODEL_STATS.add(cost)
        return {
            "content": text,
        }
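
How the stateful conversation works

Unlike the plain LitellmModel, this class keeps the conversation server-side: the first call sends the full message list, while every later call sends only the newest message (messages[-1:]) together with the previous_response_id returned by the last call. A minimal sketch of that call pattern (assuming a valid OpenAI API key is configured; the model name is only an example):

model = LitellmResponseAPIModel(model_name="openai/gpt-5-mini")

history = [{"role": "user", "content": "List the files in this repo."}]
first = model.query(history)  # full history is sent; the response id is stored

history.append({"role": "assistant", "content": first["content"]})
history.append({"role": "user", "content": "Now summarize the README."})
second = model.query(history)  # only history[-1:] is sent, chained via previous_response_id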

When to use this model

  • Use this model class when you want to talk to OpenAI's Responses API (the newer, stateful successor to the Chat Completions API) instead of the Chat Completions endpoint.
  • This is particularly useful for models like GPT-5 that benefit from the extended reasoning capabilities exposed through the Responses API.

Usage

To use the Response API model, specify model_class: "litellm_response" in your agent config:

model:
  model_class: "litellm_response"
  model_name: "openai/gpt-5.1"
  model_kwargs:
    drop_params: true
    reasoning:
      effort: "medium"
    text:
      verbosity: "medium"

Or via command line:

mini -m "openai/gpt-5-mini" --model-class litellm_response
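
For reference, this is roughly what each call translates to under the hood (a sketch: model_kwargs are merged into the litellm.responses call as keyword arguments, and previous_response_id is filled in automatically after the first call):

import litellm

resp = litellm.responses(
    model="openai/gpt-5.1",
    input=[{"role": "user", "content": "Hello"}],
    previous_response_id=None,  # set to the last response id on subsequent calls
    drop_params=True,
    reasoning={"effort": "medium"},
    text={"verbosity": "medium"},
)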

minisweagent.models.litellm_response_api_model

logger module-attribute

logger = getLogger('litellm_response_api_model')

LitellmResponseAPIModelConfig dataclass

LitellmResponseAPIModelConfig(
    model_name: str,
    model_kwargs: dict[str, Any] = dict(),
    litellm_model_registry: Path | str | None = getenv(
        "LITELLM_MODEL_REGISTRY_PATH"
    ),
    set_cache_control: Literal["default_end"] | None = None,
    cost_tracking: Literal[
        "default", "ignore_errors"
    ] = getenv("MSWEA_COST_TRACKING", "default"),
)
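
All fields are inherited from LitellmModelConfig; this subclass adds none of its own. A minimal construction sketch (values are illustrative):

config = LitellmResponseAPIModelConfig(
    model_name="openai/gpt-5-mini",
    model_kwargs={"reasoning": {"effort": "medium"}},
)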

LitellmResponseAPIModel

LitellmResponseAPIModel(
    *,
    config_class: Callable = LitellmResponseAPIModelConfig,
    **kwargs,
)

Bases: LitellmModel

Source code in src/minisweagent/models/litellm_response_api_model.py
def __init__(self, *, config_class: Callable = LitellmResponseAPIModelConfig, **kwargs):
    super().__init__(config_class=config_class, **kwargs)
    self._previous_response_id: str | None = None

query

query(messages: list[dict[str, str]], **kwargs) -> dict
Source code in src/minisweagent/models/litellm_response_api_model.py
def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
    response = self._query(messages, **kwargs)
    text = coerce_responses_text(response)
    try:
        cost = litellm.cost_calculator.completion_cost(response, model=self.config.model_name)
    except Exception as e:
        logger.critical(
            f"Error calculating cost for model {self.config.model_name}: {e}. "
            "Please check the 'Updating the model registry' section in the documentation. "
            "http://bit.ly/4p31bi4 Still stuck? Please open a github issue for help!"
        )
        raise
    self.n_calls += 1
    self.cost += cost
    from minisweagent.models import GLOBAL_MODEL_STATS

    GLOBAL_MODEL_STATS.add(cost)
    return {
        "content": text,
    }
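
The returned dict contains only the coerced response text; per-call cost and the call count are accumulated on the model instance and in the global stats. A short usage sketch (assuming the model was constructed as above):

out = model.query([{"role": "user", "content": "Say hi"}])
print(out["content"])             # coerced text of the response
print(model.n_calls, model.cost)  # running usage counters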