LiteLLM Response API Model

LiteLLM Response API Model class

Full source code
import logging
from collections.abc import Callable
from dataclasses import dataclass

import litellm
from tenacity import (
    before_sleep_log,
    retry,
    retry_if_not_exception_type,
    stop_after_attempt,
    wait_exponential,
)

from minisweagent.models.litellm_model import LitellmModel, LitellmModelConfig
from minisweagent.models.utils.openai_utils import coerce_responses_text

logger = logging.getLogger("litellm_response_api_model")


@dataclass
class LitellmResponseAPIModelConfig(LitellmModelConfig):
    pass


class LitellmResponseAPIModel(LitellmModel):
    def __init__(self, *, config_class: Callable = LitellmResponseAPIModelConfig, **kwargs):
        super().__init__(config_class=config_class, **kwargs)
        self._previous_response_id: str | None = None

    @retry(
        stop=stop_after_attempt(10),
        wait=wait_exponential(multiplier=1, min=4, max=60),
        before_sleep=before_sleep_log(logger, logging.WARNING),
        retry=retry_if_not_exception_type(
            (
                litellm.exceptions.UnsupportedParamsError,
                litellm.exceptions.NotFoundError,
                litellm.exceptions.PermissionDeniedError,
                litellm.exceptions.ContextWindowExceededError,
                litellm.exceptions.APIError,
                litellm.exceptions.AuthenticationError,
                KeyboardInterrupt,
            )
        ),
    )
    def _query(self, messages: list[dict[str, str]], **kwargs):
        try:
            resp = litellm.responses(
                model=self.config.model_name,
                # After the first call, send only the newest message; earlier
                # context is carried server-side via previous_response_id.
                input=messages if self._previous_response_id is None else messages[-1:],
                previous_response_id=self._previous_response_id,
                **(self.config.model_kwargs | kwargs),
            )
            self._previous_response_id = getattr(resp, "id", None)
            return resp
        except litellm.exceptions.AuthenticationError as e:
            e.message += " You can permanently set your API key with `mini-extra config set KEY VALUE`."
            raise e

    def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
        response = self._query(messages, **kwargs)
        text = coerce_responses_text(response)
        try:
            cost = litellm.cost_calculator.completion_cost(response, model=self.config.model_name)
        except Exception as e:
            logger.critical(
                f"Error calculating cost for model {self.config.model_name}: {e}. "
                "Please check the 'Updating the model registry' section in the documentation. "
                "http://bit.ly/4p31bi4 Still stuck? Please open a github issue for help!"
            )
            raise
        self.n_calls += 1
        self.cost += cost
        from minisweagent.models import GLOBAL_MODEL_STATS

        GLOBAL_MODEL_STATS.add(cost)
        return {
            "content": text,
        }
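
How the stateful conversation works

Unlike the plain LitellmModel, this class keeps the conversation server-side: the first call sends the full message list, while every later call sends only the newest message (messages[-1:]) together with the previous_response_id returned by the last call. A minimal sketch of that call pattern (assuming a valid OpenAI API key is configured; the model name is only an example):

model = LitellmResponseAPIModel(model_name="openai/gpt-5-mini")

history = [{"role": "user", "content": "List the files in this repo."}]
first = model.query(history)  # full history is sent; the response id is stored

history.append({"role": "assistant", "content": first["content"]})
history.append({"role": "user", "content": "Now summarize the README."})
second = model.query(history)  # only history[-1:] is sent, chained via previous_response_id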

When to use this model

  • Use this model class when you want to talk to OpenAI's Responses API (the newer, stateful successor to the Chat Completions API) instead of the Chat Completions endpoint.
  • This is particularly useful for models like GPT-5 that benefit from the extended reasoning capabilities exposed through the Responses API.

Usage

To use the Response API model, specify model_class: "litellm_response" in your agent config:

model:
  model_class: "litellm_response"
  model_name: "openai/gpt-5.1"
  model_kwargs:
    drop_params: true
    reasoning:
      effort: "medium"
    text:
      verbosity: "medium"

Or via command line:

mini -m "openai/gpt-5-mini" --model-class litellm_response
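
For reference, this is roughly what each call translates to under the hood (a sketch: model_kwargs are merged into the litellm.responses call as keyword arguments, and previous_response_id is filled in automatically after the first call):

import litellm

resp = litellm.responses(
    model="openai/gpt-5.1",
    input=[{"role": "user", "content": "Hello"}],
    previous_response_id=None,  # set to the last response id on subsequent calls
    drop_params=True,
    reasoning={"effort": "medium"},
    text={"verbosity": "medium"},
)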

minisweagent.models.litellm_response_api_model

logger module-attribute

logger = getLogger('litellm_response_api_model')

LitellmResponseAPIModelConfig dataclass

LitellmResponseAPIModelConfig(
    model_name: str,
    model_kwargs: dict[str, Any] = dict(),
    litellm_model_registry: Path | str | None = getenv(
        "LITELLM_MODEL_REGISTRY_PATH"
    ),
    set_cache_control: Literal["default_end"] | None = None,
    cost_tracking: Literal[
        "default", "ignore_errors"
    ] = getenv("MSWEA_COST_TRACKING", "default"),
)
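
All fields are inherited from LitellmModelConfig; this subclass adds none of its own. A minimal construction sketch (values are illustrative):

config = LitellmResponseAPIModelConfig(
    model_name="openai/gpt-5-mini",
    model_kwargs={"reasoning": {"effort": "medium"}},
)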

LitellmResponseAPIModel

LitellmResponseAPIModel(
    *,
    config_class: Callable = LitellmResponseAPIModelConfig,
    **kwargs,
)

Bases: LitellmModel

Source code in src/minisweagent/models/litellm_response_api_model.py
def __init__(self, *, config_class: Callable = LitellmResponseAPIModelConfig, **kwargs):
    super().__init__(config_class=config_class, **kwargs)
    self._previous_response_id: str | None = None

query

query(messages: list[dict[str, str]], **kwargs) -> dict
Source code in src/minisweagent/models/litellm_response_api_model.py
def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
    response = self._query(messages, **kwargs)
    text = coerce_responses_text(response)
    try:
        cost = litellm.cost_calculator.completion_cost(response, model=self.config.model_name)
    except Exception as e:
        logger.critical(
            f"Error calculating cost for model {self.config.model_name}: {e}. "
            "Please check the 'Updating the model registry' section in the documentation. "
            "http://bit.ly/4p31bi4 Still stuck? Please open a github issue for help!"
        )
        raise
    self.n_calls += 1
    self.cost += cost
    from minisweagent.models import GLOBAL_MODEL_STATS

    GLOBAL_MODEL_STATS.add(cost)
    return {
        "content": text,
    }
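
The returned dict contains only the coerced response text; per-call cost and the call count are accumulated on the model instance and in the global stats. A short usage sketch (assuming the model was constructed as above):

out = model.query([{"role": "user", "content": "Say hi"}])
print(out["content"])             # coerced text of the response
print(model.n_calls, model.cost)  # running usage counters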