# LiteLLM Response API Model

The `LitellmResponseAPIModel` class: a LiteLLM-backed model that talks to OpenAI's Responses API.

## Full source code
```python
import logging
from collections.abc import Callable
from dataclasses import dataclass

import litellm
from tenacity import (
    before_sleep_log,
    retry,
    retry_if_not_exception_type,
    stop_after_attempt,
    wait_exponential,
)

from minisweagent.models.litellm_model import LitellmModel, LitellmModelConfig
from minisweagent.models.utils.openai_utils import coerce_responses_text

logger = logging.getLogger("litellm_response_api_model")


@dataclass
class LitellmResponseAPIModelConfig(LitellmModelConfig):
    pass


class LitellmResponseAPIModel(LitellmModel):
    def __init__(self, *, config_class: Callable = LitellmResponseAPIModelConfig, **kwargs):
        super().__init__(config_class=config_class, **kwargs)
        self._previous_response_id: str | None = None

    @retry(
        stop=stop_after_attempt(10),
        wait=wait_exponential(multiplier=1, min=4, max=60),
        before_sleep=before_sleep_log(logger, logging.WARNING),
        retry=retry_if_not_exception_type(
            (
                litellm.exceptions.UnsupportedParamsError,
                litellm.exceptions.NotFoundError,
                litellm.exceptions.PermissionDeniedError,
                litellm.exceptions.ContextWindowExceededError,
                litellm.exceptions.APIError,
                litellm.exceptions.AuthenticationError,
                KeyboardInterrupt,
            )
        ),
    )
    def _query(self, messages: list[dict[str, str]], **kwargs):
        try:
            resp = litellm.responses(
                model=self.config.model_name,
                # After the first call, send only the newest message; the server
                # restores the earlier context via previous_response_id.
                input=messages if self._previous_response_id is None else messages[-1:],
                previous_response_id=self._previous_response_id,
                **(self.config.model_kwargs | kwargs),
            )
            self._previous_response_id = getattr(resp, "id", None)
            return resp
        except litellm.exceptions.AuthenticationError as e:
            e.message += " You can permanently set your API key with `mini-extra config set KEY VALUE`."
            raise e

    def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
        response = self._query(messages, **kwargs)
        text = coerce_responses_text(response)
        try:
            cost = litellm.cost_calculator.completion_cost(response, model=self.config.model_name)
        except Exception as e:
            logger.critical(
                f"Error calculating cost for model {self.config.model_name}: {e}. "
                "Please check the 'Updating the model registry' section in the documentation. "
                "http://bit.ly/4p31bi4 Still stuck? Please open a github issue for help!"
            )
            raise
        self.n_calls += 1
        self.cost += cost
        from minisweagent.models import GLOBAL_MODEL_STATS  # local import to avoid an import cycle

        GLOBAL_MODEL_STATS.add(cost)
        return {
            "content": text,
        }
```
## When to use this model

- Use this model class when you want to talk to OpenAI's Responses API (the successor to the Chat Completions API) rather than the Chat Completions endpoint.
- This is particularly useful for reasoning models like GPT-5, which benefit from the extended thinking/reasoning state that the Responses API persists between turns; the sketch after this list shows the call pattern the class implements.
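Under the hood, the class chains turns together with `previous_response_id`: the first call sends the full message list, and every later call sends only the newest message, letting the server replay the earlier context (including reasoning items). A minimal sketch of that call pattern, assuming a configured OpenAI API key; the model name and prompts are placeholders:

```python
import litellm

# First turn: send the full conversation so far.
first = litellm.responses(
    model="openai/gpt-5-mini",
    input=[{"role": "user", "content": "List the files in the repo."}],
)

# Later turns: send only the newest message and chain to the previous
# response via its id; the server restores the earlier context.
second = litellm.responses(
    model="openai/gpt-5-mini",
    input=[{"role": "user", "content": "Now summarize the README."}],
    previous_response_id=first.id,
)
```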
## Usage

To use the Response API model, specify `model_class: "litellm_response"` in your agent config:

```yaml
model:
  model_class: "litellm_response"
  model_name: "openai/gpt-5.1"
  model_kwargs:
    drop_params: true
    reasoning:
      effort: "medium"
    text:
      verbosity: "medium"
```
Or via the command line:

```bash
mini -m "openai/gpt-5-mini" --model-class litellm_response
```
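You can also construct the model directly in Python. A minimal sketch, assuming the parent `LitellmModel` forwards keyword arguments into the config class (as elsewhere in the package); the model name is a placeholder:

```python
from minisweagent.models.litellm_response_api_model import LitellmResponseAPIModel

# Keyword arguments populate LitellmResponseAPIModelConfig;
# model_kwargs are splatted into litellm.responses() on every call.
model = LitellmResponseAPIModel(
    model_name="openai/gpt-5-mini",
    model_kwargs={"reasoning": {"effort": "medium"}},
)

result = model.query([{"role": "user", "content": "Say hello."}])
print(result["content"])  # query() returns {"content": <response text>}
```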
## API reference: `minisweagent.models.litellm_response_api_model`

Source: `src/minisweagent/models/litellm_response_api_model.py` (shown in full above).

### `logger` (module attribute)

```python
logger = getLogger('litellm_response_api_model')
```
### `LitellmResponseAPIModelConfig` (dataclass)

```python
LitellmResponseAPIModelConfig(
    model_name: str,
    model_kwargs: dict[str, Any] = dict(),
    litellm_model_registry: Path | str | None = getenv("LITELLM_MODEL_REGISTRY_PATH"),
    set_cache_control: Literal["default_end"] | None = None,
    cost_tracking: Literal["default", "ignore_errors"] = getenv("MSWEA_COST_TRACKING", "default"),
)
```

Bases: `LitellmModelConfig`
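Since the subclass body is just `pass`, every field above is inherited from `LitellmModelConfig`, and constructing the config works exactly like constructing the parent. A short sketch (the model name is a placeholder):

```python
from minisweagent.models.litellm_response_api_model import LitellmResponseAPIModelConfig

# Only model_name is required; everything else falls back to the
# defaults shown in the signature above.
config = LitellmResponseAPIModelConfig(
    model_name="openai/gpt-5-mini",
    model_kwargs={"drop_params": True},
)
```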
### `LitellmResponseAPIModel`

```python
LitellmResponseAPIModel(
    *,
    config_class: Callable = LitellmResponseAPIModelConfig,
    **kwargs,
)
```

Bases: `LitellmModel`
### `query`

```python
query(messages: list[dict[str, str]], **kwargs) -> dict
```