diff --git a/litellm/__init__.py b/litellm/__init__.py
index 6e42f2c1ea..50fa0e7675 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -197,6 +197,13 @@ telemetry = True
 max_tokens: int = DEFAULT_MAX_TOKENS  # OpenAI Defaults
 drop_params = bool(os.getenv("LITELLM_DROP_PARAMS", False))
 modify_params = bool(os.getenv("LITELLM_MODIFY_PARAMS", False))
+# When True, routes OpenAI /v1/messages requests to chat/completions instead of
+# the Responses API. os.getenv returns a string, so bool() on it would turn an
+# explicit "false" or "0" into True; the usual "off" spellings are therefore
+# matched explicitly. Any other non-empty value enables the flag.
+use_chat_completions_url_for_anthropic_messages: bool = os.getenv(
+    "LITELLM_USE_CHAT_COMPLETIONS_URL_FOR_ANTHROPIC_MESSAGES", ""
+).strip().lower() not in {"", "0", "false", "no", "off"}
 retry = True
 ### AUTH ###
 api_key: Optional[str] = None
diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py
index 6fe0fcd4fd..5b215c1fe5 100644
--- a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py
+++ b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py
@@ -30,11 +30,18 @@ from .utils import AnthropicMessagesRequestUtils, mock_response
 
 # Providers that are routed directly to the OpenAI Responses API instead of
 # going through chat/completions.
-_RESPONSES_API_PROVIDERS = frozenset({"openai", "azure", "azure_text"})
+_RESPONSES_API_PROVIDERS = frozenset({"openai"})
 
 
 def _should_route_to_responses_api(custom_llm_provider: Optional[str]) -> bool:
-    """Return True when the provider should use the Responses API path."""
+    """Return True when the provider should use the Responses API path.
+
+    Only providers listed in ``_RESPONSES_API_PROVIDERS`` qualify. Set
+    ``litellm.use_chat_completions_url_for_anthropic_messages = True`` to opt
+    out and route OpenAI requests through chat/completions instead.
+    """
+    if litellm.use_chat_completions_url_for_anthropic_messages:
+        return False
     return custom_llm_provider in _RESPONSES_API_PROVIDERS
 
 ####### ENVIRONMENT VARIABLES ###################