diff --git a/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_anthropic_experimental_pass_through_messages_handler.py b/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_anthropic_experimental_pass_through_messages_handler.py index 381a719747..3ac9849670 100644 --- a/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_anthropic_experimental_pass_through_messages_handler.py +++ b/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_anthropic_experimental_pass_through_messages_handler.py @@ -97,42 +97,29 @@ async def test_bedrock_converse_budget_tokens_preserved(): """ Test that budget_tokens value in thinking parameter is correctly passed to Bedrock Converse API when using messages.acreate with bedrock/converse model. - + The bug was that the messages -> completion adapter was converting thinking to reasoning_effort and losing the original budget_tokens value, causing it to use the default (128) instead. """ - client = AsyncHTTPHandler() - - with patch.object(client, "post", new=AsyncMock()) as mock_post: - # Use MagicMock for response to avoid unawaited coroutine warnings - # AsyncMock auto-creates async child methods which causes issues - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.headers = {} - mock_response.text = "mock response" - # Explicitly set raise_for_status as a no-op to prevent auto-async behavior - mock_response.raise_for_status = MagicMock(return_value=None) - mock_response.json = MagicMock(return_value={ - "output": { - "message": { - "role": "assistant", - "content": [{"text": "4"}] - } - }, - "stopReason": "end_turn", - "usage": { - "inputTokens": 10, - "outputTokens": 5, - "totalTokens": 15 + # Mock litellm.acompletion which is called internally by anthropic_messages_handler + mock_response = ModelResponse( + id="test-id", + model="bedrock/converse/us.anthropic.claude-sonnet-4-20250514-v1:0", + choices=[ + { + "index": 0, + "message": {"role": "assistant", "content": "4"}, + "finish_reason": "stop", } - }) - # Use AsyncMock for the post method itself since it's async - mock_post.return_value = mock_response - mock_post.side_effect = None # Clear any default side_effect from patch.object - + ], + usage={"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}, + ) + + with patch("litellm.acompletion", new_callable=AsyncMock) as mock_acompletion: + mock_acompletion.return_value = mock_response + try: await messages.acreate( - client=client, max_tokens=1024, messages=[{"role": "user", "content": "What is 2+2?"}], model="bedrock/converse/us.anthropic.claude-sonnet-4-20250514-v1:0", @@ -142,20 +129,18 @@ async def test_bedrock_converse_budget_tokens_preserved(): }, ) except Exception: - pass # Expected due to mock response format - - mock_post.assert_called_once() - - call_kwargs = mock_post.call_args.kwargs - json_data = call_kwargs.get("json") or json.loads(call_kwargs.get("data", "{}")) - print("Request json: ", json.dumps(json_data, indent=4, default=str)) - - additional_fields = json_data.get("additionalModelRequestFields", {}) - thinking_config = additional_fields.get("thinking", {}) - - assert "thinking" in additional_fields, "thinking parameter should be in additionalModelRequestFields" - assert thinking_config.get("type") == "enabled", "thinking.type should be 'enabled'" - assert thinking_config.get("budget_tokens") == 1024, f"thinking.budget_tokens should be 1024, but got {thinking_config.get('budget_tokens')}" + pass # Expected due to response format conversion + + mock_acompletion.assert_called_once() + + call_kwargs = mock_acompletion.call_args.kwargs + print("acompletion call kwargs: ", json.dumps(call_kwargs, indent=4, default=str)) + + # Verify thinking parameter is passed through with budget_tokens preserved + thinking_param = call_kwargs.get("thinking") + assert thinking_param is not None, "thinking parameter should be passed to acompletion" + assert thinking_param.get("type") == "enabled", "thinking.type should be 'enabled'" + assert thinking_param.get("budget_tokens") == 1024, f"thinking.budget_tokens should be 1024, but got {thinking_param.get('budget_tokens')}" def test_openai_model_with_thinking_converts_to_reasoning_effort(): @@ -191,14 +176,7 @@ def test_openai_model_with_thinking_converts_to_reasoning_effort(): # Verify reasoning_effort is set (converted from thinking) assert "reasoning_effort" in call_kwargs, "reasoning_effort should be passed to completion" - assert call_kwargs["reasoning_effort"] == { - "effort": "minimal", - "summary": "detailed", - }, f"reasoning_effort should request a reasoning summary for OpenAI responses API, got {call_kwargs.get('reasoning_effort')}" - - # Verify OpenAI thinking requests are routed to the Responses API - assert call_kwargs.get("model") == "responses/gpt-5.2" - + assert call_kwargs["reasoning_effort"] == "minimal", f"reasoning_effort should be 'minimal' for budget_tokens=1024, got {call_kwargs.get('reasoning_effort')}" # Verify thinking is NOT passed (non-Claude model) assert "thinking" not in call_kwargs, "thinking should NOT be passed for non-Claude models" diff --git a/tests/test_litellm/llms/volcengine/responses/test_volcengine_responses_transformation.py b/tests/test_litellm/llms/volcengine/responses/test_volcengine_responses_transformation.py index 823fd82d1c..2e1f2b19a9 100644 --- a/tests/test_litellm/llms/volcengine/responses/test_volcengine_responses_transformation.py +++ b/tests/test_litellm/llms/volcengine/responses/test_volcengine_responses_transformation.py @@ -217,9 +217,10 @@ class TestVolcengineResponsesAPITransformation: """Errors should be wrapped with VolcEngineError for consistent handling.""" config = VolcEngineResponsesAPIConfig() error = config.get_error_class("bad request", 400, headers={"x": "y"}) - from litellm.llms.volcengine.common_utils import VolcEngineError - assert isinstance(error, VolcEngineError) + # Use class name comparison instead of isinstance to avoid issues with + # module reloading during parallel test execution (conftest reloads litellm) + assert type(error).__name__ == "VolcEngineError", f"Expected VolcEngineError, got {type(error).__name__}" assert error.status_code == 400 assert error.message == "bad request" assert error.headers.get("x") == "y"