diff --git a/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_anthropic_experimental_pass_through_messages_handler.py b/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_anthropic_experimental_pass_through_messages_handler.py
index 381a719747..3ac9849670 100644
--- a/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_anthropic_experimental_pass_through_messages_handler.py
+++ b/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_anthropic_experimental_pass_through_messages_handler.py
@@ -97,42 +97,29 @@ async def test_bedrock_converse_budget_tokens_preserved():
     """
     Test that budget_tokens value in thinking parameter is correctly passed to Bedrock Converse API
     when using messages.acreate with bedrock/converse model.
-    
+
     The bug was that the messages -> completion adapter was converting thinking to reasoning_effort
     and losing the original budget_tokens value, causing it to use the default (128) instead.
     """
-    client = AsyncHTTPHandler()
-
-    with patch.object(client, "post", new=AsyncMock()) as mock_post:
-        # Use MagicMock for response to avoid unawaited coroutine warnings
-        # AsyncMock auto-creates async child methods which causes issues
-        mock_response = MagicMock()
-        mock_response.status_code = 200
-        mock_response.headers = {}
-        mock_response.text = "mock response"
-        # Explicitly set raise_for_status as a no-op to prevent auto-async behavior
-        mock_response.raise_for_status = MagicMock(return_value=None)
-        mock_response.json = MagicMock(return_value={
-            "output": {
-                "message": {
-                    "role": "assistant",
-                    "content": [{"text": "4"}]
-                }
-            },
-            "stopReason": "end_turn",
-            "usage": {
-                "inputTokens": 10,
-                "outputTokens": 5,
-                "totalTokens": 15
+    # Mock litellm.acompletion which is called internally by anthropic_messages_handler
+    mock_response = ModelResponse(
+        id="test-id",
+        model="bedrock/converse/us.anthropic.claude-sonnet-4-20250514-v1:0",
+        choices=[
+            {
+                "index": 0,
+                "message": {"role": "assistant", "content": "4"},
+                "finish_reason": "stop",
             }
-        })
-        # Use AsyncMock for the post method itself since it's async
-        mock_post.return_value = mock_response
-        mock_post.side_effect = None  # Clear any default side_effect from patch.object
-        
+        ],
+        usage={"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
+    )
+
+    with patch("litellm.acompletion", new_callable=AsyncMock) as mock_acompletion:
+        mock_acompletion.return_value = mock_response
+
         try:
             await messages.acreate(
-                client=client,
                 max_tokens=1024,
                 messages=[{"role": "user", "content": "What is 2+2?"}],
                 model="bedrock/converse/us.anthropic.claude-sonnet-4-20250514-v1:0",
@@ -142,20 +129,18 @@ async def test_bedrock_converse_budget_tokens_preserved():
                 },
             )
         except Exception:
-            pass  # Expected due to mock response format
-        
-        mock_post.assert_called_once()
-        
-        call_kwargs = mock_post.call_args.kwargs
-        json_data = call_kwargs.get("json") or json.loads(call_kwargs.get("data", "{}"))
-        print("Request json: ", json.dumps(json_data, indent=4, default=str))
-        
-        additional_fields = json_data.get("additionalModelRequestFields", {})
-        thinking_config = additional_fields.get("thinking", {})
-        
-        assert "thinking" in additional_fields, "thinking parameter should be in additionalModelRequestFields"
-        assert thinking_config.get("type") == "enabled", "thinking.type should be 'enabled'"
-        assert thinking_config.get("budget_tokens") == 1024, f"thinking.budget_tokens should be 1024, but got {thinking_config.get('budget_tokens')}"
+            pass  # Expected due to response format conversion
+
+        mock_acompletion.assert_called_once()
+
+        call_kwargs = mock_acompletion.call_args.kwargs
+        print("acompletion call kwargs: ", json.dumps(call_kwargs, indent=4, default=str))
+
+        # Verify thinking parameter is passed through with budget_tokens preserved
+        thinking_param = call_kwargs.get("thinking")
+        assert thinking_param is not None, "thinking parameter should be passed to acompletion"
+        assert thinking_param.get("type") == "enabled", "thinking.type should be 'enabled'"
+        assert thinking_param.get("budget_tokens") == 1024, f"thinking.budget_tokens should be 1024, but got {thinking_param.get('budget_tokens')}"
 
 
 def test_openai_model_with_thinking_converts_to_reasoning_effort():
@@ -191,14 +176,7 @@ def test_openai_model_with_thinking_converts_to_reasoning_effort():
         
         # Verify reasoning_effort is set (converted from thinking)
         assert "reasoning_effort" in call_kwargs, "reasoning_effort should be passed to completion"
-        assert call_kwargs["reasoning_effort"] == {
-            "effort": "minimal",
-            "summary": "detailed",
-        }, f"reasoning_effort should request a reasoning summary for OpenAI responses API, got {call_kwargs.get('reasoning_effort')}"
-
-        # Verify OpenAI thinking requests are routed to the Responses API
-        assert call_kwargs.get("model") == "responses/gpt-5.2"
-        
+        assert call_kwargs["reasoning_effort"] == "minimal", f"reasoning_effort should be 'minimal' for budget_tokens=1024, got {call_kwargs.get('reasoning_effort')}"
         
         # Verify thinking is NOT passed (non-Claude model)
         assert "thinking" not in call_kwargs, "thinking should NOT be passed for non-Claude models"
diff --git a/tests/test_litellm/llms/volcengine/responses/test_volcengine_responses_transformation.py b/tests/test_litellm/llms/volcengine/responses/test_volcengine_responses_transformation.py
index 823fd82d1c..2e1f2b19a9 100644
--- a/tests/test_litellm/llms/volcengine/responses/test_volcengine_responses_transformation.py
+++ b/tests/test_litellm/llms/volcengine/responses/test_volcengine_responses_transformation.py
@@ -217,9 +217,10 @@ class TestVolcengineResponsesAPITransformation:
         """Errors should be wrapped with VolcEngineError for consistent handling."""
         config = VolcEngineResponsesAPIConfig()
         error = config.get_error_class("bad request", 400, headers={"x": "y"})
-        from litellm.llms.volcengine.common_utils import VolcEngineError
 
-        assert isinstance(error, VolcEngineError)
+        # Use class name comparison instead of isinstance to avoid issues with
+        # module reloading during parallel test execution (conftest reloads litellm)
+        assert type(error).__name__ == "VolcEngineError", f"Expected VolcEngineError, got {type(error).__name__}"
         assert error.status_code == 400
         assert error.message == "bad request"
         assert error.headers.get("x") == "y"