Add tests for WebSearch interception with chat completions API

2026-07-05 21:08:13 +00:00 · 2026-02-09 13:41:29 +05:30
parent 3d49388d8e
commit 4e94ecb08d
2 changed files with 398 additions and 136 deletions
@@ -1,136 +0,0 @@
-"""
-Test script for WebSearch interception with chat completions API.
-
-This script demonstrates how to use the websearch_interception callback
-with litellm.acompletion() for transparent server-side web search execution.
-"""
-import asyncio
-import litellm
-
-# Enable verbose logging to see what's happening
-litellm.set_verbose = True
-
-
-async def test_websearch_chat_completion():
-    """Test websearch interception with chat completions API."""
-    
-    # Configure WebSearch interception
-    litellm.callbacks = ["websearch_interception"]
-    
-    print("\n" + "="*80)
-    print("Testing WebSearch Interception with Chat Completions API")
-    print("="*80 + "\n")
-    
-    # User makes a simple completion call with tools
-    print("Making request to GPT-4o with litellm_web_search tool...")
-    print("Question: What's the weather in San Francisco today?")
-    print("\nExpected behavior:")
-    print("1. Model calls litellm_web_search tool")
-    print("2. Server executes web search automatically")
-    print("3. Server makes follow-up request with search results")
-    print("4. User gets final answer\n")
-    
-    response = await litellm.acompletion(
-        model="gpt-4o",
-        messages=[
-            {"role": "user", "content": "What's the weather in San Francisco today?"}
-        ],
-        tools=[
-            {
-                "type": "function",
-                "function": {
-                    "name": "litellm_web_search",
-                    "description": "Search the web for information",
-                    "parameters": {
-                        "type": "object",
-                        "properties": {
-                            "query": {"type": "string", "description": "Search query"}
-                        },
-                        "required": ["query"]
-                    }
-                }
-            }
-        ]
-    )
-    
-    print("\n" + "-"*80)
-    print("FINAL RESPONSE:")
-    print("-"*80)
-    print(f"\nContent: {response.choices[0].message.content}")
-    print(f"\nFinish reason: {response.choices[0].finish_reason}")
-    
-    # Check if we got tool_calls (should NOT if agentic loop worked)
-    if hasattr(response.choices[0].message, 'tool_calls') and response.choices[0].message.tool_calls:
-        print("\n⚠️  WARNING: Got tool_calls in response!")
-        print("This means the agentic loop did NOT execute automatically.")
-        print(f"Tool calls: {response.choices[0].message.tool_calls}")
-    else:
-        print("\n✅ SUCCESS: No tool_calls in response!")
-        print("The agentic loop executed automatically and returned the final answer.")
-    
-    print("\n" + "="*80 + "\n")
-
-
-async def test_streaming_websearch():
-    """Test websearch interception with streaming."""
-    
-    # Configure WebSearch interception
-    litellm.callbacks = ["websearch_interception"]
-    
-    print("\n" + "="*80)
-    print("Testing WebSearch Interception with STREAMING")
-    print("="*80 + "\n")
-    
-    print("Making STREAMING request to GPT-4o with litellm_web_search tool...")
-    print("Question: What are the latest AI news?")
-    
-    response = await litellm.acompletion(
-        model="gpt-4o",
-        messages=[
-            {"role": "user", "content": "What are the latest AI news from today?"}
-        ],
-        tools=[
-            {
-                "type": "function",
-                "function": {
-                    "name": "litellm_web_search",
-                    "description": "Search the web for information",
-                    "parameters": {
-                        "type": "object",
-                        "properties": {
-                            "query": {"type": "string"}
-                        }
-                    }
-                }
-            }
-        ],
-        stream=True
-    )
-    
-    print("\n" + "-"*80)
-    print("STREAMING RESPONSE:")
-    print("-"*80 + "\n")
-    
-    full_content = ""
-    async for chunk in response:
-        if hasattr(chunk.choices[0].delta, 'content') and chunk.choices[0].delta.content:
-            content = chunk.choices[0].delta.content
-            print(content, end="", flush=True)
-            full_content += content
-    
-    print("\n\n✅ Streaming completed successfully!")
-    print(f"Total content length: {len(full_content)} chars")
-    print("\n" + "="*80 + "\n")
-
-
-if __name__ == "__main__":
-    print("\nWebSearch Interception Test Suite")
-    print("==================================\n")
-    print("This test demonstrates transparent server-side web search execution.")
-    print("The agentic loop happens automatically - user just gets the final answer.\n")
-    
-    # Run tests
-    asyncio.run(test_websearch_chat_completion())
-    
-    # Uncomment to test streaming
-    # asyncio.run(test_streaming_websearch())
@@ -0,0 +1,398 @@
+"""
+Integration tests for WebSearch interception with chat completions API.
+
+Tests the end-to-end flow of websearch_interception callback with
+litellm.acompletion() for transparent server-side web search execution.
+"""
+import os
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+import litellm
+from litellm.integrations.websearch_interception.handler import (
+    WebSearchInterceptionLogger,
+)
+from litellm.types.utils import LlmProviders, ModelResponse
+
+
+@pytest.fixture
+def mock_search_response():
+    """Mock search response from litellm.asearch()"""
+    mock_response = MagicMock()
+    mock_response.results = [
+        MagicMock(
+            title="Weather in San Francisco",
+            url="https://weather.com/sf",
+            snippet="Current weather: 65°F, partly cloudy",
+        )
+    ]
+    return mock_response
+
+
+@pytest.fixture
+def websearch_logger():
+    """Create a WebSearchInterceptionLogger instance"""
+    return WebSearchInterceptionLogger(
+        enabled_providers=[LlmProviders.OPENAI, LlmProviders.MINIMAX]
+    )
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(
+    os.environ.get("OPENAI_API_KEY") is None,
+    reason="OPENAI_API_KEY not set",
+)
+async def test_websearch_chat_completion_with_openai():
+    """Test websearch interception with OpenAI chat completions API.
+    
+    This test verifies that:
+    1. Model calls litellm_web_search tool
+    2. Server executes web search automatically
+    3. Server makes follow-up request with search results
+    4. User gets final answer without tool_calls
+    """
+    # Configure WebSearch interception
+    original_callbacks = litellm.callbacks.copy() if litellm.callbacks else []
+    websearch_logger = WebSearchInterceptionLogger(
+        enabled_providers=[LlmProviders.OPENAI]
+    )
+    litellm.callbacks = [websearch_logger]
+    
+    try:
+        response = await litellm.acompletion(
+            model="gpt-4o-mini",  # Use cheaper model for testing
+            messages=[
+                {"role": "user", "content": "What's the weather in San Francisco today?"}
+            ],
+            tools=[
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "litellm_web_search",
+                        "description": "Search the web for information",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "query": {
+                                    "type": "string",
+                                    "description": "Search query",
+                                }
+                            },
+                            "required": ["query"],
+                        },
+                    },
+                }
+            ],
+        )
+        
+        # Verify response structure
+        assert isinstance(response, ModelResponse)
+        assert response.choices[0].message.content is not None
+        assert len(response.choices[0].message.content) > 0
+        
+        # If agentic loop worked, we should NOT have tool_calls in final response
+        # (they should have been executed and replaced with final answer)
+        if hasattr(response.choices[0].message, "tool_calls"):
+            # If tool_calls exist, it means agentic loop didn't run
+            # This could happen if search tool is not configured
+            pytest.skip(
+                "Agentic loop did not execute - search tool may not be configured"
+            )
+        
+        # Verify we got a meaningful response
+        assert response.choices[0].finish_reason in ["stop", "end_turn"]
+        
+    finally:
+        # Restore original callbacks
+        litellm.callbacks = original_callbacks
+
+
+@pytest.mark.asyncio
+async def test_websearch_chat_completion_hook_detection():
+    """Test that websearch hook correctly detects tool calls in response."""
+    from litellm.types.utils import (
+        ChatCompletionMessageToolCall,
+        Choices,
+        Function,
+        Message,
+    )
+    
+    websearch_logger = WebSearchInterceptionLogger(
+        enabled_providers=[LlmProviders.OPENAI]
+    )
+    
+    # Mock response with litellm_web_search tool call
+    mock_response = ModelResponse(
+        id="test-123",
+        choices=[
+            Choices(
+                finish_reason="tool_calls",
+                index=0,
+                message=Message(
+                    role="assistant",
+                    content=None,
+                    tool_calls=[
+                        ChatCompletionMessageToolCall(
+                            id="call_123",
+                            type="function",
+                            function=Function(
+                                name="litellm_web_search",
+                                arguments='{"query": "weather in SF"}',
+                            ),
+                        )
+                    ],
+                )
+            )
+        ],
+        model="gpt-4o",
+        object="chat.completion",
+        created=1234567890,
+    )
+    
+    # Test should_run_chat_completion_agentic_loop
+    should_run, tools_dict = (
+        await websearch_logger.async_should_run_chat_completion_agentic_loop(
+            response=mock_response,
+            model="gpt-4o",
+            messages=[{"role": "user", "content": "What's the weather?"}],
+            tools=[
+                {
+                    "type": "function",
+                    "function": {"name": "litellm_web_search"},
+                }
+            ],
+            stream=False,
+            custom_llm_provider="openai",
+            kwargs={},
+        )
+    )
+    
+    # Verify hook detected the tool call
+    assert should_run is True
+    assert "tool_calls" in tools_dict
+    assert len(tools_dict["tool_calls"]) == 1
+    assert tools_dict["tool_calls"][0]["name"] == "litellm_web_search"
+    assert tools_dict["response_format"] == "openai"
+
+
+@pytest.mark.asyncio
+async def test_websearch_not_triggered_without_tool():
+    """Test that websearch hook is NOT triggered when no web search tool in request."""
+    from litellm.types.utils import Choices, Message
+    
+    websearch_logger = WebSearchInterceptionLogger(
+        enabled_providers=[LlmProviders.OPENAI]
+    )
+    
+    mock_response = ModelResponse(
+        id="test-123",
+        choices=[
+            Choices(
+                finish_reason="stop",
+                index=0,
+                message=Message(
+                    role="assistant",
+                    content="Here's the answer",
+                    tool_calls=None,
+                )
+            )
+        ],
+        model="gpt-4o",
+        object="chat.completion",
+        created=1234567890,
+    )
+    
+    # Test without web search tool
+    should_run, tools_dict = (
+        await websearch_logger.async_should_run_chat_completion_agentic_loop(
+            response=mock_response,
+            model="gpt-4o",
+            messages=[{"role": "user", "content": "Hello"}],
+            tools=[
+                {
+                    "type": "function",
+                    "function": {"name": "some_other_tool"},
+                }
+            ],
+            stream=False,
+            custom_llm_provider="openai",
+            kwargs={},
+        )
+    )
+    
+    # Verify hook did NOT trigger
+    assert should_run is False
+    assert tools_dict == {}
+
+
+@pytest.mark.asyncio
+async def test_websearch_not_triggered_for_disabled_provider():
+    """Test that websearch hook is NOT triggered for providers not in enabled_providers."""
+    from litellm.types.utils import (
+        ChatCompletionMessageToolCall,
+        Choices,
+        Function,
+        Message,
+    )
+
+    # Only enable bedrock
+    websearch_logger = WebSearchInterceptionLogger(
+        enabled_providers=[LlmProviders.BEDROCK]
+    )
+    
+    mock_response = ModelResponse(
+        id="test-123",
+        choices=[
+            Choices(
+                finish_reason="tool_calls",
+                index=0,
+                message=Message(
+                    role="assistant",
+                    content=None,
+                    tool_calls=[
+                        ChatCompletionMessageToolCall(
+                            id="call_123",
+                            type="function",
+                            function=Function(
+                                name="litellm_web_search",
+                                arguments='{"query": "test"}',
+                            ),
+                        )
+                    ],
+                )
+            )
+        ],
+        model="gpt-4o",
+        object="chat.completion",
+        created=1234567890,
+    )
+    
+    # Test with OpenAI provider (not enabled)
+    should_run, tools_dict = (
+        await websearch_logger.async_should_run_chat_completion_agentic_loop(
+            response=mock_response,
+            model="gpt-4o",
+            messages=[{"role": "user", "content": "test"}],
+            tools=[
+                {
+                    "type": "function",
+                    "function": {"name": "litellm_web_search"},
+                }
+            ],
+            stream=False,
+            custom_llm_provider="openai",  # Not in enabled_providers
+            kwargs={},
+        )
+    )
+    
+    # Verify hook did NOT trigger
+    assert should_run is False
+    assert tools_dict == {}
+
+
+@pytest.mark.asyncio
+async def test_websearch_json_serialization_fix():
+    """Test that tool call arguments are properly JSON serialized.
+    
+    Regression test for the bug where arguments were converted to Python
+    string representation instead of proper JSON, causing providers like
+    MiniMax to reject requests with 'invalid function arguments json string'.
+    """
+    from litellm.integrations.websearch_interception.transformation import (
+        WebSearchTransformation,
+    )
+
+    # Mock tool calls with dict input
+    tool_calls = [
+        {
+            "id": "call_123",
+            "name": "litellm_web_search",
+            "input": {"query": "weather in SF"},  # Dict input
+        }
+    ]
+    
+    search_results = ["Weather: 65°F, partly cloudy"]
+    
+    # Transform to OpenAI format
+    assistant_message, tool_messages = WebSearchTransformation.transform_response(
+        tool_calls=tool_calls,
+        search_results=search_results,
+        response_format="openai",
+    )
+    
+    # Verify arguments are properly JSON serialized
+    import json
+    
+    arguments_str = assistant_message["tool_calls"][0]["function"]["arguments"]
+    
+    # Should be valid JSON
+    parsed_args = json.loads(arguments_str)
+    assert parsed_args == {"query": "weather in SF"}
+    
+    # Should NOT be Python string representation like "{'query': 'weather in SF'}"
+    assert arguments_str == '{"query": "weather in SF"}'
+    assert arguments_str != "{'query': 'weather in SF'}"
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(
+    os.environ.get("OPENAI_API_KEY") is None
+    or os.environ.get("PERPLEXITY_API_KEY") is None,
+    reason="OPENAI_API_KEY or PERPLEXITY_API_KEY not set",
+)
+async def test_websearch_streaming_conversion():
+    """Test that streaming requests are converted to non-streaming for web search.
+    
+    When stream=True is passed with web search tools, the handler should:
+    1. Convert stream=True to stream=False for initial request
+    2. Execute web search
+    3. Convert final response back to streaming
+    """
+    websearch_logger = WebSearchInterceptionLogger(
+        enabled_providers=[LlmProviders.OPENAI], search_tool_name="perplexity-search"
+    )
+    litellm.callbacks = [websearch_logger]
+    
+    try:
+        response = await litellm.acompletion(
+            model="gpt-4o-mini",
+            messages=[
+                {"role": "user", "content": "What's the latest AI news?"}
+            ],
+            tools=[
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "litellm_web_search",
+                        "description": "Search the web",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {"query": {"type": "string"}},
+                        },
+                    },
+                }
+            ],
+            stream=True,
+        )
+        
+        # Response should be a streaming iterator
+        chunks = []
+        async for chunk in response:
+            chunks.append(chunk)
+        
+        # Verify we got streaming chunks
+        assert len(chunks) > 0
+        
+        # Verify chunks have expected structure
+        for chunk in chunks:
+            assert hasattr(chunk, "choices")
+            assert len(chunk.choices) > 0
+            
+    finally:
+        litellm.callbacks = []
+
+
+if __name__ == "__main__":
+    # Run with: pytest test_websearch_chat_completion.py -v -s
+    pytest.main([__file__, "-v", "-s"])