[Feat] Enable Tool Calling for meta_llama (#11895)

* Enable Tool Calling for `meta_llama` (#11825)

* feat: enable tools and function_call features

* fix: ignore pydantic warnings for StreamingChoices from llama-api

* docs: add tool calling examples

* docs: change default models to Maverick

* docs: fix output of tool use

* test_map_openai_params

---------

Co-authored-by: Young Han <110819238+seyeong-han@users.noreply.github.com>
2026-06-26 17:05:56 +00:00 · 2025-06-19 13:44:22 -07:00
parent e1764af890
commit 08b2b4f5f5
3 changed files with 108 additions and 36 deletions
@@ -45,7 +45,7 @@ os.environ["LLAMA_API_KEY"] = ""  # your Meta Llama API key
 messages = [{"content": "Hello, how are you?", "role": "user"}]

 # Meta Llama call
-response = completion(model="meta_llama/Llama-3.3-70B-Instruct", messages=messages)
+response = completion(model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8", messages=messages)
 ```

 ### Streaming
@@ -61,7 +61,7 @@ messages = [{"content": "Hello, how are you?", "role": "user"}]

 # Meta Llama call with streaming
 response = completion(
-    model="meta_llama/Llama-3.3-70B-Instruct",
+    model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
    messages=messages,
    stream=True
 )
@@ -70,6 +70,104 @@ for chunk in response:
    print(chunk)
 ```

+### Function Calling
+
+```python showLineNumbers title="Meta Llama Function Calling"
+import os
+import litellm
+from litellm import completion
+
+os.environ["LLAMA_API_KEY"] = ""  # your Meta Llama API key
+
+messages = [{"content": "What's the weather like in San Francisco?", "role": "user"}]
+
+# Define the function
+tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "get_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and state, e.g. San Francisco, CA"
+                    },
+                    "unit": {
+                        "type": "string",
+                        "enum": ["celsius", "fahrenheit"]
+                    }
+                },
+                "required": ["location"]
+            }
+        }
+    }
+]
+
+# Meta Llama call with function calling
+response = completion(
+    model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+    messages=messages,
+    tools=tools,
+    tool_choice="auto"
+)
+
+print(response.choices[0].message.tool_calls)
+```
+
+### Tool Use
+
+```python showLineNumbers title="Meta Llama Tool Use"
+import os
+import litellm
+from litellm import completion
+
+os.environ["LLAMA_API_KEY"] = ""  # your Meta Llama API key
+
+messages = [{"content": "Create a chart showing the population growth of New York City from 2010 to 2020", "role": "user"}]
+
+# Define the tools
+tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "create_chart",
+            "description": "Create a chart with the provided data",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "chart_type": {
+                        "type": "string",
+                        "enum": ["bar", "line", "pie", "scatter"],
+                        "description": "The type of chart to create"
+                    },
+                    "title": {
+                        "type": "string",
+                        "description": "The title of the chart"
+                    },
+                    "data": {
+                        "type": "object",
+                        "description": "The data to plot in the chart"
+                    }
+                },
+                "required": ["chart_type", "title", "data"]
+            }
+        }
+    }
+]
+
+# Meta Llama call with tool use
+response = completion(
+    model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+    messages=messages,
+    tools=tools,
+    tool_choice="auto"
+)
+
+print(response.choices[0].message.content)
+```

 ## Usage - LiteLLM Proxy

@@ -111,7 +209,7 @@ client = OpenAI(

 # Non-streaming response
 response = client.chat.completions.create(
-    model="meta_llama/Llama-3.3-70B-Instruct",
+    model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
    messages=[{"role": "user", "content": "Write a short poem about AI."}]
 )

@@ -129,7 +227,7 @@ client = OpenAI(

 # Streaming response
 response = client.chat.completions.create(
-    model="meta_llama/Llama-3.3-70B-Instruct",
+    model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
    messages=[{"role": "user", "content": "Write a short poem about AI."}],
    stream=True
 )
@@ -6,9 +6,11 @@ Calls done in OpenAI/openai.py as Llama API is openai-compatible.
 Docs: https://llama.developer.meta.com/docs/features/compatibility/
 """

-from typing import Optional
+import warnings
+
+# Suppress "Pydantic serializer warnings" seen with Meta Llama responses (NOTE: filter is process-wide, installed at import)
+warnings.filterwarnings("ignore", message="Pydantic serializer warnings")

-from litellm import get_model_info, verbose_logger
 from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig


@@ -17,27 +19,11 @@ class LlamaAPIConfig(OpenAIGPTConfig):
        """
        Llama API has limited support for OpenAI parameters

-        Tool calling, Functional Calling, tool choice are not working right now
+        function_call, tools, and tool_choice are supported
        response_format: only json_schema is working
        """
-        supports_function_calling: Optional[bool] = None
-        supports_tool_choice: Optional[bool] = None
-        try:
-            model_info = get_model_info(model, custom_llm_provider="meta_llama")
-            supports_function_calling = model_info.get(
-                "supports_function_calling", False
-            )
-            supports_tool_choice = model_info.get("supports_tool_choice", False)
-        except Exception as e:
-            verbose_logger.debug(f"Error getting supported openai params: {e}")
-            pass
-
+        # Function calling and tool choice are now supported on Llama API
        optional_params = super().get_supported_openai_params(model)
-        if not supports_function_calling:
-            optional_params.remove("function_call")
-        if not supports_tool_choice:
-            optional_params.remove("tools")
-            optional_params.remove("tool_choice")
        return optional_params

    def map_openai_params(
@@ -11,18 +11,6 @@ sys.path.insert(
 from litellm.llms.meta_llama.chat.transformation import LlamaAPIConfig


-def test_get_supported_openai_params():
-    """Test that LlamaAPIConfig correctly filters unsupported parameters"""
-    config = LlamaAPIConfig()
-
-    # Test error handling
-    with patch("litellm.get_model_info", side_effect=Exception("Test error")):
-        params = config.get_supported_openai_params("llama-3.3-8B-instruct")
-        assert "function_call" not in params
-        assert "tools" not in params
-        assert "tool_choice" not in params
-
-
 def test_map_openai_params():
    """Test that LlamaAPIConfig correctly maps OpenAI parameters"""
    config = LlamaAPIConfig()