[Feat] Enable Tool Calling for meta_llama (#11895)

* Enable Tool Calling for `meta_llama` (#11825)

* feat: enable tools and function_call features

* fix: ignore pydantic warnings for StreamingChoices from llama-api

* docs: add tool calling examples

* docs: change default models to Maverick

* docs: fix output of tool use

* test_map_openai_params

---------

Co-authored-by: Young Han <110819238+seyeong-han@users.noreply.github.com>
This commit is contained in:
Ishaan Jaff
2025-06-19 13:44:22 -07:00
committed by GitHub
parent e1764af890
commit 08b2b4f5f5
3 changed files with 108 additions and 36 deletions
+102 -4
View File
@@ -45,7 +45,7 @@ os.environ["LLAMA_API_KEY"] = "" # your Meta Llama API key
messages = [{"content": "Hello, how are you?", "role": "user"}]
# Meta Llama call
response = completion(model="meta_llama/Llama-3.3-70B-Instruct", messages=messages)
response = completion(model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8", messages=messages)
```
### Streaming
@@ -61,7 +61,7 @@ messages = [{"content": "Hello, how are you?", "role": "user"}]
# Meta Llama call with streaming
response = completion(
model="meta_llama/Llama-3.3-70B-Instruct",
model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
messages=messages,
stream=True
)
@@ -70,6 +70,104 @@ for chunk in response:
print(chunk)
```
### Function Calling
```python showLineNumbers title="Meta Llama Function Calling"
import os
import litellm
from litellm import completion

os.environ["LLAMA_API_KEY"] = ""  # your Meta Llama API key

# JSON Schema describing the arguments the model may supply to `get_weather`.
# Only `location` is mandatory; `unit` is restricted to the two listed values.
weather_parameters = {
    "type": "object",
    "properties": {
        "location": {
            "type": "string",
            "description": "The city and state, e.g. San Francisco, CA",
        },
        "unit": {
            "type": "string",
            "enum": ["celsius", "fahrenheit"],
        },
    },
    "required": ["location"],
}

# Define the function
get_weather_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather in a given location",
        "parameters": weather_parameters,
    },
}
tools = [get_weather_tool]

messages = [{"content": "What's the weather like in San Francisco?", "role": "user"}]

# Meta Llama call with function calling
response = completion(
    model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
    messages=messages,
    tools=tools,
    tool_choice="auto",
)

# The tool call(s) requested by the model, if any, are on the first choice.
print(response.choices[0].message.tool_calls)
```
### Tool Use
```python showLineNumbers title="Meta Llama Tool Use"
import os
import litellm
from litellm import completion

os.environ["LLAMA_API_KEY"] = ""  # your Meta Llama API key

messages = [{"content": "Create a chart showing the population growth of New York City from 2010 to 2020", "role": "user"}]

# Define the tools
tools = [
    {
        "type": "function",
        "function": {
            "name": "create_chart",
            "description": "Create a chart with the provided data",
            "parameters": {
                "type": "object",
                "properties": {
                    "chart_type": {
                        "type": "string",
                        "enum": ["bar", "line", "pie", "scatter"],
                        "description": "The type of chart to create",
                    },
                    "title": {
                        "type": "string",
                        "description": "The title of the chart",
                    },
                    "data": {
                        "type": "object",
                        "description": "The data to plot in the chart",
                    },
                },
                "required": ["chart_type", "title", "data"],
            },
        },
    }
]

# Meta Llama call with tool use
response = completion(
    model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
    messages=messages,
    tools=tools,
    tool_choice="auto",
)

# When the model chooses to call `create_chart`, the request arrives in
# `message.tool_calls` and `message.content` is usually empty. Show the tool
# calls when present and fall back to the plain-text reply otherwise, so the
# example never just prints `None`.
message = response.choices[0].message
if message.tool_calls:
    for tool_call in message.tool_calls:
        print(tool_call.function.name, tool_call.function.arguments)
else:
    print(message.content)
```
## Usage - LiteLLM Proxy
@@ -111,7 +209,7 @@ client = OpenAI(
# Non-streaming response
response = client.chat.completions.create(
model="meta_llama/Llama-3.3-70B-Instruct",
model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
messages=[{"role": "user", "content": "Write a short poem about AI."}]
)
@@ -129,7 +227,7 @@ client = OpenAI(
# Streaming response
response = client.chat.completions.create(
model="meta_llama/Llama-3.3-70B-Instruct",
model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
messages=[{"role": "user", "content": "Write a short poem about AI."}],
stream=True
)
+6 -20
View File
@@ -6,9 +6,11 @@ Calls done in OpenAI/openai.py as Llama API is openai-compatible.
Docs: https://llama.developer.meta.com/docs/features/compatibility/
"""
from typing import Optional
import warnings
# Suppress Pydantic serialization warnings for Meta Llama responses.
# NOTE(review): this filter is registered at import time without a `module=`
# or `category=` restriction, so it is process-wide: it hides every warning
# whose message starts with "Pydantic serializer warnings", not only those
# raised while handling Meta Llama responses. Confirm that scope is intended.
warnings.filterwarnings("ignore", message="Pydantic serializer warnings")
from litellm import get_model_info, verbose_logger
from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
@@ -17,27 +19,11 @@ class LlamaAPIConfig(OpenAIGPTConfig):
"""
Llama API has limited support for OpenAI parameters
Tool calling, Functional Calling, tool choice are not working right now
function_call, tools, and tool_choice are working
response_format: only json_schema is working
"""
supports_function_calling: Optional[bool] = None
supports_tool_choice: Optional[bool] = None
try:
model_info = get_model_info(model, custom_llm_provider="meta_llama")
supports_function_calling = model_info.get(
"supports_function_calling", False
)
supports_tool_choice = model_info.get("supports_tool_choice", False)
except Exception as e:
verbose_logger.debug(f"Error getting supported openai params: {e}")
pass
# Function calling and tool choice are now supported on Llama API
optional_params = super().get_supported_openai_params(model)
if not supports_function_calling:
optional_params.remove("function_call")
if not supports_tool_choice:
optional_params.remove("tools")
optional_params.remove("tool_choice")
return optional_params
def map_openai_params(
@@ -11,18 +11,6 @@ sys.path.insert(
from litellm.llms.meta_llama.chat.transformation import LlamaAPIConfig
def test_get_supported_openai_params():
"""Test that LlamaAPIConfig correctly filters unsupported parameters"""
config = LlamaAPIConfig()
# Test error handling
with patch("litellm.get_model_info", side_effect=Exception("Test error")):
params = config.get_supported_openai_params("llama-3.3-8B-instruct")
assert "function_call" not in params
assert "tools" not in params
assert "tool_choice" not in params
def test_map_openai_params():
"""Test that LlamaAPIConfig correctly maps OpenAI parameters"""
config = LlamaAPIConfig()