mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-26 17:05:56 +00:00
[Feat] Enable Tool Calling for meta_llama (#11895)
* Enable Tool Calling for `meta_llama` (#11825)
* feat: enable tools and function_call features
* fix: ignore pydantic warnings for StreamingChoices from llama-api
* docs: add tool calling examples
* docs: change default models to Maverick
* docs: fix output of tool use
* test_map_openai_params
---------
Co-authored-by: Young Han <110819238+seyeong-han@users.noreply.github.com>
This commit is contained in:
@@ -45,7 +45,7 @@ os.environ["LLAMA_API_KEY"] = "" # your Meta Llama API key
|
||||
messages = [{"content": "Hello, how are you?", "role": "user"}]
|
||||
|
||||
# Meta Llama call
|
||||
response = completion(model="meta_llama/Llama-3.3-70B-Instruct", messages=messages)
|
||||
response = completion(model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8", messages=messages)
|
||||
```
|
||||
|
||||
### Streaming
|
||||
@@ -61,7 +61,7 @@ messages = [{"content": "Hello, how are you?", "role": "user"}]
|
||||
|
||||
# Meta Llama call with streaming
|
||||
response = completion(
|
||||
model="meta_llama/Llama-3.3-70B-Instruct",
|
||||
model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
|
||||
messages=messages,
|
||||
stream=True
|
||||
)
|
||||
@@ -70,6 +70,104 @@ for chunk in response:
|
||||
print(chunk)
|
||||
```
|
||||
|
||||
### Function Calling
|
||||
|
||||
```python showLineNumbers title="Meta Llama Function Calling"
|
||||
import os
|
||||
import litellm
|
||||
from litellm import completion
|
||||
|
||||
os.environ["LLAMA_API_KEY"] = "" # your Meta Llama API key
|
||||
|
||||
messages = [{"content": "What's the weather like in San Francisco?", "role": "user"}]
|
||||
|
||||
# Define the function
|
||||
tools = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather in a given location",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA"
|
||||
},
|
||||
"unit": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"]
|
||||
}
|
||||
},
|
||||
"required": ["location"]
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
# Meta Llama call with function calling
|
||||
response = completion(
|
||||
model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
|
||||
messages=messages,
|
||||
tools=tools,
|
||||
tool_choice="auto"
|
||||
)
|
||||
|
||||
print(response.choices[0].message.tool_calls)
|
||||
```
|
||||
|
||||
### Tool Use
|
||||
|
||||
```python showLineNumbers title="Meta Llama Tool Use"
|
||||
import os
|
||||
import litellm
|
||||
from litellm import completion
|
||||
|
||||
os.environ["LLAMA_API_KEY"] = "" # your Meta Llama API key
|
||||
|
||||
messages = [{"content": "Create a chart showing the population growth of New York City from 2010 to 2020", "role": "user"}]
|
||||
|
||||
# Define the tools
|
||||
tools = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "create_chart",
|
||||
"description": "Create a chart with the provided data",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"chart_type": {
|
||||
"type": "string",
|
||||
"enum": ["bar", "line", "pie", "scatter"],
|
||||
"description": "The type of chart to create"
|
||||
},
|
||||
"title": {
|
||||
"type": "string",
|
||||
"description": "The title of the chart"
|
||||
},
|
||||
"data": {
|
||||
"type": "object",
|
||||
"description": "The data to plot in the chart"
|
||||
}
|
||||
},
|
||||
"required": ["chart_type", "title", "data"]
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
# Meta Llama call with tool use
|
||||
response = completion(
|
||||
model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
|
||||
messages=messages,
|
||||
tools=tools,
|
||||
tool_choice="auto"
|
||||
)
|
||||
|
||||
print(response.choices[0].message.content)
|
||||
```
|
||||
|
||||
## Usage - LiteLLM Proxy
|
||||
|
||||
@@ -111,7 +209,7 @@ client = OpenAI(
|
||||
|
||||
# Non-streaming response
|
||||
response = client.chat.completions.create(
|
||||
model="meta_llama/Llama-3.3-70B-Instruct",
|
||||
model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
|
||||
messages=[{"role": "user", "content": "Write a short poem about AI."}]
|
||||
)
|
||||
|
||||
@@ -129,7 +227,7 @@ client = OpenAI(
|
||||
|
||||
# Streaming response
|
||||
response = client.chat.completions.create(
|
||||
model="meta_llama/Llama-3.3-70B-Instruct",
|
||||
model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
|
||||
messages=[{"role": "user", "content": "Write a short poem about AI."}],
|
||||
stream=True
|
||||
)
|
||||
|
||||
@@ -6,9 +6,11 @@ Calls done in OpenAI/openai.py as Llama API is openai-compatible.
|
||||
Docs: https://llama.developer.meta.com/docs/features/compatibility/
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
import warnings
|
||||
|
||||
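# Suppress Pydantic serializer warnings triggered by Meta Llama responses.
# NOTE: this filter is installed at import time and is process-wide — it hides
# every warning whose message starts with "Pydantic serializer warnings",
# not only those raised while handling Meta Llama calls.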
|
||||
warnings.filterwarnings("ignore", message="Pydantic serializer warnings")
|
||||
|
||||
from litellm import get_model_info, verbose_logger
|
||||
from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
|
||||
|
||||
|
||||
@@ -17,27 +19,11 @@ class LlamaAPIConfig(OpenAIGPTConfig):
|
||||
"""
|
||||
Llama API has limited support for OpenAI parameters
|
||||
|
||||
Tool calling, Functional Calling, tool choice are not working right now
|
||||
function_call, tools, and tool_choice are working
|
||||
response_format: only json_schema is working
|
||||
"""
|
||||
supports_function_calling: Optional[bool] = None
|
||||
supports_tool_choice: Optional[bool] = None
|
||||
try:
|
||||
model_info = get_model_info(model, custom_llm_provider="meta_llama")
|
||||
supports_function_calling = model_info.get(
|
||||
"supports_function_calling", False
|
||||
)
|
||||
supports_tool_choice = model_info.get("supports_tool_choice", False)
|
||||
except Exception as e:
|
||||
verbose_logger.debug(f"Error getting supported openai params: {e}")
|
||||
pass
|
||||
|
||||
# Function calling and tool choice are now supported on Llama API
|
||||
optional_params = super().get_supported_openai_params(model)
|
||||
if not supports_function_calling:
|
||||
optional_params.remove("function_call")
|
||||
if not supports_tool_choice:
|
||||
optional_params.remove("tools")
|
||||
optional_params.remove("tool_choice")
|
||||
return optional_params
|
||||
|
||||
def map_openai_params(
|
||||
|
||||
@@ -11,18 +11,6 @@ sys.path.insert(
|
||||
from litellm.llms.meta_llama.chat.transformation import LlamaAPIConfig
|
||||
|
||||
|
||||
def test_get_supported_openai_params():
|
||||
"""Test that LlamaAPIConfig correctly filters unsupported parameters"""
|
||||
config = LlamaAPIConfig()
|
||||
|
||||
# Test error handling
|
||||
with patch("litellm.get_model_info", side_effect=Exception("Test error")):
|
||||
params = config.get_supported_openai_params("llama-3.3-8B-instruct")
|
||||
assert "function_call" not in params
|
||||
assert "tools" not in params
|
||||
assert "tool_choice" not in params
|
||||
|
||||
|
||||
def test_map_openai_params():
|
||||
"""Test that LlamaAPIConfig correctly maps OpenAI parameters"""
|
||||
config = LlamaAPIConfig()
|
||||
|
||||
Reference in New Issue
Block a user