feat: Add OVHCloud AI Endpoints as a provider

This commit is contained in:
Elias TOURNEUX
2025-09-12 13:20:13 +02:00
parent f8036a25a2
commit ef9d1ddc40
14 changed files with 1240 additions and 1 deletions
+380
View File
@@ -0,0 +1,380 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# OVHCloud AI Endpoints
Leading French Cloud provider in Europe with data sovereignty and privacy.
You can explore the latest models we have made available in our [catalog](https://endpoints.ai.cloud.ovh.net/catalog).
:::tip
We support ALL OVHCloud AI Endpoints models, just set `model=ovhcloud/<any-model-on-ai-endpoints>` as a prefix when sending litellm requests.
For the complete model catalog, visit https://endpoints.ai.cloud.ovh.net/catalog.
:::
## Sample usage
### Chat completion
You can define your API key by setting the `OVHCLOUD_API_KEY` environment variable or by overriding the `api_key` parameter. You can generate a key on the [OVHCloud Manager](https://www.ovh.com/manager).
```python
from litellm import completion
import os
# Our API is free but rate-limited for calls without an API key.
os.environ['OVHCLOUD_API_KEY'] = "your-api-key"
response = completion(
model = "ovhcloud/Meta-Llama-3_3-70B-Instruct",
messages = [
{
"role": "user",
"content": "Hello, how are you?",
}
],
max_tokens = 10,
stop = [],
temperature = 0.2,
top_p = 0.9,
user = "user",
api_key = "your-api-key" # Optional if set through the environment variable.
)
print(response)
```
### Streaming
Set the parameter `stream` to `True` to stream a response.
```python
from litellm import completion
import os

os.environ['OVHCLOUD_API_KEY'] = "your-api-key"

# With stream=True, completion() returns an iterator of chunks instead of a
# single response object.
response = completion(
    model = "ovhcloud/Meta-Llama-3_3-70B-Instruct",
    messages = [
        {
            "role": "user",
            "content": "Hello, how are you?",
        }
    ],
    max_tokens = 10,
    stop = [],
    temperature = 0.2,
    top_p = 0.9,
    user = "user",
    api_key = "your-api-key", # Optional if set through the environment variable.
    stream = True
)

# Print each streamed chunk as it arrives.
for part in response:
    print(part)
```
### Tool Calling
```python
from litellm import completion
import json
def get_current_weather(location, unit="celsius"):
    """Return a canned weather report for ``location``.

    Args:
        location: Place name echoed back in the result.
        unit: ``"celsius"`` or ``"fahrenheit"``. ``None`` is treated as
            ``"celsius"`` because the caller below forwards
            ``function_args.get("unit")``, which is ``None`` whenever the
            model omits the optional argument.

    Returns:
        A dict with ``location``, ``temperature`` and ``unit`` keys.
    """
    if unit is None or unit == "celsius":
        return {"location": location, "temperature": "22", "unit": "celsius"}
    return {"location": location, "temperature": "72", "unit": "fahrenheit"}
def print_message(role, content, is_tool_call=False, function_name=None):
if role == "user":
print(f"🧑 User: {content}")
elif role == "assistant":
if is_tool_call:
print(f"🤖 Assistant: I will call the function '{function_name}' to get some informations.")
else:
print(f"🤖 Assistant: {content}")
elif role == "tool":
print(f"🔧 Tool ({function_name}): {content}")
print()
messages = [{"role": "user", "content": "What's the weather like in Paris?"}]
model = "ovhcloud/Meta-Llama-3_3-70B-Instruct"
tools = [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and country, e.g. Montréal, Canada",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"],
},
},
}
]
print("🌟 Beginning of the conversation")
# Initial user message
print_message("user", messages[0]["content"])
# First request to the model
print("📡 Sending first request to the model...")
response = completion(
model=model,
messages=messages,
tools=tools,
tool_choice="auto",
)
response_message = response.choices[0].message
tool_calls = response_message.tool_calls
if tool_calls:
available_functions = {
"get_current_weather": get_current_weather,
}
# Display the tool calls suggested by the model
for tool_call in tool_calls:
print_message("assistant", "", is_tool_call=True, function_name=tool_call.function.name)
print(f" 📋 Arguments: {tool_call.function.arguments}")
print()
# Add assistant message with tool calls to the conversation history
assistant_message = {
"role": "assistant",
"content": response_message.content,
"tool_calls": [
{
"id": tool_call.id,
"type": "function",
"function": {
"name": tool_call.function.name,
"arguments": tool_call.function.arguments
}
} for tool_call in tool_calls
]
}
messages.append(assistant_message)
# Execute each tool call and add the results to the conversation history
for tool_call in tool_calls:
function_name = tool_call.function.name
function_to_call = available_functions[function_name]
function_args = json.loads(tool_call.function.arguments)
print(f"🔧 Executing function '{function_name}'...")
function_response = function_to_call(
location=function_args.get("location"),
unit=function_args.get("unit"),
)
# Display tool response
print_message("tool", json.dumps(function_response, indent=2), function_name=function_name)
messages.append({
"tool_call_id": tool_call.id,
"role": "tool",
"name": function_name,
"content": json.dumps(function_response),
})
print("📡 Sending second request to the model with results...")
# Second request with function results
second_response = completion(
model=model,
messages=messages
)
# Display final response
final_content = second_response.choices[0].message.content
print_message("assistant", final_content)
else:
print("❌ No function call detected")
print_message("assistant", response_message.content)
```
### Vision Example
```python
from base64 import b64encode
from mimetypes import guess_type
import litellm
# Auxiliary function to get b64 images
def data_url_from_image(file_path):
mime_type, _ = guess_type(file_path)
if mime_type is None:
raise ValueError("Could not determine MIME type of the file")
with open(file_path, "rb") as image_file:
encoded_string = b64encode(image_file.read()).decode("utf-8")
data_url = f"data:{mime_type};base64,{encoded_string}"
return data_url
response = litellm.completion(
model = "ovhcloud/Mistral-Small-3.2-24B-Instruct-2506",
messages=[
{
"role": "user",
"content": [
{
"type": "text",
"text": "What's in this image?"
},
{
"type": "image_url",
"image_url": {
"url": data_url_from_image("your_image.jpg"),
"format": "image/jpeg"
}
}
]
}
],
stream=False
)
print(response.choices[0].message.content)
```
### Structured Output
```python
from litellm import completion
response = completion(
model="ovhcloud/Meta-Llama-3_3-70B-Instruct",
messages=[
{
"role": "system",
"content": (
"You are a specialist in extracting structured data from unstructured text. "
"Your task is to identify relevant entities and categories, then format them "
"according to the requested structure."
),
},
{
"role": "user",
"content": "Room 12 contains books, a desk, and a lamp."
},
],
response_format={
"type": "json_schema",
"json_schema": {
"title": "data",
"name": "data_extraction",
"schema": {
"type": "object",
"properties": {
"section": {"type": "string"},
"products": {
"type": "array",
"items": {"type": "string"}
}
},
"required": ["section", "products"],
"additionalProperties": False
},
"strict": False
}
},
stream=False
)
print(response.choices[0].message.content)
```
### Embeddings
```python
from litellm import embedding
response = embedding(
model="ovhcloud/BGE-M3",
input=["sample text to embed", "another sample text to embed"]
)
print(response.data)
```
## Usage with LiteLLM Proxy Server
Here's how to call an OVHCloud AI Endpoints model with the LiteLLM Proxy Server:
1. Modify the config.yaml
```yaml
model_list:
- model_name: my-model
litellm_params:
model: ovhcloud/<your-model-name> # add ovhcloud/ prefix to route as OVHCloud provider
api_key: api-key # api key to send your model
```
2. Start the proxy
```bash
$ litellm --config /path/to/config.yaml
```
3. Send Request to LiteLLM Proxy Server
<Tabs>
<TabItem value="openai" label="OpenAI Python v1.0.0+">
```python
import openai
client = openai.OpenAI(
api_key="sk-1234", # pass litellm proxy key, if you're using virtual keys
base_url="http://0.0.0.0:4000" # litellm-proxy-base url
)
response = client.chat.completions.create(
model="my-model",
messages = [
{
"role": "user",
"content": "what llm are you"
}
],
)
print(response)
```
</TabItem>
<TabItem value="curl" label="curl">
```shell
# Send a chat completion request to the LiteLLM proxy.
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
    "model": "my-model",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ]
}'
```
</TabItem>
</Tabs>
+2 -1
View File
@@ -485,7 +485,8 @@ const sidebars = {
"providers/bytez",
"providers/heroku",
"providers/oci",
"providers/datarobot",
"providers/datarobot",
"providers/ovhcloud",
],
},
{
+12
View File
@@ -241,6 +241,7 @@ gradient_ai_api_key: Optional[str] = None
nebius_key: Optional[str] = None
heroku_key: Optional[str] = None
cometapi_key: Optional[str] = None
ovhcloud_key: Optional[str] = None
common_cloud_provider_auth_params: dict = {
"params": ["project", "region_name", "token"],
"providers": ["vertex_ai", "bedrock", "watsonx", "azure", "vertex_ai_beta"],
@@ -520,6 +521,8 @@ cometapi_models: Set = set()
oci_models: Set = set()
vercel_ai_gateway_models: Set = set()
volcengine_models: Set = set()
ovhcloud_models: Set = set()
ovhcloud_embedding_models: Set = set()
def is_bedrock_pricing_only_model(key: str) -> bool:
@@ -734,6 +737,10 @@ def add_known_models():
oci_models.add(key)
elif value.get("litellm_provider") == "volcengine":
volcengine_models.add(key)
elif value.get("litellm_provider") == "ovhcloud":
ovhcloud_models.add(key)
elif value.get("litellm_provider") == "ovhcloud-embedding-models":
ovhcloud_embedding_models.add(key)
add_known_models()
@@ -828,6 +835,7 @@ model_list = list(
| heroku_models
| vercel_ai_gateway_models
| volcengine_models
| ovhcloud_models
)
model_list_set = set(model_list)
@@ -909,6 +917,7 @@ models_by_provider: dict = {
"cometapi": cometapi_models,
"oci": oci_models,
"volcengine": volcengine_models,
"ovhcloud": ovhcloud_models | ovhcloud_embedding_models,
}
# mapping for those models which have larger equivalents
@@ -943,6 +952,7 @@ all_embedding_models = (
| fireworks_ai_embedding_models
| nebius_embedding_models
| sambanova_embedding_models
| ovhcloud_embedding_models
)
####### IMAGE GENERATION MODELS ###################
@@ -1254,6 +1264,8 @@ from .llms.morph.chat.transformation import MorphChatConfig
from .llms.lambda_ai.chat.transformation import LambdaAIChatConfig
from .llms.hyperbolic.chat.transformation import HyperbolicChatConfig
from .llms.vercel_ai_gateway.chat.transformation import VercelAIGatewayConfig
from .llms.ovhcloud.chat.transformation import OVHCloudChatConfig
from .llms.ovhcloud.embedding.transformation import OVHCloudEmbeddingConfig
from .main import * # type: ignore
from .integrations import *
from .llms.custom_httpx.async_client_cleanup import close_litellm_async_clients
+2
View File
@@ -311,6 +311,7 @@ LITELLM_CHAT_PROVIDERS = [
"morph",
"lambda_ai",
"vercel_ai_gateway",
"ovhcloud",
]
LITELLM_EMBEDDING_PROVIDERS_SUPPORTING_INPUT_ARRAY_OF_TOKENS = [
@@ -1019,6 +1020,7 @@ SENTRY_DENYLIST = [
"FIREWORKS_API_KEY",
"FIREWORKS_AI_API_KEY",
"FIREWORKSAI_API_KEY",
"OVHCLOUD_API_KEY",
# Database and Connection Strings
"database_url",
"redis_url",
@@ -372,6 +372,8 @@ def get_llm_provider( # noqa: PLR0915
custom_llm_provider = "cometapi"
elif model.startswith("oci/"):
custom_llm_provider = "oci"
elif model.startswith("ovhcloud/"):
custom_llm_provider = "ovhcloud"
if not custom_llm_provider:
if litellm.suppress_debug_info is False:
print() # noqa
@@ -0,0 +1,141 @@
"""
Support for OVHCloud AI Endpoints `/v1/chat/completions` endpoint.
Our unified API follows the OpenAI standard.
More information on our website: https://endpoints.ai.cloud.ovh.net
"""
from typing import Optional, Union, List
import httpx
from litellm import ModelResponseStream, OpenAIGPTConfig, get_model_info, verbose_logger
from litellm.llms.ovhcloud.utils import OVHCloudException
from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.types.llms.openai import AllMessageValues
class OVHCloudChatConfig(OpenAIGPTConfig):
    """Chat-completion configuration for OVHCloud AI Endpoints.

    Builds on ``OpenAIGPTConfig`` and overrides the provider name, the list of
    supported OpenAI parameters, URL construction, the error class and the
    final request body assembly.
    """

    @property
    def custom_llm_provider(self) -> Optional[str]:
        """Provider identifier used for this configuration."""
        return "ovhcloud"

    def get_supported_openai_params(self, model: str) -> list:
        """
        Return the OpenAI parameters accepted for ``model``.

        Starts from the parent's list and, unless the model metadata reports
        ``supports_function_calling`` as ``True``, drops ``tools``,
        ``tool_choice``, ``function_call`` and ``response_format``.

        Details about function calling support can be found here:
        https://help.ovhcloud.com/csm/en-gb-public-cloud-ai-endpoints-function-calling?id=kb_article_view&sysparm_article=KB0071907
        """
        supports_function_calling: Optional[bool] = None
        try:
            model_info = get_model_info(model, custom_llm_provider="ovhcloud")
            supports_function_calling = model_info.get(
                "supports_function_calling", False
            )
        except Exception as e:
            # Best effort: a failed metadata lookup is treated the same as
            # "function calling not supported".
            verbose_logger.debug(f"Error getting supported OpenAI params: {e}")

        optional_params = super().get_supported_openai_params(model)
        if supports_function_calling is not True:
            verbose_logger.debug(
                "You can see our models supporting function_calling in our catalog: https://endpoints.ai.cloud.ovh.net/catalog "
            )
            # Filter instead of list.remove(): remove() raises ValueError when
            # the parent list does not contain one of these entries.
            # NOTE(review): response_format is dropped based on function-calling
            # support, not on `supports_response_schema` — confirm this is intended.
            unsupported = {"tools", "tool_choice", "function_call", "response_format"}
            optional_params = [
                param for param in optional_params if param not in unsupported
            ]
        return optional_params

    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """Return the chat-completions URL.

        Uses the public OVHCloud endpoint when ``api_base`` is ``None`` or
        empty; otherwise strips trailing slashes from ``api_base`` before
        appending ``/chat/completions``.
        """
        if not api_base:
            api_base = "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1"
        else:
            api_base = api_base.rstrip("/")
        return f"{api_base}/chat/completions"

    def get_error_class(
        self,
        error_message: str,
        status_code: int,
        headers: Union[dict, httpx.Headers]
    ) -> BaseLLMException:
        """Wrap an error message, status code and headers in an ``OVHCloudException``."""
        return OVHCloudException(
            message=error_message,
            status_code=status_code,
            headers=headers,
        )

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """Delegate parameter mapping to the parent class unchanged."""
        return super().map_openai_params(
            non_default_params, optional_params, model, drop_params
        )

    def transform_request(
        self,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        headers: dict,
    ) -> dict:
        """Build the request body and merge ``extra_body`` into its top level.

        ``extra_body`` is popped from ``optional_params`` (the dict is mutated)
        so that it is not forwarded as a regular parameter; its keys overwrite
        same-named keys produced by the parent transformation.
        """
        # `or {}` also covers an explicit `extra_body=None`, which would
        # otherwise make dict.update() raise TypeError.
        extra_body = optional_params.pop("extra_body", None) or {}
        request_body = super().transform_request(
            model, messages, optional_params, litellm_params, headers
        )
        request_body.update(extra_body)
        return request_body
class OVHCloudChatCompletionStreamingHandler(BaseModelResponseIterator):
    """
    Handler for OVHCloud AI Endpoints streaming chat completion responses
    """

    def chunk_parser(self, chunk: dict) -> ModelResponseStream:
        """
        Parse an individual chunk from a streaming response.

        Args:
            chunk: Decoded JSON payload of one streamed event.

        Returns:
            A ``ModelResponseStream`` built from the chunk's ``id``,
            ``created``, ``model``, ``choices`` and optional ``usage``. Any
            ``delta.reasoning`` value is also exposed as
            ``delta.reasoning_content``.

        Raises:
            OVHCloudException: If the chunk carries an ``error`` entry, or if
                a required key is missing from the chunk.
        """
        try:
            if "error" in chunk:
                error_chunk = chunk["error"]
                if isinstance(error_chunk, dict):
                    detail = error_chunk.get("message", "Unknown error")
                    status_code = error_chunk.get("code", 400)
                else:
                    # A bare (non-object) error payload has no message/code
                    # fields; report it verbatim.
                    detail = str(error_chunk)
                    status_code = 400
                raise OVHCloudException(
                    message="OVHCloud Error: {}".format(detail),
                    status_code=status_code,
                    headers={"Content-Type": "application/json"},
                )

            new_choices = []
            for choice in chunk["choices"]:
                # Mirror "reasoning" into "reasoning_content" on the delta.
                if "delta" in choice and "reasoning" in choice["delta"]:
                    choice["delta"]["reasoning_content"] = choice["delta"].get(
                        "reasoning"
                    )
                new_choices.append(choice)

            return ModelResponseStream(
                id=chunk["id"],
                object="chat.completion.chunk",
                created=chunk["created"],
                usage=chunk.get("usage"),
                model=chunk["model"],
                choices=new_choices,
            )
        except KeyError as e:
            raise OVHCloudException(
                message=f"KeyError: {e}, Got unexpected response from OVHCloud: {chunk}",
                status_code=400,
                headers={"Content-Type": "application/json"},
            ) from e
@@ -0,0 +1,122 @@
"""
This is OpenAI compatible - no transformation is applied
"""
from typing import List, Optional, Union
import httpx
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllEmbeddingInputValues, AllMessageValues
from litellm.types.utils import EmbeddingResponse, Usage
from ..utils import OVHCloudException
class OVHCloudEmbeddingConfig(BaseEmbeddingConfig):
    """Embedding configuration for OVHCloud AI Endpoints.

    Requests and responses are passed through in the OpenAI embeddings shape;
    this class only supplies the URL, headers, error class and the copy of
    response fields into an ``EmbeddingResponse``.
    """

    def __init__(self) -> None:
        pass

    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """Return the embeddings URL.

        Uses the public OVHCloud endpoint when ``api_base`` is ``None`` or
        empty; otherwise strips trailing slashes from ``api_base`` before
        appending ``/embeddings``.
        """
        if not api_base:
            api_base = "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1"
        else:
            api_base = api_base.rstrip("/")
        return f"{api_base}/embeddings"

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        """Build the request headers.

        The API key falls back to the ``OVHCLOUD_API_KEY`` secret. Entries in
        ``headers`` take precedence over the defaults, including a
        caller-supplied ``Authorization`` value.
        """
        if api_key is None:
            api_key = get_secret_str("OVHCLOUD_API_KEY")

        default_headers = {
            "accept": "application/json",
            "Content-Type": "application/json",
        }
        # Only send a bearer token when a key is available; otherwise the
        # literal header "Bearer None" would be sent.
        if api_key:
            default_headers["Authorization"] = f"Bearer {api_key}"

        return {**default_headers, **headers}

    def get_supported_openai_params(self, model: str):
        """Return the supported optional OpenAI embedding params (currently none)."""
        return []

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ):
        """Copy supported entries of ``non_default_params`` into ``optional_params``."""
        supported_openai_params = self.get_supported_openai_params(model)
        for param, value in non_default_params.items():
            if param in supported_openai_params:
                optional_params[param] = value
        return optional_params

    def transform_embedding_request(
        self,
        model: str,
        input: AllEmbeddingInputValues,
        optional_params: dict,
        headers: dict,
    ) -> dict:
        """Return the request body: ``input``, ``model`` and any optional params."""
        return {"input": input, "model": model, **optional_params}

    def transform_embedding_response(
        self,
        model: str,
        raw_response: httpx.Response,
        model_response: EmbeddingResponse,
        logging_obj: LiteLLMLoggingObj,
        api_key: Optional[str],
        request_data: dict,
        optional_params: dict,
        litellm_params: dict,
    ) -> EmbeddingResponse:
        """Populate ``model_response`` from the raw HTTP response.

        Raises:
            OVHCloudException: If the response body cannot be decoded as JSON.
        """
        try:
            raw_response_json = raw_response.json()
        except Exception:
            raise OVHCloudException(
                message=raw_response.text,
                status_code=raw_response.status_code,
                headers=raw_response.headers,
            )

        model_response.model = raw_response_json.get("model")
        model_response.data = raw_response_json.get("data")
        model_response.object = raw_response_json.get("object")

        # `or {}` covers both a missing "usage" key and an explicit null.
        usage_json = raw_response_json.get("usage") or {}
        model_response.usage = Usage(
            prompt_tokens=usage_json.get("prompt_tokens", 0),
            total_tokens=usage_json.get("total_tokens", 0),
        )
        return model_response

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
    ) -> BaseLLMException:
        """Wrap an error message, status code and headers in an ``OVHCloudException``."""
        return OVHCloudException(
            message=error_message, status_code=status_code, headers=headers
        )
+6
View File
@@ -0,0 +1,6 @@
from litellm.llms.base_llm.chat.transformation import BaseLLMException
class OVHCloudException(BaseLLMException):
    """Exception type for errors coming from OVHCloud AI Endpoints.

    Adds nothing to ``BaseLLMException``; it exists so that OVHCloud failures
    can be identified by type.
    """
+60
View File
@@ -164,6 +164,7 @@ from .llms.openai.openai import OpenAIChatCompletion
from .llms.openai.transcriptions.handler import OpenAIAudioTranscription
from .llms.openai_like.chat.handler import OpenAILikeChatHandler
from .llms.openai_like.embedding.handler import OpenAILikeEmbeddingHandler
from .llms.ovhcloud.chat.transformation import OVHCloudChatConfig
from .llms.petals.completion import handler as petals_handler
from .llms.predibase.chat.handler import PredibaseChatCompletion
from .llms.replicate.chat.handler import completion as replicate_chat_completion
@@ -259,6 +260,7 @@ sagemaker_chat_completion = SagemakerChatHandler()
bytez_transformation = BytezChatConfig()
heroku_transformation = HerokuChatConfig()
oci_transformation = OCIChatConfig()
ovhcloud_transformation = OVHCloudChatConfig()
####### COMPLETION ENDPOINTS ################
@@ -3498,6 +3500,42 @@ def completion( # type: ignore # noqa: PLR0915
pass
elif custom_llm_provider == "ovhcloud" or model in litellm.ovhcloud_models:
api_key = (
api_key
or litellm.ovhcloud_key
or get_secret_str("OVHCLOUD_API_KEY")
or litellm.api_key
)
api_base = (
api_base
or litellm.api_base
or get_secret_str("OVHCLOUD_API_BASE")
or "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1"
)
response = base_llm_http_handler.completion(
model=model,
messages=messages,
headers=headers,
model_response=model_response,
api_key=api_key,
api_base=api_base,
acompletion=acompletion,
logging_obj=logging,
optional_params=optional_params,
litellm_params=litellm_params,
timeout=timeout, # type: ignore
client=client,
custom_llm_provider=custom_llm_provider,
encoding=encoding,
stream=stream,
provider_config=ovhcloud_transformation,
)
pass
elif custom_llm_provider == "custom":
url = litellm.api_base or api_base or ""
if url is None or url == "":
@@ -4564,6 +4602,28 @@ def embedding( # noqa: PLR0915
aembedding=aembedding,
headers=headers,
)
elif custom_llm_provider == "ovhcloud":
# Also honour the provider-specific `litellm.ovhcloud_key`, as the chat
# completion branch for this provider does.
api_key = (
    api_key
    or litellm.ovhcloud_key
    or litellm.api_key
    or get_secret_str("OVHCLOUD_API_KEY")
)
api_base = (
api_base
or litellm.api_base
or get_secret_str("OVHCLOUD_API_BASE")
or "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1"
)
response = base_llm_http_handler.embedding(
model=model,
input=input,
custom_llm_provider=custom_llm_provider,
api_base=api_base,
api_key=api_key,
logging_obj=logging,
timeout=timeout,
model_response=EmbeddingResponse(),
optional_params=optional_params,
client=client,
aembedding=aembedding,
litellm_params={},
)
elif custom_llm_provider in litellm._custom_providers:
custom_handler: Optional[CustomLLM] = None
for item in litellm.custom_provider_map:
+1
View File
@@ -2357,6 +2357,7 @@ class LlmProviders(str, Enum):
AUTO_ROUTER = "auto_router"
VERCEL_AI_GATEWAY = "vercel_ai_gateway"
DOTPROMPT = "dotprompt"
OVHCLOUD = "ovhcloud"
# Create a set of all provider values for quick lookup
+17
View File
@@ -2899,6 +2899,19 @@ def get_optional_params_embeddings( # noqa: PLR0915
model=model,
drop_params=drop_params if drop_params is not None else False,
)
elif custom_llm_provider == "ovhcloud":
supported_params = get_supported_openai_params(
model=model,
custom_llm_provider="ovhcloud",
request_type="embeddings",
)
_check_valid_arg(supported_params=supported_params)
optional_params = litellm.OVHCloudEmbeddingConfig().map_openai_params(
non_default_params=non_default_params,
optional_params={},
model=model,
drop_params=drop_params if drop_params is not None else False,
)
elif (
custom_llm_provider != "openai"
@@ -7095,6 +7108,8 @@ class ProviderConfigManager:
return litellm.OCIChatConfig()
elif litellm.LlmProviders.HYPERBOLIC == provider:
return litellm.HyperbolicChatConfig()
elif litellm.LlmProviders.OVHCLOUD == provider:
return litellm.OVHCloudChatConfig()
return None
@staticmethod
@@ -7138,6 +7153,8 @@ class ProviderConfigManager:
)
return VolcEngineEmbeddingConfig()
elif litellm.LlmProviders.OVHCLOUD == provider:
return litellm.OVHCloudEmbeddingConfig()
return None
@staticmethod
+202
View File
@@ -20777,5 +20777,207 @@
"metadata": {
"notes": "Volcengine Doubao embedding model - text-240715 version with 2560 dimensions"
}
},
"ovhcloud/Qwen2.5-VL-72B-Instruct": {
"max_tokens": 32000,
"max_input_tokens": 32000,
"max_output_tokens": 32000,
"input_cost_per_token": 9.1e-07,
"output_cost_per_token": 9.1e-07,
"litellm_provider": "ovhcloud",
"mode": "chat",
"supports_function_calling": false,
"supports_response_schema": true,
"supports_tool_choice": false,
"supports_vision": true,
"source": "https://endpoints.ai.cloud.ovh.net/models/qwen2-5-vl-72b-instruct"
},
"ovhcloud/llava-v1.6-mistral-7b-hf": {
"max_tokens": 32000,
"max_input_tokens": 32000,
"max_output_tokens": 32000,
"input_cost_per_token": 2.9e-07,
"output_cost_per_token": 2.9e-07,
"litellm_provider": "ovhcloud",
"mode": "chat",
"supports_function_calling": false,
"supports_response_schema": true,
"supports_tool_choice": false,
"supports_vision": true,
"source": "https://endpoints.ai.cloud.ovh.net/models/llava-next-mistral-7b"
},
"ovhcloud/gpt-oss-120b": {
"max_tokens": 131000,
"max_input_tokens": 131000,
"max_output_tokens": 131000,
"input_cost_per_token": 8e-08,
"output_cost_per_token": 4e-07,
"litellm_provider": "ovhcloud",
"mode": "chat",
"supports_function_calling": false,
"supports_response_schema": true,
"supports_tool_choice": false,
"supports_reasoning": true,
"source": "https://endpoints.ai.cloud.ovh.net/models/gpt-oss-120b"
},
"ovhcloud/Meta-Llama-3_3-70B-Instruct": {
"max_tokens": 131000,
"max_input_tokens": 131000,
"max_output_tokens": 131000,
"input_cost_per_token": 6.7e-07,
"output_cost_per_token": 6.7e-07,
"litellm_provider": "ovhcloud",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"source": "https://endpoints.ai.cloud.ovh.net/models/meta-llama-3-3-70b-instruct"
},
"ovhcloud/Qwen2.5-Coder-32B-Instruct": {
"max_tokens": 32000,
"max_input_tokens": 32000,
"max_output_tokens": 32000,
"input_cost_per_token": 8.7e-07,
"output_cost_per_token": 8.7e-07,
"litellm_provider": "ovhcloud",
"mode": "chat",
"supports_function_calling": false,
"supports_response_schema": true,
"supports_tool_choice": false,
"source": "https://endpoints.ai.cloud.ovh.net/models/qwen2-5-coder-32b-instruct"
},
"ovhcloud/Mixtral-8x7B-Instruct-v0.1": {
"max_tokens": 32000,
"max_input_tokens": 32000,
"max_output_tokens": 32000,
"input_cost_per_token": 6.3e-07,
"output_cost_per_token": 6.3e-07,
"litellm_provider": "ovhcloud",
"mode": "chat",
"supports_function_calling": false,
"supports_response_schema": true,
"supports_tool_choice": false,
"source": "https://endpoints.ai.cloud.ovh.net/models/mixtral-8x7b-instruct-v0-1"
},
"ovhcloud/Meta-Llama-3_1-70B-Instruct": {
"max_tokens": 131000,
"max_input_tokens": 131000,
"max_output_tokens": 131000,
"input_cost_per_token": 6.7e-07,
"output_cost_per_token": 6.7e-07,
"litellm_provider": "ovhcloud",
"mode": "chat",
"supports_function_calling": false,
"supports_response_schema": false,
"supports_tool_choice": false,
"source": "https://endpoints.ai.cloud.ovh.net/models/meta-llama-3-1-70b-instruct"
},
"ovhcloud/Mistral-Small-3.2-24B-Instruct-2506": {
"max_tokens": 128000,
"max_input_tokens": 128000,
"max_output_tokens": 128000,
"input_cost_per_token": 9e-08,
"output_cost_per_token": 2.8e-07,
"litellm_provider": "ovhcloud",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_vision": true,
"source": "https://endpoints.ai.cloud.ovh.net/models/mistral-small-3-2-24b-instruct-2506"
},
"ovhcloud/DeepSeek-R1-Distill-Llama-70B": {
"max_tokens": 131000,
"max_input_tokens": 131000,
"max_output_tokens": 131000,
"input_cost_per_token": 6.7e-07,
"output_cost_per_token": 6.7e-07,
"litellm_provider": "ovhcloud",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_reasoning": true,
"source": "https://endpoints.ai.cloud.ovh.net/models/deepseek-r1-distill-llama-70b"
},
"ovhcloud/Llama-3.1-8B-Instruct": {
"max_tokens": 131000,
"max_input_tokens": 131000,
"max_output_tokens": 131000,
"input_cost_per_token": 1e-07,
"output_cost_per_token": 1e-07,
"litellm_provider": "ovhcloud",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"source": "https://endpoints.ai.cloud.ovh.net/models/llama-3-1-8b-instruct"
},
"ovhcloud/Mistral-7B-Instruct-v0.3": {
"max_tokens": 127000,
"max_input_tokens": 127000,
"max_output_tokens": 127000,
"input_cost_per_token": 1e-07,
"output_cost_per_token": 1e-07,
"litellm_provider": "ovhcloud",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"source": "https://endpoints.ai.cloud.ovh.net/models/mistral-7b-instruct-v0-3"
},
"ovhcloud/gpt-oss-20b": {
"max_tokens": 131000,
"max_input_tokens": 131000,
"max_output_tokens": 131000,
"input_cost_per_token": 4e-08,
"output_cost_per_token": 1.5e-07,
"litellm_provider": "ovhcloud",
"mode": "chat",
"supports_function_calling": false,
"supports_response_schema": true,
"supports_tool_choice": false,
"supports_reasoning": true,
"source": "https://endpoints.ai.cloud.ovh.net/models/gpt-oss-20b"
},
"ovhcloud/Mistral-Nemo-Instruct-2407": {
"max_tokens": 118000,
"max_input_tokens": 118000,
"max_output_tokens": 118000,
"input_cost_per_token": 1.3e-07,
"output_cost_per_token": 1.3e-07,
"litellm_provider": "ovhcloud",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"source": "https://endpoints.ai.cloud.ovh.net/models/mistral-nemo-instruct-2407"
},
"ovhcloud/Qwen3-32B": {
"max_tokens": 32000,
"max_input_tokens": 32000,
"max_output_tokens": 32000,
"input_cost_per_token": 8e-08,
"output_cost_per_token": 2.3e-07,
"litellm_provider": "ovhcloud",
"mode": "chat",
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"supports_reasoning": true,
"source": "https://endpoints.ai.cloud.ovh.net/models/qwen3-32b"
},
"ovhcloud/mamba-codestral-7B-v0.1": {
"max_tokens": 256000,
"max_input_tokens": 256000,
"max_output_tokens": 256000,
"input_cost_per_token": 1.9e-07,
"output_cost_per_token": 1.9e-07,
"litellm_provider": "ovhcloud",
"mode": "chat",
"supports_function_calling": false,
"supports_response_schema": true,
"supports_tool_choice": false,
"source": "https://endpoints.ai.cloud.ovh.net/models/mamba-codestral-7b-v0-1"
}
}
@@ -0,0 +1,256 @@
"""
Unit tests for OVHCloud AI Endpoints chat integration.
"""
import os
import sys
import pytest
from litellm.llms.ovhcloud.utils import OVHCloudException
sys.path.insert(
0, os.path.abspath("../../../../..")
) # Adds the parent directory to the system path
from litellm.llms.ovhcloud.chat.transformation import (
OVHCloudChatCompletionStreamingHandler,
OVHCloudChatConfig,
)
config = OVHCloudChatConfig()
model = "ovhcloud/Mistral-7B-Instruct-v0.3"
class TestOvhCloudChatCompletionStreamingHandler:
    """Unit tests for ``OVHCloudChatCompletionStreamingHandler.chunk_parser``."""

    @staticmethod
    def _new_handler():
        """Create a synchronous handler that has no underlying stream."""
        return OVHCloudChatCompletionStreamingHandler(
            streaming_response=None, sync_stream=True
        )

    def test_chunk_parser_successful(self):
        """A well-formed chunk is parsed and ``reasoning`` is mirrored."""
        usage = {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30}
        delta = {"content": "test content", "reasoning": "test reasoning"}
        payload = {
            "id": "test_id",
            "created": 1234567890,
            "model": "gpt-oss-20b",
            "usage": usage,
            "choices": [{"delta": delta}],
        }

        parsed = self._new_handler().chunk_parser(payload)

        assert parsed.id == "test_id"
        assert parsed.object == "chat.completion.chunk"
        assert parsed.created == 1234567890
        assert parsed.model == "gpt-oss-20b"
        for field in ("prompt_tokens", "completion_tokens", "total_tokens"):
            assert getattr(parsed.usage, field) == payload["usage"][field]
        assert len(parsed.choices) == 1
        assert parsed.choices[0]["delta"]["reasoning_content"] == "test reasoning"

    def test_chunk_parser_error_response(self):
        """A chunk carrying an ``error`` entry raises ``OVHCloudException``."""
        payload = {"error": {"message": "test error", "code": 400}}

        with pytest.raises(OVHCloudException) as raised:
            self._new_handler().chunk_parser(payload)

        failure = raised.value
        assert "OVHCloud Error: test error" in str(failure)
        assert failure.status_code == 400

    def test_chunk_parser_key_error(self):
        """A chunk missing required keys is reported as a 400 KeyError."""
        payload = {"incomplete": "data"}

        with pytest.raises(OVHCloudException) as raised:
            self._new_handler().chunk_parser(payload)

        failure = raised.value
        assert "KeyError" in str(failure)
        assert failure.status_code == 400
class TestOVHCloudConfig:
    """Unit tests for the module-level ``OVHCloudChatConfig`` instance."""

    def test_transform_request_basic(self):
        """Test basic request transformation"""
        conversation = [{"role": "user", "content": "Hello, world!"}]

        body = config.transform_request(
            model,
            messages=conversation,
            optional_params={},
            litellm_params={},
            headers={},
        )

        assert body["model"] == model
        assert body["messages"] == [{"role": "user", "content": "Hello, world!"}]

    def test_transform_request_with_extra_body(self):
        """Test request transformation with extra_body parameters"""
        conversation = [{"role": "user", "content": "Hello, world!"}]
        params = {"extra_body": {"custom_param": "custom_value"}}

        body = config.transform_request(
            model,
            messages=conversation,
            optional_params=params,
            litellm_params={},
            headers={},
        )

        assert body["custom_param"] == "custom_value"
        assert body["messages"] == [{"role": "user", "content": "Hello, world!"}]

    def test_map_openai_params(self):
        """Test OpenAI parameter mapping"""
        requested = {"temperature": 0.7, "max_tokens": 100, "top_p": 0.9}

        mapped = config.map_openai_params(
            non_default_params=requested,
            optional_params={},
            model=model,
            drop_params=False,
        )

        assert mapped["temperature"] == 0.7
        assert mapped["max_tokens"] == 100
        assert mapped["top_p"] == 0.9

    def test_get_error_class(self):
        """Test error class creation"""
        failure = config.get_error_class(
            error_message="Test error",
            status_code=400,
            headers={"Content-Type": "application/json"},
        )

        assert isinstance(failure, OVHCloudException)
        assert failure.message == "Test error"
        assert failure.status_code == 400
def test_ovhcloud_integration():
    """Live chat-completion smoke test.

    Skipped unless the ``OVHCLOUD_API_KEY`` environment variable is set.
    """
    import os
    from litellm import completion

    key = os.environ.get("OVHCLOUD_API_KEY")
    if not key:
        pytest.skip("OVHCLOUD_API_KEY not set, skipping test")

    prompt = [{"role": "user", "content": "Say hello in one word"}]
    response = completion(
        model,
        messages=prompt,
        api_key=key,
        max_tokens=10,
        temperature=0.7,
    )

    # The reply must contain something other than whitespace.
    reply = response.choices[0].message.content
    assert reply
    assert len(reply.strip()) > 0

    # Response metadata must be populated as well.
    assert response.model
    assert response.usage
    assert response.usage.total_tokens > 0
def test_OVHCloud_streaming_integration():
    """
    Integration test for streaming - requires real API key
    Run with: pytest -k test_OVHCloud_streaming_integration -s

    Skipped when ``OVHCLOUD_API_KEY`` is not set. Any exception raised while
    streaming (including a failed assertion) is printed with its details and
    then reported through ``pytest.fail``.
    """
    import os
    from litellm import completion

    api_key = os.getenv("OVHCLOUD_API_KEY")
    if not api_key:
        pytest.skip("OVHCLOUD_API_KEY not set, skipping test")

    try:
        # Never echo any part of the credential: test output is routinely
        # captured in CI logs, and slicing a short key would reveal all of it.
        print(f"🔍 Testing streaming with API key set (length: {len(api_key)})")
        print(f"🔍 API base URL: {os.getenv('OVHCLOUD_API_BASE')}")

        response = completion(
            model,
            messages=[{"role": "user", "content": "Count from 1 to 5"}],
            api_key=api_key,
            max_tokens=50,
            stream=True,
        )

        chunks = []
        content_parts = []
        for chunk in response:
            chunks.append(chunk)
            # Some chunks carry no text; only collect the ones that do.
            if chunk.choices[0].delta.content:
                content_parts.append(chunk.choices[0].delta.content)

        assert len(chunks) > 0, "Should receive at least one chunk"
        assert len(content_parts) > 0, "Should receive content in chunks"

        full_content = "".join(content_parts)
        assert len(full_content.strip()) > 0, "Should have non-empty content"

        print(f"✅ Received {len(chunks)} chunks")
        print(f"✅ Full content: {full_content}")
    except Exception as e:
        print("❌ Streaming integration test error details:")
        print(f" Error type: {type(e).__name__}")
        print(f" Error message: {str(e)}")
        if hasattr(e, 'status_code'):
            print(f" Status code: {e.status_code}")
        if hasattr(e, 'response'):
            print(f" Response: {e.response}")
        pytest.fail(f"Streaming integration test failed: {type(e).__name__}: {str(e)}")
def test_ovhcloud_with_custom_base_url():
    """
    Test OVHCloud with custom base URL

    The base URL is read from ``OVHCLOUD_API_BASE`` and falls back to a
    hard-coded endpoint. Skipped when ``OVHCLOUD_API_KEY`` is not set.
    """
    import os
    from litellm import completion

    key = os.environ.get("OVHCLOUD_API_KEY")
    if not key:
        pytest.skip("OVHCLOUD_API_KEY not set, skipping test")

    fallback_url = "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1"
    base_url = os.environ.get("OVHCLOUD_API_BASE", fallback_url)

    try:
        response = completion(
            model,
            messages=[{"role": "user", "content": "Hello"}],
            api_key=key,
            api_base=base_url,
            max_tokens=5,
        )
        reply = response.choices[0].message.content
        assert reply
        print(f"✅ Custom base URL test passed: {reply}")
    except Exception as err:
        # Any error, including an empty reply, is reported as a test failure.
        pytest.fail(f"Custom base URL test failed: {err!s}")
# Allow running this test module directly as a script, with verbose output.
if __name__ == "__main__":
    cli_args = [__file__, "-v"]
    pytest.main(cli_args)
@@ -0,0 +1,37 @@
from unittest.mock import patch
import litellm
# Model identifier passed to every embedding call in this module.
model = "ovhcloud/BGE-M3"
def mock_embedding_response(*args, **kwargs):
    """Build a stand-in embedding response for a patched ``litellm.embedding``.

    Intended to be used as a mock ``side_effect``, so it receives whatever
    arguments the patched function was called with.

    Args:
        *args: Positional call arguments; the first one, if present, is taken
            as the requested model.
        **kwargs: Keyword call arguments; a ``model`` entry takes precedence
            over the positional value.

    Returns:
        An object exposing ``data``, ``usage``, ``model`` and ``object`` both
        as attributes and through ``response["name"]`` subscripting.
    """
    # ``unittest.mock`` forwards the mock's call arguments to ``side_effect``,
    # so a positionally supplied model arrives in ``args[0]``, not ``kwargs``.
    # Fall back to the module-level default only when neither form was used.
    requested_model = kwargs.get("model", args[0] if args else model)

    class MockResponse:
        def __init__(self):
            self.data = [{"embedding": [0.1, 0.2, 0.3]}]
            self.usage = litellm.Usage()
            self.model = requested_model
            self.object = "embedding"

        def __getitem__(self, key):
            # Dict-style access is served from the attributes of the same name.
            return getattr(self, key)

    return MockResponse()
def test_ovhcloud_embeddings():
    """Call a patched ``litellm.embedding`` and check the mocked response shape.

    ``litellm.embedding`` is replaced by a mock whose side effect is
    ``mock_embedding_response``, so no network request is made.
    """
    embedding_patch = patch("litellm.embedding", side_effect=mock_embedding_response)
    with embedding_patch as mock_embed:
        response = litellm.embedding(
            model,
            input=["good morning from litellm"],
        )
        mock_embed.assert_called_once_with(
            model,
            input=["good morning from litellm"],
        )

    # The response object stays valid after the patch has been removed.
    vectors = response.data
    assert isinstance(vectors, list)
    first_vector = vectors[0]
    assert "embedding" in first_vector
    assert isinstance(first_vector["embedding"], list)
    assert response.model == model
    assert response.object == "embedding"