feat: Add OVHCloud AI Endpoints as a provider

2026-07-03 19:07:08 +00:00 · 2025-09-12 13:20:13 +02:00
parent f8036a25a2
commit ef9d1ddc40
14 changed files with 1240 additions and 1 deletions
@@ -0,0 +1,380 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# OVHCloud AI Endpoints
+Leading French Cloud provider in Europe with data sovereignty and privacy.
+
+You can explore the latest models we made available in our [catalog](https://endpoints.ai.cloud.ovh.net/catalog).
+
+:::tip
+
+We support ALL OVHCloud AI Endpoints models: just prefix the model name with `ovhcloud/` (i.e. `model=ovhcloud/<any-model-on-ai-endpoints>`) when sending litellm requests.
+For the complete models catalog, visit https://endpoints.ai.cloud.ovh.net/catalog.
+
+:::
+
+## Sample usage
+### Chat completion
+You can define your API key by setting the `OVHCLOUD_API_KEY` environment variable or by overriding the `api_key` parameter. You can generate a key on the [OVHCloud Manager](https://www.ovh.com/manager).
+
+```python
+from litellm import completion
+import os
+
+# Our API is free but ratelimited for calls without an API key.
+os.environ['OVHCLOUD_API_KEY'] = "your-api-key"
+
+response = completion(
+    model = "ovhcloud/Meta-Llama-3_3-70B-Instruct",
+    messages = [
+        {
+            "role": "user",
+            "content": "Hello, how are you?",
+        }
+    ],
+    max_tokens = 10,
+    stop = [],
+    temperature = 0.2,
+    top_p = 0.9,
+    user = "user",
+    api_key = "your-api-key" # Optional if set through the environment variable.
+)
+
+print(response)
+```
+
+### Streaming
+Set the parameter `stream` to `True` to stream a response.
+```python
+from litellm import completion
+import os
+
+os.environ['OVHCLOUD_API_KEY'] = "your-api-key"
+
+response = completion(
+    model = "ovhcloud/Meta-Llama-3_3-70B-Instruct",
+    messages = [
+        {
+            "role": "user",
+            "content": "Hello, how are you?",
+        }
+    ],
+    max_tokens = 10,
+    stop = [],
+    temperature = 0.2,
+    top_p = 0.9,
+    user = "user",
+    api_key = "your-api-key", # Optional if set through the environment variable.
+    stream = True
+)
+
+for part in response:
+    print(part)
+```
+
+### Tool Calling
+
+```python
+from litellm import completion
+import json
+
+def get_current_weather(location, unit="celsius"):
+    if unit == "celsius":
+        return {"location": location, "temperature": "22", "unit": "celsius"}
+    else:
+        return {"location": location, "temperature": "72", "unit": "fahrenheit"}
+
+def print_message(role, content, is_tool_call=False, function_name=None):
+    if role == "user":
+        print(f"🧑 User: {content}")
+    elif role == "assistant":
+        if is_tool_call:
+            print(f"🤖 Assistant: I will call the function '{function_name}' to get some information.")
+        else:
+            print(f"🤖 Assistant: {content}")
+    elif role == "tool":
+        print(f"🔧 Tool ({function_name}): {content}")
+    print()
+
+messages = [{"role": "user", "content": "What's the weather like in Paris?"}]
+model = "ovhcloud/Meta-Llama-3_3-70B-Instruct"
+
+tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "get_current_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and country, e.g. Montréal, Canada",
+                    },
+                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+                },
+                "required": ["location"],
+            },
+        },
+    }
+]
+
+print("🌟 Beginning of the conversation")
+
+# Initial user message
+print_message("user", messages[0]["content"])
+
+# First request to the model
+print("📡 Sending first request to the model...")
+response = completion(
+    model=model,
+    messages=messages,
+    tools=tools,
+    tool_choice="auto",
+)
+
+response_message = response.choices[0].message
+tool_calls = response_message.tool_calls
+
+if tool_calls:
+    available_functions = {
+        "get_current_weather": get_current_weather,
+    }
+    
+    # Display the tool calls suggested by the model
+    for tool_call in tool_calls:
+        print_message("assistant", "", is_tool_call=True, function_name=tool_call.function.name)
+        print(f"   📋 Arguments: {tool_call.function.arguments}")
+        print()
+    
+    # Add assistant message with tool calls to the conversation history
+    assistant_message = {
+        "role": "assistant",
+        "content": response_message.content,
+        "tool_calls": [
+            {
+                "id": tool_call.id,
+                "type": "function", 
+                "function": {
+                    "name": tool_call.function.name,
+                    "arguments": tool_call.function.arguments
+                }
+            } for tool_call in tool_calls
+        ]
+    }
+    
+    messages.append(assistant_message)
+    
+    # Execute each tool call and add the results to the conversation history
+    for tool_call in tool_calls:
+        function_name = tool_call.function.name
+        function_to_call = available_functions[function_name]
+        function_args = json.loads(tool_call.function.arguments)
+        
+        print(f"🔧 Executing function '{function_name}'...")
+        function_response = function_to_call(
+            location=function_args.get("location"),
+            unit=function_args.get("unit") or "celsius",
+        )
+        
+        # Display tool response
+        print_message("tool", json.dumps(function_response, indent=2), function_name=function_name)
+        
+        messages.append({
+            "tool_call_id": tool_call.id,
+            "role": "tool",
+            "name": function_name,
+            "content": json.dumps(function_response),
+        })
+    
+    print("📡 Sending second request to the model with results...")
+    
+    # Second request with function results
+    second_response = completion(
+        model=model,
+        messages=messages
+    )
+    
+    # Display final response
+    final_content = second_response.choices[0].message.content
+    print_message("assistant", final_content)
+    
+else:
+    print("❌ No function call detected")
+    print_message("assistant", response_message.content)
+```
+
+### Vision Example
+
+```python
+from base64 import b64encode
+from mimetypes import guess_type
+import litellm
+
+# Auxiliary function to get b64 images
+def data_url_from_image(file_path):
+    mime_type, _ = guess_type(file_path)
+    if mime_type is None:
+        raise ValueError("Could not determine MIME type of the file")
+
+    with open(file_path, "rb") as image_file:
+        encoded_string = b64encode(image_file.read()).decode("utf-8")
+
+    data_url = f"data:{mime_type};base64,{encoded_string}"
+    return data_url
+
+response = litellm.completion(
+    model = "ovhcloud/Mistral-Small-3.2-24B-Instruct-2506", 
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "What's in this image?"
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": data_url_from_image("your_image.jpg"),
+                        "format": "image/jpeg"
+                    }
+                }
+            ]
+        }
+    ],
+    stream=False
+)
+
+print(response.choices[0].message.content)
+```
+
+
+### Structured Output
+
+```python
+from litellm import completion
+
+response = completion(
+    model="ovhcloud/Meta-Llama-3_3-70B-Instruct",
+    messages=[
+        {
+            "role": "system",
+            "content": (
+                "You are a specialist in extracting structured data from unstructured text. "
+                "Your task is to identify relevant entities and categories, then format them "
+                "according to the requested structure."
+            ),
+        },
+        {
+            "role": "user",
+            "content": "Room 12 contains books, a desk, and a lamp."
+        },
+    ],
+    response_format={
+        "type": "json_schema",
+        "json_schema": {
+            "title": "data",
+            "name": "data_extraction",
+            "schema": {
+                "type": "object",
+                "properties": {
+                    "section": {"type": "string"},
+                    "products": {
+                        "type": "array",
+                        "items": {"type": "string"}
+                    }
+                },
+                "required": ["section", "products"],
+                "additionalProperties": False
+            },
+            "strict": False
+        }
+    },
+    stream=False
+)
+
+print(response.choices[0].message.content)
+```
+
+### Embeddings
+
+```python
+from litellm import embedding
+
+response = embedding(
+    model="ovhcloud/BGE-M3",
+    input=["sample text to embed", "another sample text to embed"]
+)
+
+print(response.data)
+```
+
+## Usage with LiteLLM Proxy Server
+
+Here's how to call an OVHCloud AI Endpoints model with the LiteLLM Proxy Server:
+
+1. Modify the config.yaml 
+
+  ```yaml
+  model_list:
+    - model_name: my-model
+      litellm_params:
+        model: ovhcloud/<your-model-name>  # add ovhcloud/ prefix to route as OVHCloud provider
+        api_key: api-key                   # your OVHCloud AI Endpoints API key
+  ```
+
+
+2. Start the proxy 
+
+  ```bash
+  $ litellm --config /path/to/config.yaml
+  ```
+
+3. Send Request to LiteLLM Proxy Server
+
+  <Tabs>
+
+  <TabItem value="openai" label="OpenAI Python v1.0.0+">
+
+  ```python
+  import openai
+  client = openai.OpenAI(
+      api_key="sk-1234",             # pass litellm proxy key, if you're using virtual keys
+      base_url="http://0.0.0.0:4000" # litellm-proxy-base url
+  )
+
+  response = client.chat.completions.create(
+      model="my-model",
+      messages = [
+          {
+              "role": "user",
+              "content": "what llm are you"
+          }
+      ],
+  )
+
+  print(response)
+  ```
+  </TabItem>
+
+  <TabItem value="curl" label="curl">
+
+  ```shell
+  curl --location 'http://0.0.0.0:4000/chat/completions' \
+      --header 'Authorization: Bearer sk-1234' \
+      --header 'Content-Type: application/json' \
+      --data '{
+      "model": "my-model",
+      "messages": [
+          {
+          "role": "user",
+          "content": "what llm are you"
+          }
+      ]
+  }'
+  ```
+  </TabItem>
+
+  </Tabs>
@@ -485,7 +485,8 @@ const sidebars = {
        "providers/bytez",
        "providers/heroku",
        "providers/oci",
-        "providers/datarobot",  
+        "providers/datarobot",
+        "providers/ovhcloud",  
      ],
    },
    {
@@ -241,6 +241,7 @@ gradient_ai_api_key: Optional[str] = None
 nebius_key: Optional[str] = None
 heroku_key: Optional[str] = None
 cometapi_key: Optional[str] = None
+ovhcloud_key: Optional[str] = None
 common_cloud_provider_auth_params: dict = {
    "params": ["project", "region_name", "token"],
    "providers": ["vertex_ai", "bedrock", "watsonx", "azure", "vertex_ai_beta"],
@@ -520,6 +521,8 @@ cometapi_models: Set = set()
 oci_models: Set = set()
 vercel_ai_gateway_models: Set = set()
 volcengine_models: Set = set()
+ovhcloud_models: Set = set()
+ovhcloud_embedding_models: Set = set()


 def is_bedrock_pricing_only_model(key: str) -> bool:
@@ -734,6 +737,10 @@ def add_known_models():
            oci_models.add(key)
        elif value.get("litellm_provider") == "volcengine":
            volcengine_models.add(key)
+        elif value.get("litellm_provider") == "ovhcloud":
+            ovhcloud_models.add(key)
+        elif value.get("litellm_provider") == "ovhcloud-embedding-models":
+            ovhcloud_embedding_models.add(key)


 add_known_models()
@@ -828,6 +835,7 @@ model_list = list(
    | heroku_models
    | vercel_ai_gateway_models
    | volcengine_models
+    | ovhcloud_models
 )

 model_list_set = set(model_list)
@@ -909,6 +917,7 @@ models_by_provider: dict = {
    "cometapi": cometapi_models,
    "oci": oci_models,
    "volcengine": volcengine_models,
+    "ovhcloud": ovhcloud_models | ovhcloud_embedding_models,
 }

 # mapping for those models which have larger equivalents
@@ -943,6 +952,7 @@ all_embedding_models = (
    | fireworks_ai_embedding_models
    | nebius_embedding_models
    | sambanova_embedding_models
+    | ovhcloud_embedding_models
 )

 ####### IMAGE GENERATION MODELS ###################
@@ -1254,6 +1264,8 @@ from .llms.morph.chat.transformation import MorphChatConfig
 from .llms.lambda_ai.chat.transformation import LambdaAIChatConfig
 from .llms.hyperbolic.chat.transformation import HyperbolicChatConfig
 from .llms.vercel_ai_gateway.chat.transformation import VercelAIGatewayConfig
+from .llms.ovhcloud.chat.transformation import OVHCloudChatConfig
+from .llms.ovhcloud.embedding.transformation import OVHCloudEmbeddingConfig
 from .main import *  # type: ignore
 from .integrations import *
 from .llms.custom_httpx.async_client_cleanup import close_litellm_async_clients
@@ -311,6 +311,7 @@ LITELLM_CHAT_PROVIDERS = [
    "morph",
    "lambda_ai",
    "vercel_ai_gateway",
+    "ovhcloud",
 ]

 LITELLM_EMBEDDING_PROVIDERS_SUPPORTING_INPUT_ARRAY_OF_TOKENS = [
@@ -1019,6 +1020,7 @@ SENTRY_DENYLIST = [
    "FIREWORKS_API_KEY",
    "FIREWORKS_AI_API_KEY",
    "FIREWORKSAI_API_KEY",
+    "OVHCLOUD_API_KEY",
    # Database and Connection Strings
    "database_url",
    "redis_url",
@@ -372,6 +372,8 @@ def get_llm_provider(  # noqa: PLR0915
            custom_llm_provider = "cometapi"
        elif model.startswith("oci/"):
            custom_llm_provider = "oci"
+        elif model.startswith("ovhcloud/"):
+            custom_llm_provider = "ovhcloud"
        if not custom_llm_provider:
            if litellm.suppress_debug_info is False:
                print()  # noqa
@@ -0,0 +1,141 @@
+"""
+Support for OVHCloud AI Endpoints `/v1/chat/completions` endpoint.
+
+Our unified API follows the OpenAI standard.
+More information on our website: https://endpoints.ai.cloud.ovh.net
+"""
+from typing import Optional, Union, List
+
+import httpx
+from litellm import ModelResponseStream, OpenAIGPTConfig, get_model_info, verbose_logger
+from litellm.llms.ovhcloud.utils import OVHCloudException
+from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
+from litellm.llms.base_llm.chat.transformation import BaseLLMException
+from litellm.types.llms.openai import AllMessageValues
+
+class OVHCloudChatConfig(OpenAIGPTConfig):
+    """Chat-completion config for OVHCloud AI Endpoints (OpenAI-compatible API)."""
+
+    @property
+    def custom_llm_provider(self) -> Optional[str]:
+        return "ovhcloud"
+
+    def get_supported_openai_params(self, model: str) -> list:
+        """
+        Return the parent's supported params; tool params and `response_format`
+        are dropped unless the model info reports function calling support.
+        Details about function calling support can be found here:
+        https://help.ovhcloud.com/csm/en-gb-public-cloud-ai-endpoints-function-calling?id=kb_article_view&sysparm_article=KB0071907
+        """
+        supports_function_calling: Optional[bool] = None
+        try:
+            model_info = get_model_info(model, custom_llm_provider="ovhcloud")
+            supports_function_calling = model_info.get(
+                "supports_function_calling", False
+            )
+        except Exception as e:
+            verbose_logger.debug(f"Error getting supported OpenAI params: {e}")
+
+        optional_params = super().get_supported_openai_params(model)
+        if supports_function_calling is not True:
+            verbose_logger.debug(
+                "You can see our models supporting function_calling in our catalog: https://endpoints.ai.cloud.ovh.net/catalog "
+            )
+            # Filter rather than list.remove(): an absent entry must not raise.
+            unsupported = {"tools", "tool_choice", "function_call", "response_format"}
+            optional_params = [p for p in optional_params if p not in unsupported]
+        return optional_params
+
+    def get_complete_url(
+        self,
+        api_base: Optional[str],
+        api_key: Optional[str],
+        model: str,
+        optional_params: dict,
+        litellm_params: dict,
+        stream: Optional[bool] = None,
+    ) -> str:
+        """Return the chat completions URL; a missing `api_base` uses the public endpoint."""
+        base = api_base or "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1"
+        return f"{base.rstrip('/')}/chat/completions"
+
+    def get_error_class(
+        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
+    ) -> BaseLLMException:
+        """Wrap a provider error in an `OVHCloudException`."""
+        return OVHCloudException(
+            message=error_message,
+            status_code=status_code,
+            headers=headers,
+        )
+
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ) -> dict:
+        """Map OpenAI params by delegating to the parent implementation unchanged."""
+        return super().map_openai_params(
+            non_default_params, optional_params, model, drop_params
+        )
+
+    def transform_request(
+        self,
+        model: str,
+        messages: List[AllMessageValues],
+        optional_params: dict,
+        litellm_params: dict,
+        headers: dict,
+    ) -> dict:
+        """Build the request body and merge any `extra_body` keys into its top level."""
+        extra_body = optional_params.pop("extra_body", None) or {}
+        request_body = super().transform_request(
+            model, messages, optional_params, litellm_params, headers
+        )
+        return {**request_body, **extra_body}
+
+class OVHCloudChatCompletionStreamingHandler(BaseModelResponseIterator):
+    """
+    Handler for OVHCloud AI Endpoints streaming chat completion responses
+    """
+
+    def chunk_parser(self, chunk: dict) -> ModelResponseStream:
+        """
+        Parse one streaming chunk; raise `OVHCloudException` on an "error" payload
+        or when a required key ("id", "created", "model", "choices") is missing.
+        """
+        try:
+            if "error" in chunk:
+                error_chunk = chunk["error"]
+                error_message = "OVHCloud Error: {}".format(
+                    error_chunk.get("message", "Unknown error")
+                )
+                raise OVHCloudException(
+                    message=error_message,
+                    status_code=error_chunk.get("code", 400),
+                    headers={"Content-Type": "application/json"},
+                )
+
+            new_choices = []
+            for choice in chunk["choices"]:
+                # Mirror the "reasoning" delta under the "reasoning_content" key.
+                if "delta" in choice and "reasoning" in choice["delta"]:
+                    choice["delta"]["reasoning_content"] = choice["delta"].get("reasoning")
+                new_choices.append(choice)
+
+            return ModelResponseStream(
+                id=chunk["id"],
+                object="chat.completion.chunk",
+                created=chunk["created"],
+                usage=chunk.get("usage"),
+                model=chunk["model"],
+                choices=new_choices,
+            )
+        except KeyError as e:
+            raise OVHCloudException(
+                message=f"KeyError: {e}, Got unexpected response from OVHCloud: {chunk}",
+                status_code=400,
+                headers={"Content-Type": "application/json"},
+            ) from e
@@ -0,0 +1,122 @@
+"""
+This is OpenAI compatible - no transformation is applied
+
+"""
+from typing import List, Optional, Union
+
+import httpx
+
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.llms.base_llm.chat.transformation import BaseLLMException
+from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig
+from litellm.secret_managers.main import get_secret_str
+from litellm.types.llms.openai import AllEmbeddingInputValues, AllMessageValues
+from litellm.types.utils import EmbeddingResponse, Usage
+
+from ..utils import OVHCloudException
+
+
+class OVHCloudEmbeddingConfig(BaseEmbeddingConfig):
+    """Embedding config for OVHCloud AI Endpoints (OpenAI-compatible `/embeddings`)."""
+
+    def __init__(self) -> None:
+        pass
+
+    def get_complete_url(
+        self,
+        api_base: Optional[str],
+        api_key: Optional[str],
+        model: str,
+        optional_params: dict,
+        litellm_params: dict,
+        stream: Optional[bool] = None,
+    ) -> str:
+        """Return the embeddings URL; a missing `api_base` uses the public endpoint."""
+        base = api_base or "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1"
+        return f"{base.rstrip('/')}/embeddings"
+
+    def validate_environment(
+        self,
+        headers: dict,
+        model: str,
+        messages: List[AllMessageValues],
+        optional_params: dict,
+        litellm_params: dict,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+    ) -> dict:
+        """Return the request headers; caller-supplied `headers` take precedence."""
+        api_key = api_key or get_secret_str("OVHCLOUD_API_KEY")
+
+        default_headers = {
+            "accept": "application/json",
+            "Content-Type": "application/json",
+        }
+        # Calls without a key are allowed (rate-limited): never send "Bearer None".
+        if api_key:
+            default_headers["Authorization"] = f"Bearer {api_key}"
+
+        return {**default_headers, **headers}
+
+    def get_supported_openai_params(self, model: str):
+        return []
+
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ):
+        supported_openai_params = self.get_supported_openai_params(model)
+        for param, value in non_default_params.items():
+            if param in supported_openai_params:
+                optional_params[param] = value
+        return optional_params
+
+    def transform_embedding_request(
+        self,
+        model: str,
+        input: AllEmbeddingInputValues,
+        optional_params: dict,
+        headers: dict,
+    ) -> dict:
+        return {"input": input, "model": model, **optional_params}
+
+    def transform_embedding_response(
+        self,
+        model: str,
+        raw_response: httpx.Response,
+        model_response: EmbeddingResponse,
+        logging_obj: LiteLLMLoggingObj,
+        api_key: Optional[str],
+        request_data: dict,
+        optional_params: dict,
+        litellm_params: dict,
+    ) -> EmbeddingResponse:
+        """Copy model, data, object and usage from the JSON body into `model_response`."""
+        try:
+            raw_response_json = raw_response.json()
+        except Exception as e:
+            raise OVHCloudException(
+                message=raw_response.text,
+                status_code=raw_response.status_code,
+                headers=raw_response.headers,
+            ) from e
+
+        model_response.model = raw_response_json.get("model")
+        model_response.data = raw_response_json.get("data")
+        model_response.object = raw_response_json.get("object")
+        usage = raw_response_json.get("usage") or {}  # absent or null -> zero counts
+        model_response.usage = Usage(
+            prompt_tokens=usage.get("prompt_tokens", 0),
+            total_tokens=usage.get("total_tokens", 0),
+        )
+        return model_response
+
+    def get_error_class(
+        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
+    ) -> BaseLLMException:
+        return OVHCloudException(
+            message=error_message, status_code=status_code, headers=headers
+        )
@@ -0,0 +1,6 @@
+from litellm.llms.base_llm.chat.transformation import BaseLLMException
+
+
+class OVHCloudException(BaseLLMException):
+    """Provider-specific error type for OVHCloud AI Endpoints; adds nothing to `BaseLLMException`."""
+    pass
@@ -164,6 +164,7 @@ from .llms.openai.openai import OpenAIChatCompletion
 from .llms.openai.transcriptions.handler import OpenAIAudioTranscription
 from .llms.openai_like.chat.handler import OpenAILikeChatHandler
 from .llms.openai_like.embedding.handler import OpenAILikeEmbeddingHandler
+from .llms.ovhcloud.chat.transformation import OVHCloudChatConfig
 from .llms.petals.completion import handler as petals_handler
 from .llms.predibase.chat.handler import PredibaseChatCompletion
 from .llms.replicate.chat.handler import completion as replicate_chat_completion
@@ -259,6 +260,7 @@ sagemaker_chat_completion = SagemakerChatHandler()
 bytez_transformation = BytezChatConfig()
 heroku_transformation = HerokuChatConfig()
 oci_transformation = OCIChatConfig()
+ovhcloud_transformation = OVHCloudChatConfig()
 ####### COMPLETION ENDPOINTS ################


@@ -3498,6 +3500,42 @@ def completion(  # type: ignore # noqa: PLR0915

            pass

+        elif custom_llm_provider == "ovhcloud" or model in litellm.ovhcloud_models:
+            api_key = (
+                api_key
+                or litellm.ovhcloud_key
+                or get_secret_str("OVHCLOUD_API_KEY")
+                or litellm.api_key
+            )
+
+            api_base = (
+                api_base
+                or litellm.api_base
+                or get_secret_str("OVHCLOUD_API_BASE")
+                or "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1"
+            )
+
+            response = base_llm_http_handler.completion(
+                model=model,
+                messages=messages,
+                headers=headers,
+                model_response=model_response,
+                api_key=api_key,
+                api_base=api_base,
+                acompletion=acompletion,
+                logging_obj=logging,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
+                timeout=timeout,  # type: ignore
+                client=client,
+                custom_llm_provider=custom_llm_provider,
+                encoding=encoding,
+                stream=stream,
+                provider_config=ovhcloud_transformation,
+            )
+
+            pass
+
        elif custom_llm_provider == "custom":
            url = litellm.api_base or api_base or ""
            if url is None or url == "":
@@ -4564,6 +4602,28 @@ def embedding(  # noqa: PLR0915
                aembedding=aembedding,
                headers=headers,
            )
+        elif custom_llm_provider == "ovhcloud":
+            api_key = api_key or litellm.api_key or get_secret_str("OVHCLOUD_API_KEY")
+            api_base = (
+                api_base
+                or litellm.api_base
+                or get_secret_str("OVHCLOUD_API_BASE")
+                or "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1"
+            )
+            response = base_llm_http_handler.embedding(
+                model=model,
+                input=input,
+                custom_llm_provider=custom_llm_provider,
+                api_base=api_base,
+                api_key=api_key,
+                logging_obj=logging,
+                timeout=timeout,
+                model_response=EmbeddingResponse(),
+                optional_params=optional_params,
+                client=client,
+                aembedding=aembedding,
+                litellm_params={},
+            )
        elif custom_llm_provider in litellm._custom_providers:
            custom_handler: Optional[CustomLLM] = None
            for item in litellm.custom_provider_map:
@@ -2357,6 +2357,7 @@ class LlmProviders(str, Enum):
    AUTO_ROUTER = "auto_router"
    VERCEL_AI_GATEWAY = "vercel_ai_gateway"
    DOTPROMPT = "dotprompt"
+    OVHCLOUD = "ovhcloud"


 # Create a set of all provider values for quick lookup
@@ -2899,6 +2899,19 @@ def get_optional_params_embeddings(  # noqa: PLR0915
            model=model,
            drop_params=drop_params if drop_params is not None else False,
        )
+    elif custom_llm_provider == "ovhcloud":
+        supported_params = get_supported_openai_params(
+            model=model,
+            custom_llm_provider="ovhcloud",
+            request_type="embeddings",
+        )
+        _check_valid_arg(supported_params=supported_params)
+        optional_params = litellm.OVHCloudEmbeddingConfig().map_openai_params(
+            non_default_params=non_default_params,
+            optional_params={},
+            model=model,
+            drop_params=drop_params if drop_params is not None else False,
+        )

    elif (
        custom_llm_provider != "openai"
@@ -7095,6 +7108,8 @@ class ProviderConfigManager:
            return litellm.OCIChatConfig()
        elif litellm.LlmProviders.HYPERBOLIC == provider:
            return litellm.HyperbolicChatConfig()
+        elif litellm.LlmProviders.OVHCLOUD == provider:
+            return litellm.OVHCloudChatConfig()
        return None

    @staticmethod
@@ -7138,6 +7153,8 @@ class ProviderConfigManager:
            )

            return VolcEngineEmbeddingConfig()
+        elif litellm.LlmProviders.OVHCLOUD == provider:
+            return litellm.OVHCloudEmbeddingConfig()
        return None

    @staticmethod
@@ -20777,5 +20777,207 @@
        "metadata": {
            "notes": "Volcengine Doubao embedding model - text-240715 version with 2560 dimensions"
        }
+    },
+    "ovhcloud/Qwen2.5-VL-72B-Instruct": {
+        "max_tokens": 32000,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 32000,
+        "input_cost_per_token": 9.1e-07,
+        "output_cost_per_token": 9.1e-07,
+        "litellm_provider": "ovhcloud",
+        "mode": "chat",
+        "supports_function_calling": false,
+        "supports_response_schema": true,
+        "supports_tool_choice": false,
+        "supports_vision": true,
+        "source": "https://endpoints.ai.cloud.ovh.net/models/qwen2-5-vl-72b-instruct"
+    },
+    "ovhcloud/llava-v1.6-mistral-7b-hf": {
+        "max_tokens": 32000,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 32000,
+        "input_cost_per_token": 2.9e-07,
+        "output_cost_per_token": 2.9e-07,
+        "litellm_provider": "ovhcloud",
+        "mode": "chat",
+        "supports_function_calling": false,
+        "supports_response_schema": true,
+        "supports_tool_choice": false,
+        "supports_vision": true,
+        "source": "https://endpoints.ai.cloud.ovh.net/models/llava-next-mistral-7b"
+    },
+    "ovhcloud/gpt-oss-120b": {
+        "max_tokens": 131000,
+        "max_input_tokens": 131000,
+        "max_output_tokens": 131000,
+        "input_cost_per_token": 8e-08,
+        "output_cost_per_token": 4e-07,
+        "litellm_provider": "ovhcloud",
+        "mode": "chat",
+        "supports_function_calling": false,
+        "supports_response_schema": true,
+        "supports_tool_choice": false,
+        "supports_reasoning": true,
+        "source": "https://endpoints.ai.cloud.ovh.net/models/gpt-oss-120b"
+    },
+    "ovhcloud/Meta-Llama-3_3-70B-Instruct": {
+        "max_tokens": 131000,
+        "max_input_tokens": 131000,
+        "max_output_tokens": 131000,
+        "input_cost_per_token": 6.7e-07,
+        "output_cost_per_token": 6.7e-07,
+        "litellm_provider": "ovhcloud",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_response_schema": true,
+        "supports_tool_choice": true,
+        "source": "https://endpoints.ai.cloud.ovh.net/models/meta-llama-3-3-70b-instruct"
+    },
+    "ovhcloud/Qwen2.5-Coder-32B-Instruct": {
+        "max_tokens": 32000,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 32000,
+        "input_cost_per_token": 8.7e-07,
+        "output_cost_per_token": 8.7e-07,
+        "litellm_provider": "ovhcloud",
+        "mode": "chat",
+        "supports_function_calling": false,
+        "supports_response_schema": true,
+        "supports_tool_choice": false,
+        "source": "https://endpoints.ai.cloud.ovh.net/models/qwen2-5-coder-32b-instruct"
+    },
+    "ovhcloud/Mixtral-8x7B-Instruct-v0.1": {
+        "max_tokens": 32000,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 32000,
+        "input_cost_per_token": 6.3e-07,
+        "output_cost_per_token": 6.3e-07,
+        "litellm_provider": "ovhcloud",
+        "mode": "chat",
+        "supports_function_calling": false,
+        "supports_response_schema": true,
+        "supports_tool_choice": false,
+        "source": "https://endpoints.ai.cloud.ovh.net/models/mixtral-8x7b-instruct-v0-1"
+    },
+    "ovhcloud/Meta-Llama-3_1-70B-Instruct": {
+        "max_tokens": 131000,
+        "max_input_tokens": 131000,
+        "max_output_tokens": 131000,
+        "input_cost_per_token": 6.7e-07,
+        "output_cost_per_token": 6.7e-07,
+        "litellm_provider": "ovhcloud",
+        "mode": "chat",
+        "supports_function_calling": false,
+        "supports_response_schema": false,
+        "supports_tool_choice": false,
+        "source": "https://endpoints.ai.cloud.ovh.net/models/meta-llama-3-1-70b-instruct"
+    },
+    "ovhcloud/Mistral-Small-3.2-24B-Instruct-2506": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 9e-08,
+        "output_cost_per_token": 2.8e-07,
+        "litellm_provider": "ovhcloud",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_response_schema": true,
+        "supports_tool_choice": true,
+        "supports_vision": true,
+        "source": "https://endpoints.ai.cloud.ovh.net/models/mistral-small-3-2-24b-instruct-2506"
+    },
+    "ovhcloud/DeepSeek-R1-Distill-Llama-70B": {
+        "max_tokens": 131000,
+        "max_input_tokens": 131000,
+        "max_output_tokens": 131000,
+        "input_cost_per_token": 6.7e-07,
+        "output_cost_per_token": 6.7e-07,
+        "litellm_provider": "ovhcloud",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_response_schema": true,
+        "supports_tool_choice": true,
+        "supports_reasoning": true,
+        "source": "https://endpoints.ai.cloud.ovh.net/models/deepseek-r1-distill-llama-70b"
+    },
+    "ovhcloud/Llama-3.1-8B-Instruct": {
+        "max_tokens": 131000,
+        "max_input_tokens": 131000,
+        "max_output_tokens": 131000,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 1e-07,
+        "litellm_provider": "ovhcloud",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_response_schema": true,
+        "supports_tool_choice": true,
+        "source": "https://endpoints.ai.cloud.ovh.net/models/llama-3-1-8b-instruct"
+    },
+    "ovhcloud/Mistral-7B-Instruct-v0.3": {
+        "max_tokens": 127000,
+        "max_input_tokens": 127000,
+        "max_output_tokens": 127000,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 1e-07,
+        "litellm_provider": "ovhcloud",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_response_schema": true,
+        "supports_tool_choice": true,
+        "source": "https://endpoints.ai.cloud.ovh.net/models/mistral-7b-instruct-v0-3"
+    },
+    "ovhcloud/gpt-oss-20b": {
+        "max_tokens": 131000,
+        "max_input_tokens": 131000,
+        "max_output_tokens": 131000,
+        "input_cost_per_token": 4e-08,
+        "output_cost_per_token": 1.5e-07,
+        "litellm_provider": "ovhcloud",
+        "mode": "chat",
+        "supports_function_calling": false,
+        "supports_response_schema": true,
+        "supports_tool_choice": false,
+        "supports_reasoning": true,
+        "source": "https://endpoints.ai.cloud.ovh.net/models/gpt-oss-20b"
+    },
+    "ovhcloud/Mistral-Nemo-Instruct-2407": {
+        "max_tokens": 118000,
+        "max_input_tokens": 118000,
+        "max_output_tokens": 118000,
+        "input_cost_per_token": 1.3e-07,
+        "output_cost_per_token": 1.3e-07,
+        "litellm_provider": "ovhcloud",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_response_schema": true,
+        "supports_tool_choice": true,
+        "source": "https://endpoints.ai.cloud.ovh.net/models/mistral-nemo-instruct-2407"
+    },
+    "ovhcloud/Qwen3-32B": {
+        "max_tokens": 32000,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 32000,
+        "input_cost_per_token": 8e-08,
+        "output_cost_per_token": 2.3e-07,
+        "litellm_provider": "ovhcloud",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_response_schema": true,
+        "supports_tool_choice": true,
+        "supports_reasoning": true,
+        "source": "https://endpoints.ai.cloud.ovh.net/models/qwen3-32b"
+    },
+    "ovhcloud/mamba-codestral-7B-v0.1": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 1.9e-07,
+        "output_cost_per_token": 1.9e-07,
+        "litellm_provider": "ovhcloud",
+        "mode": "chat",
+        "supports_function_calling": false,
+        "supports_response_schema": true,
+        "supports_tool_choice": false,
+        "source": "https://endpoints.ai.cloud.ovh.net/models/mamba-codestral-7b-v0-1"
    }
 }
@@ -0,0 +1,256 @@
+"""
+Unit tests for OVHCloud AI Endpoints chat integration.
+"""
+
+import os
+import sys
+
+import pytest
+
+from litellm.llms.ovhcloud.utils import OVHCloudException
+
+sys.path.insert(
+    0, os.path.abspath("../../../../..")
+)  # Adds the parent directory to the system path
+
+from litellm.llms.ovhcloud.chat.transformation import (
+    OVHCloudChatCompletionStreamingHandler,
+    OVHCloudChatConfig,
+)
+
+config = OVHCloudChatConfig()
+model = "ovhcloud/Mistral-7B-Instruct-v0.3"
+
+class TestOvhCloudChatCompletionStreamingHandler:
+    def test_chunk_parser_successful(self):
+        handler = OVHCloudChatCompletionStreamingHandler(
+            streaming_response=None, sync_stream=True
+        )
+
+        chunk = {
+            "id": "test_id",
+            "created": 1234567890,
+            "model": "gpt-oss-20b",
+            "usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
+            "choices": [
+                {"delta": {"content": "test content", "reasoning": "test reasoning"}}
+            ],
+        }
+
+        result = handler.chunk_parser(chunk)
+
+        assert result.id == "test_id"
+        assert result.object == "chat.completion.chunk"
+        assert result.created == 1234567890
+        assert result.model == "gpt-oss-20b"
+        assert result.usage.prompt_tokens == chunk["usage"]["prompt_tokens"]
+        assert result.usage.completion_tokens == chunk["usage"]["completion_tokens"]
+        assert result.usage.total_tokens == chunk["usage"]["total_tokens"]
+        assert len(result.choices) == 1
+        assert result.choices[0]["delta"]["reasoning_content"] == "test reasoning"
+
+    def test_chunk_parser_error_response(self):
+        handler = OVHCloudChatCompletionStreamingHandler(
+            streaming_response=None, sync_stream=True
+        )
+
+        error_chunk = {
+            "error": {
+                "message": "test error",
+                "code": 400,
+            } 
+        }
+
+        with pytest.raises(OVHCloudException) as exc_info:
+            handler.chunk_parser(error_chunk)
+
+        assert "OVHCloud Error: test error" in str(exc_info.value)
+        assert exc_info.value.status_code == 400
+
+    def test_chunk_parser_key_error(self):
+        handler = OVHCloudChatCompletionStreamingHandler(
+            streaming_response=None, sync_stream=True
+        )
+
+        invalid_chunk = {"incomplete": "data"}
+
+        with pytest.raises(OVHCloudException) as exc_info:
+            handler.chunk_parser(invalid_chunk)
+
+        assert "KeyError" in str(exc_info.value)
+        assert exc_info.value.status_code == 400
+
+
+class TestOVHCloudConfig:
+    def test_transform_request_basic(self):
+        """Test basic request transformation"""        
+        transformed_request = config.transform_request(
+            model,
+            messages=[
+                {"role": "user", "content": "Hello, world!"}
+            ],
+            optional_params={},
+            litellm_params={},
+            headers={},
+        )
+
+        assert transformed_request["model"] == model
+        assert transformed_request["messages"] == [
+            {"role": "user", "content": "Hello, world!"}
+        ]
+
+    def test_transform_request_with_extra_body(self):
+        """Test request transformation with extra_body parameters"""        
+        transformed_request = config.transform_request(
+            model,
+            messages=[{"role": "user", "content": "Hello, world!"}],
+            optional_params={"extra_body": {"custom_param": "custom_value"}},
+            litellm_params={},
+            headers={},
+        )
+
+        assert transformed_request["custom_param"] == "custom_value"
+        assert transformed_request["messages"] == [
+            {"role": "user", "content": "Hello, world!"}
+        ]
+
+    def test_map_openai_params(self):
+        """Test OpenAI parameter mapping"""        
+        non_default_params = {
+            "temperature": 0.7,
+            "max_tokens": 100,
+            "top_p": 0.9,
+        }
+        
+        mapped_params = config.map_openai_params(
+            non_default_params=non_default_params,
+            optional_params={},
+            model=model,
+            drop_params=False,
+        )
+        
+        assert mapped_params["temperature"] == 0.7
+        assert mapped_params["max_tokens"] == 100
+        assert mapped_params["top_p"] == 0.9
+
+    def test_get_error_class(self):
+        """Test error class creation"""        
+        error = config.get_error_class(
+            error_message="Test error",
+            status_code=400,
+            headers={"Content-Type": "application/json"}
+        )
+        
+        assert isinstance(error, OVHCloudException)
+        assert error.message == "Test error"
+        assert error.status_code == 400
+
+
+def test_ovhcloud_integration():
+    import os
+    from litellm import completion
+    
+    api_key = os.getenv("OVHCLOUD_API_KEY") 
+    
+    if not api_key:
+        pytest.skip("OVHCLOUD_API_KEY not set, skipping test")
+    
+    response = completion(
+        model,
+        messages=[{"role": "user", "content": "Say hello in one word"}],
+        api_key=api_key,
+        max_tokens=10,
+        temperature=0.7
+    )
+    
+    assert response.choices[0].message.content
+    assert len(response.choices[0].message.content.strip()) > 0
+    assert response.model
+    assert response.usage
+    assert response.usage.total_tokens > 0
+
+def test_OVHCloud_streaming_integration():
+    """
+    Integration test for streaming - requires real API key
+    Run with: pytest -k test_OVHCloud_streaming_integration -s
+    """
+    import os
+    from litellm import completion
+    
+    api_key = os.getenv("OVHCLOUD_API_KEY") 
+    
+    if not api_key:
+        pytest.skip("OVHCLOUD_API_KEY not set, skipping test")
+    
+    try:
+        print(f"🔍 Testing streaming with API key: {api_key[:6]}...{api_key[-4:]} (length: {len(api_key)})")
+        print(f"🔍 API base URL: {os.getenv('OVHCLOUD_API_BASE')}")
+        
+        response = completion(
+            model,
+            messages=[{"role": "user", "content": "Count from 1 to 5"}],
+            api_key=api_key,
+            max_tokens=50,
+            stream=True
+        )
+
+        chunks = []
+        content_parts = []
+
+        for chunk in response:
+            chunks.append(chunk)
+            if chunk.choices[0].delta.content:
+                content_parts.append(chunk.choices[0].delta.content)
+
+        assert len(chunks) > 0, "Should receive at least one chunk"
+        assert len(content_parts) > 0, "Should receive content in chunks"
+
+        full_content = "".join(content_parts)
+        assert len(full_content.strip()) > 0, "Should have non-empty content"
+
+        print(f"✅ Received {len(chunks)} chunks")
+        print(f"✅ Full content: {full_content}")
+
+    except Exception as e:
+        print(f"❌ Streaming integration test error details:")
+        print(f"   Error type: {type(e).__name__}")
+        print(f"   Error message: {str(e)}")
+        if hasattr(e, 'status_code'):
+            print(f"   Status code: {e.status_code}")
+        if hasattr(e, 'response'):
+            print(f"   Response: {e.response}")
+            
+        pytest.fail(f"Streaming integration test failed: {type(e).__name__}: {str(e)}")
+
+def test_ovhcloud_with_custom_base_url():
+    """
+    Test OVHCloud with custom base URL
+    """
+    import os
+    from litellm import completion
+    
+    api_key = os.getenv("OVHCLOUD_API_KEY") 
+    
+    if not api_key:
+        pytest.skip("OVHCLOUD_API_KEY not set, skipping test")
+
+    custom_base_url = os.getenv("OVHCLOUD_API_BASE", "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1")
+        
+    try:
+        response = completion(
+            model,
+            messages=[{"role": "user", "content": "Hello"}],
+            api_key=api_key,
+            api_base=custom_base_url,
+            max_tokens=5
+        )
+        
+        assert response.choices[0].message.content
+        print(f"✅ Custom base URL test passed: {response.choices[0].message.content}")
+        
+    except Exception as e:
+        pytest.fail(f"Custom base URL test failed: {str(e)}")
+
+
+# Allow running this test file directly; pytest is invoked on it in verbose mode.
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
@@ -0,0 +1,37 @@
+from unittest.mock import patch
+
+import litellm
+
+# Model identifier used by the embedding test and as the mock response's fallback model.
+model="ovhcloud/BGE-M3"
+
+def mock_embedding_response(*args, **kwargs):
+    class MockResponse:
+        def __init__(self):
+            self.data = [{"embedding": [0.1, 0.2, 0.3]}]
+            self.usage = litellm.Usage()
+            self.model = kwargs.get("model", model)
+            self.object = "embedding"
+
+        def __getitem__(self, key):
+            return getattr(self, key)
+
+    return MockResponse()
+
+
+def test_ovhcloud_embeddings():
+    with patch("litellm.embedding", side_effect=mock_embedding_response) as mock_embed:
+        response = litellm.embedding(
+            model,
+            input=["good morning from litellm"],
+        )
+
+        mock_embed.assert_called_once_with(
+            model,
+            input=["good morning from litellm"],
+        )
+
+        assert isinstance(response.data, list)
+        assert "embedding" in response.data[0]
+        assert isinstance(response.data[0]["embedding"], list)
+        assert response.model == model
+        assert response.object == "embedding"