From ef9d1ddc40cdbff2cd61ca1dfad71ec8533ea6bc Mon Sep 17 00:00:00 2001 From: Elias TOURNEUX Date: Fri, 12 Sep 2025 13:20:13 +0200 Subject: [PATCH] feat: Add OVHCloud AI Endpoints as a provider --- docs/my-website/docs/providers/ovhcloud.md | 380 ++++++++++++++++++ docs/my-website/sidebars.js | 3 +- litellm/__init__.py | 12 + litellm/constants.py | 2 + .../get_llm_provider_logic.py | 2 + litellm/llms/ovhcloud/chat/transformation.py | 141 +++++++ .../llms/ovhcloud/embedding/transformation.py | 122 ++++++ litellm/llms/ovhcloud/utils.py | 6 + litellm/main.py | 60 +++ litellm/types/utils.py | 1 + litellm/utils.py | 17 + model_prices_and_context_window.json | 202 ++++++++++ .../test_ovhcloud_chat_transformation.py | 256 ++++++++++++ ...test_ovhcloud_embeddings_transformation.py | 37 ++ 14 files changed, 1240 insertions(+), 1 deletion(-) create mode 100644 docs/my-website/docs/providers/ovhcloud.md create mode 100644 litellm/llms/ovhcloud/chat/transformation.py create mode 100644 litellm/llms/ovhcloud/embedding/transformation.py create mode 100644 litellm/llms/ovhcloud/utils.py create mode 100644 tests/test_litellm/llms/ovhcloud/test_ovhcloud_chat_transformation.py create mode 100644 tests/test_litellm/llms/ovhcloud/test_ovhcloud_embeddings_transformation.py diff --git a/docs/my-website/docs/providers/ovhcloud.md b/docs/my-website/docs/providers/ovhcloud.md new file mode 100644 index 0000000000..c37c968787 --- /dev/null +++ b/docs/my-website/docs/providers/ovhcloud.md @@ -0,0 +1,380 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# OVHCloud AI Endpoints +Leading French Cloud provider in Europe with data sovereignty and privacy. + +You can explore the last models we made available in our [catalog](https://endpoints.ai.cloud.ovh.net/catalog). + +:::tip + +We support ALL OVHCloud AI Endpoints models, just set `model=ovhcloud/` as a prefix when sending litellm requests. 
+For the complete models catalog, visit https://endpoints.ai.cloud.ovh.net/catalog. + +::: + +## Sample usage +### Chat completion +You can define your API key by setting the `OVHCLOUD_API_KEY` environment variable or by overriding the `api_key` parameter. You can generate a key on the [OVHCloud Manager](https://www.ovh.com/manager). + +```python +from litellm import completion +import os + +# Our API is free but rate-limited for calls without an API key. +os.environ['OVHCLOUD_API_KEY'] = "your-api-key" + +response = completion( + model = "ovhcloud/Meta-Llama-3_3-70B-Instruct", + messages = [ + { + "role": "user", + "content": "Hello, how are you?", + } + ], + max_tokens = 10, + stop = [], + temperature = 0.2, + top_p = 0.9, + user = "user", + api_key = "your-api-key" # Optional if set through the environment variable. +) + +print(response) +``` + +### Streaming +Set the parameter `stream` to `True` to stream a response. +```python +from litellm import completion +import os + +os.environ['OVHCLOUD_API_KEY'] = "your-api-key" + +response = completion( + model = "ovhcloud/Meta-Llama-3_3-70B-Instruct", + messages = [ + { + "role": "user", + "content": "Hello, how are you?", + } + ], + max_tokens = 10, + stop = [], + temperature = 0.2, + top_p = 0.9, + user = "user", + api_key = "your-api-key", # Optional if set through the environment variable. + stream = True +) + +for part in response: + print(part) +``` + +### Tool Calling + +```python +from litellm import completion +import json + +def get_current_weather(location, unit="celsius"): + if unit == "celsius": + return {"location": location, "temperature": "22", "unit": "celsius"} + else: + return {"location": location, "temperature": "72", "unit": "fahrenheit"} + +def print_message(role, content, is_tool_call=False, function_name=None): + if role == "user": + print(f"🧑 User: {content}") + elif role == "assistant": + if is_tool_call: + print(f"🤖 Assistant: I will call the function '{function_name}' to get 
some information.") + else: + print(f"🤖 Assistant: {content}") + elif role == "tool": + print(f"🔧 Tool ({function_name}): {content}") + print() + +messages = [{"role": "user", "content": "What's the weather like in Paris?"}] +model = "ovhcloud/Meta-Llama-3_3-70B-Instruct" + +tools = [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and country, e.g. Montréal, Canada", + }, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, + }, + "required": ["location"], + }, + }, + } +] + +print("🌟 Beginning of the conversation") + +# Initial user message +print_message("user", messages[0]["content"]) + +# First request to the model +print("📡 Sending first request to the model...") +response = completion( + model=model, + messages=messages, + tools=tools, + tool_choice="auto", +) + +response_message = response.choices[0].message +tool_calls = response_message.tool_calls + +if tool_calls: + available_functions = { + "get_current_weather": get_current_weather, + } + + # Display the tool calls suggested by the model + for tool_call in tool_calls: + print_message("assistant", "", is_tool_call=True, function_name=tool_call.function.name) + print(f" 📋 Arguments: {tool_call.function.arguments}") + print() + + # Add assistant message with tool calls to the conversation history + assistant_message = { + "role": "assistant", + "content": response_message.content, + "tool_calls": [ + { + "id": tool_call.id, + "type": "function", + "function": { + "name": tool_call.function.name, + "arguments": tool_call.function.arguments + } + } for tool_call in tool_calls + ] + } + + messages.append(assistant_message) + + # Execute each tool call and add the results to the conversation history + for tool_call in tool_calls: + function_name = tool_call.function.name + 
function_to_call = available_functions[function_name] + function_args = json.loads(tool_call.function.arguments) + + print(f"🔧 Executing function '{function_name}'...") + function_response = function_to_call( + location=function_args.get("location"), + unit=function_args.get("unit"), + ) + + # Display tool response + print_message("tool", json.dumps(function_response, indent=2), function_name=function_name) + + messages.append({ + "tool_call_id": tool_call.id, + "role": "tool", + "name": function_name, + "content": json.dumps(function_response), + }) + + print("📡 Sending second request to the model with results...") + + # Second request with function results + second_response = completion( + model=model, + messages=messages + ) + + # Display final response + final_content = second_response.choices[0].message.content + print_message("assistant", final_content) + +else: + print("❌ No function call detected") + print_message("assistant", response_message.content) +``` + +### Vision Example + +```python +from base64 import b64encode +from mimetypes import guess_type +import litellm + +# Auxiliary function to get b64 images +def data_url_from_image(file_path): + mime_type, _ = guess_type(file_path) + if mime_type is None: + raise ValueError("Could not determine MIME type of the file") + + with open(file_path, "rb") as image_file: + encoded_string = b64encode(image_file.read()).decode("utf-8") + + data_url = f"data:{mime_type};base64,{encoded_string}" + return data_url + +response = litellm.completion( + model = "ovhcloud/Mistral-Small-3.2-24B-Instruct-2506", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What's in this image?" 
+ }, + { + "type": "image_url", + "image_url": { + "url": data_url_from_image("your_image.jpg"), + "format": "image/jpeg" + } + } + ] + } + ], + stream=False +) + +print(response.choices[0].message.content) +``` + + +### Structured Output + +```python +from litellm import completion + +response = completion( + model="ovhcloud/Meta-Llama-3_3-70B-Instruct", + messages=[ + { + "role": "system", + "content": ( + "You are a specialist in extracting structured data from unstructured text. " + "Your task is to identify relevant entities and categories, then format them " + "according to the requested structure." + ), + }, + { + "role": "user", + "content": "Room 12 contains books, a desk, and a lamp." + }, + ], + response_format={ + "type": "json_schema", + "json_schema": { + "title": "data", + "name": "data_extraction", + "schema": { + "type": "object", + "properties": { + "section": {"type": "string"}, + "products": { + "type": "array", + "items": {"type": "string"} + } + }, + "required": ["section", "products"], + "additionalProperties": False + }, + "strict": False + } + }, + stream=False +) + +print(response.choices[0].message.content) +``` + +### Embeddings + +```python +from litellm import embedding + +response = embedding( + model="ovhcloud/BGE-M3", + input=["sample text to embed", "another sample text to embed"] +) + +print(response.data) +``` + +## Usage with LiteLLM Proxy Server + +Here's how to call a OVHCloud AI Endpoints model with the LiteLLM Proxy Server + +1. Modify the config.yaml + + ```yaml + model_list: + - model_name: my-model + litellm_params: + model: ovhcloud/ # add ovhcloud/ prefix to route as OVHCloud provider + api_key: api-key # api key to send your model + ``` + + +2. Start the proxy + + ```bash + $ litellm --config /path/to/config.yaml + ``` + +3. 
Send Request to LiteLLM Proxy Server + + + + + + ```python + import openai + client = openai.OpenAI( + api_key="sk-1234", # pass litellm proxy key, if you're using virtual keys + base_url="http://0.0.0.0:4000" # litellm-proxy-base url + ) + + response = client.chat.completions.create( + model="my-model", + messages = [ + { + "role": "user", + "content": "what llm are you" + } + ], + ) + + print(response) + ``` + + + + + ```shell + curl --location 'http://0.0.0.0:4000/chat/completions' \ + --header 'Authorization: Bearer sk-1234' \ + --header 'Content-Type: application/json' \ + --data '{ + "model": "my-model", + "messages": [ + { + "role": "user", + "content": "what llm are you" + } + ], + }' + ``` + + + diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 72b3859643..220ca831cf 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -485,7 +485,8 @@ const sidebars = { "providers/bytez", "providers/heroku", "providers/oci", - "providers/datarobot", + "providers/datarobot", + "providers/ovhcloud", ], }, { diff --git a/litellm/__init__.py b/litellm/__init__.py index f6be2bc6f0..736cd2c2fe 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -241,6 +241,7 @@ gradient_ai_api_key: Optional[str] = None nebius_key: Optional[str] = None heroku_key: Optional[str] = None cometapi_key: Optional[str] = None +ovhcloud_key: Optional[str] = None common_cloud_provider_auth_params: dict = { "params": ["project", "region_name", "token"], "providers": ["vertex_ai", "bedrock", "watsonx", "azure", "vertex_ai_beta"], @@ -520,6 +521,8 @@ cometapi_models: Set = set() oci_models: Set = set() vercel_ai_gateway_models: Set = set() volcengine_models: Set = set() +ovhcloud_models: Set = set() +ovhcloud_embedding_models: Set = set() def is_bedrock_pricing_only_model(key: str) -> bool: @@ -734,6 +737,10 @@ def add_known_models(): oci_models.add(key) elif value.get("litellm_provider") == "volcengine": volcengine_models.add(key) + elif 
value.get("litellm_provider") == "ovhcloud": + ovhcloud_models.add(key) + elif value.get("litellm_provider") == "ovhcloud-embedding-models": + ovhcloud_embedding_models.add(key) add_known_models() @@ -828,6 +835,7 @@ model_list = list( | heroku_models | vercel_ai_gateway_models | volcengine_models + | ovhcloud_models ) model_list_set = set(model_list) @@ -909,6 +917,7 @@ models_by_provider: dict = { "cometapi": cometapi_models, "oci": oci_models, "volcengine": volcengine_models, + "ovhcloud": ovhcloud_models | ovhcloud_embedding_models, } # mapping for those models which have larger equivalents @@ -943,6 +952,7 @@ all_embedding_models = ( | fireworks_ai_embedding_models | nebius_embedding_models | sambanova_embedding_models + | ovhcloud_embedding_models ) ####### IMAGE GENERATION MODELS ################### @@ -1254,6 +1264,8 @@ from .llms.morph.chat.transformation import MorphChatConfig from .llms.lambda_ai.chat.transformation import LambdaAIChatConfig from .llms.hyperbolic.chat.transformation import HyperbolicChatConfig from .llms.vercel_ai_gateway.chat.transformation import VercelAIGatewayConfig +from .llms.ovhcloud.chat.transformation import OVHCloudChatConfig +from .llms.ovhcloud.embedding.transformation import OVHCloudEmbeddingConfig from .main import * # type: ignore from .integrations import * from .llms.custom_httpx.async_client_cleanup import close_litellm_async_clients diff --git a/litellm/constants.py b/litellm/constants.py index 75c25d9ea9..32dde5888d 100644 --- a/litellm/constants.py +++ b/litellm/constants.py @@ -311,6 +311,7 @@ LITELLM_CHAT_PROVIDERS = [ "morph", "lambda_ai", "vercel_ai_gateway", + "ovhcloud", ] LITELLM_EMBEDDING_PROVIDERS_SUPPORTING_INPUT_ARRAY_OF_TOKENS = [ @@ -1019,6 +1020,7 @@ SENTRY_DENYLIST = [ "FIREWORKS_API_KEY", "FIREWORKS_AI_API_KEY", "FIREWORKSAI_API_KEY", + "OVHCLOUD_API_KEY", # Database and Connection Strings "database_url", "redis_url", diff --git a/litellm/litellm_core_utils/get_llm_provider_logic.py 
b/litellm/litellm_core_utils/get_llm_provider_logic.py index d5009fb0ca..7cbb2afcb2 100644 --- a/litellm/litellm_core_utils/get_llm_provider_logic.py +++ b/litellm/litellm_core_utils/get_llm_provider_logic.py @@ -372,6 +372,8 @@ def get_llm_provider( # noqa: PLR0915 custom_llm_provider = "cometapi" elif model.startswith("oci/"): custom_llm_provider = "oci" + elif model.startswith("ovhcloud/"): + custom_llm_provider = "ovhcloud" if not custom_llm_provider: if litellm.suppress_debug_info is False: print() # noqa diff --git a/litellm/llms/ovhcloud/chat/transformation.py b/litellm/llms/ovhcloud/chat/transformation.py new file mode 100644 index 0000000000..6bdc28620f --- /dev/null +++ b/litellm/llms/ovhcloud/chat/transformation.py @@ -0,0 +1,141 @@ +""" +Support for OVHCloud AI Endpoints `/v1/chat/completions` endpoint. + +Our unified API follows the OpenAI standard. +More information on our website: https://endpoints.ai.cloud.ovh.net +""" +from typing import Optional, Union, List + +import httpx +from litellm import ModelResponseStream, OpenAIGPTConfig, get_model_info, verbose_logger +from litellm.llms.ovhcloud.utils import OVHCloudException +from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator +from litellm.llms.base_llm.chat.transformation import BaseLLMException +from litellm.types.llms.openai import AllMessageValues + +class OVHCloudChatConfig(OpenAIGPTConfig): + @property + def custom_llm_provider(self) -> Optional[str]: + return "ovhcloud" + + def get_supported_openai_params(self, model: str) -> list: + """ + Details about function calling support can be found here: + https://help.ovhcloud.com/csm/en-gb-public-cloud-ai-endpoints-function-calling?id=kb_article_view&sysparm_article=KB0071907 + """ + supports_function_calling: Optional[bool] = None + try: + model_info = get_model_info(model, custom_llm_provider="ovhcloud") + supports_function_calling = model_info.get( + "supports_function_calling", False + ) + except Exception as e: + 
verbose_logger.debug(f"Error getting supported OpenAI params: {e}") + pass + + optional_params = super().get_supported_openai_params(model) + if supports_function_calling is not True: + verbose_logger.debug( + "You can see our models supporting function_calling in our catalog: https://endpoints.ai.cloud.ovh.net/catalog " + ) + optional_params.remove("tools") + optional_params.remove("tool_choice") + optional_params.remove("function_call") + optional_params.remove("response_format") + return optional_params + + def get_complete_url( + self, + api_base: Optional[str], + api_key: Optional[str], + model: str, + optional_params: dict, + litellm_params: dict, + stream: Optional[bool] = None, + ) -> str: + api_base = "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1" if api_base is None else api_base.rstrip("/") + complete_url = f"{api_base}/chat/completions" + return complete_url + + def get_error_class( + self, + error_message: str, + status_code: int, + headers: Union[dict, httpx.Headers] + ) -> BaseLLMException: + return OVHCloudException( + message=error_message, + status_code=status_code, + headers=headers, + ) + + def map_openai_params( + self, + non_default_params: dict, + optional_params: dict, + model: str, + drop_params: bool, + ) -> dict: + mapped_openai_params = super().map_openai_params( + non_default_params, optional_params, model, drop_params + ) + return mapped_openai_params + + def transform_request( + self, + model: str, + messages: List[AllMessageValues], + optional_params: dict, + litellm_params: dict, + headers: dict, + ) -> dict: + extra_body = optional_params.pop("extra_body", {}) + response = super().transform_request( + model, messages, optional_params, litellm_params, headers + ) + response.update(extra_body) + return response + +class OVHCloudChatCompletionStreamingHandler(BaseModelResponseIterator): + """ + Handler for OVHCloud AI Endpoints streaming chat completion responses + """ + + def chunk_parser(self, chunk: dict) -> 
ModelResponseStream: + """ + Parse one streaming chunk into a ModelResponseStream, copying `delta.reasoning` to `delta.reasoning_content`; raises OVHCloudException for `error` payloads or missing keys. + """ + try: + if "error" in chunk: + error_chunk = chunk["error"] + error_message = "OVHCloud Error: {}".format( + error_chunk.get("message", "Unknown error") + ) + raise OVHCloudException( + message=error_message, + status_code=error_chunk.get("code", 400), + headers={"Content-Type": "application/json"}, + ) + + new_choices = [] + for choice in chunk["choices"]: + if "delta" in choice and "reasoning" in choice["delta"]: + choice["delta"]["reasoning_content"] = choice["delta"].get("reasoning") + new_choices.append(choice) + + return ModelResponseStream( + id=chunk["id"], + object="chat.completion.chunk", + created=chunk["created"], + usage=chunk.get("usage"), + model=chunk["model"], + choices=new_choices, + ) + except KeyError as e: + raise OVHCloudException( + message=f"KeyError: {e}, Got unexpected response from OVHCloud: {chunk}", + status_code=400, + headers={"Content-Type": "application/json"}, + ) + except Exception as e: + raise e \ No newline at end of file diff --git a/litellm/llms/ovhcloud/embedding/transformation.py b/litellm/llms/ovhcloud/embedding/transformation.py new file mode 100644 index 0000000000..1266f74c0a --- /dev/null +++ b/litellm/llms/ovhcloud/embedding/transformation.py @@ -0,0 +1,122 @@ +""" +This is OpenAI compatible - no transformation is applied + +""" +from typing import List, Optional, Union + +import httpx + +from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj +from litellm.llms.base_llm.chat.transformation import BaseLLMException +from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig +from litellm.secret_managers.main import get_secret_str +from litellm.types.llms.openai import AllEmbeddingInputValues, AllMessageValues +from litellm.types.utils import EmbeddingResponse, Usage + +from ..utils import OVHCloudException + + +class OVHCloudEmbeddingConfig(BaseEmbeddingConfig): + def __init__(self) -> 
None: + pass + + def get_complete_url( + self, + api_base: Optional[str], + api_key: Optional[str], + model: str, + optional_params: dict, + litellm_params: dict, + stream: Optional[bool] = None, + ) -> str: + api_base = "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1" if api_base is None else api_base.rstrip("/") + complete_url = f"{api_base}/embeddings" + return complete_url + + def validate_environment( + self, + headers: dict, + model: str, + messages: List[AllMessageValues], + optional_params: dict, + litellm_params: dict, + api_key: Optional[str] = None, + api_base: Optional[str] = None, + ) -> dict: + if api_key is None: + api_key = get_secret_str("OVHCLOUD_API_KEY") + + default_headers = { + "Authorization": f"Bearer {api_key}", + "accept": "application/json", + "Content-Type": "application/json", + } + + if "Authorization" in headers: + default_headers["Authorization"] = headers["Authorization"] + + return {**default_headers, **headers} + + def get_supported_openai_params(self, model: str): + return [] + + def map_openai_params( + self, + non_default_params: dict, + optional_params: dict, + model: str, + drop_params: bool, + ): + supported_openai_params = self.get_supported_openai_params(model) + for param, value in non_default_params.items(): + if param in supported_openai_params: + optional_params[param] = value + return optional_params + + def transform_embedding_request( + self, + model: str, + input: AllEmbeddingInputValues, + optional_params: dict, + headers: dict, + ) -> dict: + return {"input": input, "model": model, **optional_params} + + def transform_embedding_response( + self, + model: str, + raw_response: httpx.Response, + model_response: EmbeddingResponse, + logging_obj: LiteLLMLoggingObj, + api_key: Optional[str], + request_data: dict, + optional_params: dict, + litellm_params: dict, + ) -> EmbeddingResponse: + try: + raw_response_json = raw_response.json() + except Exception: + raise OVHCloudException( + message=raw_response.text, + 
status_code=raw_response.status_code, + headers=raw_response.headers, + ) + + model_response.model = raw_response_json.get("model") + model_response.data = raw_response_json.get("data") + model_response.object = raw_response_json.get("object") + + usage = Usage( + prompt_tokens=raw_response_json.get("usage", {}).get("prompt_tokens", 0), + total_tokens=raw_response_json.get("usage", {}).get("total_tokens", 0), + ) + + model_response.usage = usage + return model_response + + def get_error_class( + self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] + ) -> BaseLLMException: + return OVHCloudException( + message=error_message, status_code=status_code, headers=headers + ) diff --git a/litellm/llms/ovhcloud/utils.py b/litellm/llms/ovhcloud/utils.py new file mode 100644 index 0000000000..9ae4dfb1ef --- /dev/null +++ b/litellm/llms/ovhcloud/utils.py @@ -0,0 +1,6 @@ +from litellm.llms.base_llm.chat.transformation import BaseLLMException + + +class OVHCloudException(BaseLLMException): + """OVHCloud AI Endpoints exception handling class""" + pass \ No newline at end of file diff --git a/litellm/main.py b/litellm/main.py index d7395eb145..71875f63b3 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -164,6 +164,7 @@ from .llms.openai.openai import OpenAIChatCompletion from .llms.openai.transcriptions.handler import OpenAIAudioTranscription from .llms.openai_like.chat.handler import OpenAILikeChatHandler from .llms.openai_like.embedding.handler import OpenAILikeEmbeddingHandler +from .llms.ovhcloud.chat.transformation import OVHCloudChatConfig from .llms.petals.completion import handler as petals_handler from .llms.predibase.chat.handler import PredibaseChatCompletion from .llms.replicate.chat.handler import completion as replicate_chat_completion @@ -259,6 +260,7 @@ sagemaker_chat_completion = SagemakerChatHandler() bytez_transformation = BytezChatConfig() heroku_transformation = HerokuChatConfig() oci_transformation = OCIChatConfig() 
+ovhcloud_transformation = OVHCloudChatConfig() ####### COMPLETION ENDPOINTS ################ @@ -3498,6 +3500,42 @@ def completion( # type: ignore # noqa: PLR0915 pass + elif custom_llm_provider == "ovhcloud" or model in litellm.ovhcloud_models: + api_key = ( + api_key + or litellm.ovhcloud_key + or get_secret_str("OVHCLOUD_API_KEY") + or litellm.api_key + ) + + api_base = ( + api_base + or litellm.api_base + or get_secret_str("OVHCLOUD_API_BASE") + or "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1" + ) + + response = base_llm_http_handler.completion( + model=model, + messages=messages, + headers=headers, + model_response=model_response, + api_key=api_key, + api_base=api_base, + acompletion=acompletion, + logging_obj=logging, + optional_params=optional_params, + litellm_params=litellm_params, + timeout=timeout, # type: ignore + client=client, + custom_llm_provider=custom_llm_provider, + encoding=encoding, + stream=stream, + provider_config=ovhcloud_transformation, + ) + + pass + elif custom_llm_provider == "custom": url = litellm.api_base or api_base or "" if url is None or url == "": @@ -4564,6 +4602,28 @@ def embedding( # noqa: PLR0915 aembedding=aembedding, headers=headers, ) + elif custom_llm_provider == "ovhcloud": + api_key = api_key or litellm.api_key or get_secret_str("OVHCLOUD_API_KEY") + api_base = ( + api_base + or litellm.api_base + or get_secret_str("OVHCLOUD_API_BASE") + or "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1" + ) + response = base_llm_http_handler.embedding( + model=model, + input=input, + custom_llm_provider=custom_llm_provider, + api_base=api_base, + api_key=api_key, + logging_obj=logging, + timeout=timeout, + model_response=EmbeddingResponse(), + optional_params=optional_params, + client=client, + aembedding=aembedding, + litellm_params={}, + ) elif custom_llm_provider in litellm._custom_providers: custom_handler: Optional[CustomLLM] = None for item in litellm.custom_provider_map: diff --git a/litellm/types/utils.py 
b/litellm/types/utils.py index e54516371e..19a16cb7a5 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -2357,6 +2357,7 @@ class LlmProviders(str, Enum): AUTO_ROUTER = "auto_router" VERCEL_AI_GATEWAY = "vercel_ai_gateway" DOTPROMPT = "dotprompt" + OVHCLOUD = "ovhcloud" # Create a set of all provider values for quick lookup diff --git a/litellm/utils.py b/litellm/utils.py index 0d2fe5d4d6..c8e382f309 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -2899,6 +2899,19 @@ def get_optional_params_embeddings( # noqa: PLR0915 model=model, drop_params=drop_params if drop_params is not None else False, ) + elif custom_llm_provider == "ovhcloud": + supported_params = get_supported_openai_params( + model=model, + custom_llm_provider="ovhcloud", + request_type="embeddings", + ) + _check_valid_arg(supported_params=supported_params) + optional_params = litellm.OVHCloudEmbeddingConfig().map_openai_params( + non_default_params=non_default_params, + optional_params={}, + model=model, + drop_params=drop_params if drop_params is not None else False, + ) elif ( custom_llm_provider != "openai" @@ -7095,6 +7108,8 @@ class ProviderConfigManager: return litellm.OCIChatConfig() elif litellm.LlmProviders.HYPERBOLIC == provider: return litellm.HyperbolicChatConfig() + elif litellm.LlmProviders.OVHCLOUD == provider: + return litellm.OVHCloudChatConfig() return None @staticmethod @@ -7138,6 +7153,8 @@ class ProviderConfigManager: ) return VolcEngineEmbeddingConfig() + elif litellm.LlmProviders.OVHCLOUD == provider: + return litellm.OVHCloudEmbeddingConfig() return None @staticmethod diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 46331c7cff..261fe9552d 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -20777,5 +20777,207 @@ "metadata": { "notes": "Volcengine Doubao embedding model - text-240715 version with 2560 dimensions" } + }, + "ovhcloud/Qwen2.5-VL-72B-Instruct": { + 
"max_tokens": 32000, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "input_cost_per_token": 9.1e-07, + "output_cost_per_token": 9.1e-07, + "litellm_provider": "ovhcloud", + "mode": "chat", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false, + "supports_vision": true, + "source": "https://endpoints.ai.cloud.ovh.net/models/qwen2-5-vl-72b-instruct" + }, + "ovhcloud/llava-v1.6-mistral-7b-hf": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "input_cost_per_token": 2.9e-07, + "output_cost_per_token": 2.9e-07, + "litellm_provider": "ovhcloud", + "mode": "chat", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false, + "supports_vision": true, + "source": "https://endpoints.ai.cloud.ovh.net/models/llava-next-mistral-7b" + }, + "ovhcloud/gpt-oss-120b": { + "max_tokens": 131000, + "max_input_tokens": 131000, + "max_output_tokens": 131000, + "input_cost_per_token": 8e-08, + "output_cost_per_token": 4e-07, + "litellm_provider": "ovhcloud", + "mode": "chat", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false, + "supports_reasoning": true, + "source": "https://endpoints.ai.cloud.ovh.net/models/gpt-oss-120b" + }, + "ovhcloud/Meta-Llama-3_3-70B-Instruct": { + "max_tokens": 131000, + "max_input_tokens": 131000, + "max_output_tokens": 131000, + "input_cost_per_token": 6.7e-07, + "output_cost_per_token": 6.7e-07, + "litellm_provider": "ovhcloud", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "source": "https://endpoints.ai.cloud.ovh.net/models/meta-llama-3-3-70b-instruct" + }, + "ovhcloud/Qwen2.5-Coder-32B-Instruct": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "input_cost_per_token": 8.7e-07, + "output_cost_per_token": 8.7e-07, + "litellm_provider": 
"ovhcloud", + "mode": "chat", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false, + "source": "https://endpoints.ai.cloud.ovh.net/models/qwen2-5-coder-32b-instruct" + }, + "ovhcloud/Mixtral-8x7B-Instruct-v0.1": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "input_cost_per_token": 6.3e-07, + "output_cost_per_token": 6.3e-07, + "litellm_provider": "ovhcloud", + "mode": "chat", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false, + "source": "https://endpoints.ai.cloud.ovh.net/models/mixtral-8x7b-instruct-v0-1" + }, + "ovhcloud/Meta-Llama-3_1-70B-Instruct": { + "max_tokens": 131000, + "max_input_tokens": 131000, + "max_output_tokens": 131000, + "input_cost_per_token": 6.7e-07, + "output_cost_per_token": 6.7e-07, + "litellm_provider": "ovhcloud", + "mode": "chat", + "supports_function_calling": false, + "supports_response_schema": false, + "supports_tool_choice": false, + "source": "https://endpoints.ai.cloud.ovh.net/models/meta-llama-3-1-70b-instruct" + }, + "ovhcloud/Mistral-Small-3.2-24B-Instruct-2506": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 9e-08, + "output_cost_per_token": 2.8e-07, + "litellm_provider": "ovhcloud", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://endpoints.ai.cloud.ovh.net/models/mistral-small-3-2-24b-instruct-2506" + }, + "ovhcloud/DeepSeek-R1-Distill-Llama-70B": { + "max_tokens": 131000, + "max_input_tokens": 131000, + "max_output_tokens": 131000, + "input_cost_per_token": 6.7e-07, + "output_cost_per_token": 6.7e-07, + "litellm_provider": "ovhcloud", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_reasoning": true, + 
"source": "https://endpoints.ai.cloud.ovh.net/models/deepseek-r1-distill-llama-70b" + }, + "ovhcloud/Llama-3.1-8B-Instruct": { + "max_tokens": 131000, + "max_input_tokens": 131000, + "max_output_tokens": 131000, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "ovhcloud", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "source": "https://endpoints.ai.cloud.ovh.net/models/llama-3-1-8b-instruct" + }, + "ovhcloud/Mistral-7B-Instruct-v0.3": { + "max_tokens": 127000, + "max_input_tokens": 127000, + "max_output_tokens": 127000, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "ovhcloud", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "source": "https://endpoints.ai.cloud.ovh.net/models/mistral-7b-instruct-v0-3" + }, + "ovhcloud/gpt-oss-20b": { + "max_tokens": 131000, + "max_input_tokens": 131000, + "max_output_tokens": 131000, + "input_cost_per_token": 4e-08, + "output_cost_per_token": 1.5e-07, + "litellm_provider": "ovhcloud", + "mode": "chat", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false, + "supports_reasoning": true, + "source": "https://endpoints.ai.cloud.ovh.net/models/gpt-oss-20b" + }, + "ovhcloud/Mistral-Nemo-Instruct-2407": { + "max_tokens": 118000, + "max_input_tokens": 118000, + "max_output_tokens": 118000, + "input_cost_per_token": 1.3e-07, + "output_cost_per_token": 1.3e-07, + "litellm_provider": "ovhcloud", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "source": "https://endpoints.ai.cloud.ovh.net/models/mistral-nemo-instruct-2407" + }, + "ovhcloud/Qwen3-32B": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "input_cost_per_token": 8e-08, + 
"output_cost_per_token": 2.3e-07, + "litellm_provider": "ovhcloud", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "source": "https://endpoints.ai.cloud.ovh.net/models/qwen3-32b" + }, + "ovhcloud/mamba-codestral-7B-v0.1": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 1.9e-07, + "output_cost_per_token": 1.9e-07, + "litellm_provider": "ovhcloud", + "mode": "chat", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false, + "source": "https://endpoints.ai.cloud.ovh.net/models/mamba-codestral-7b-v0-1" } } diff --git a/tests/test_litellm/llms/ovhcloud/test_ovhcloud_chat_transformation.py b/tests/test_litellm/llms/ovhcloud/test_ovhcloud_chat_transformation.py new file mode 100644 index 0000000000..d391c91cb8 --- /dev/null +++ b/tests/test_litellm/llms/ovhcloud/test_ovhcloud_chat_transformation.py @@ -0,0 +1,256 @@ +""" +Unit tests for OVHCloud AI Endpoints chat integration. 
+""" + +import os +import sys + +import pytest + +from litellm.llms.ovhcloud.utils import OVHCloudException + +sys.path.insert( + 0, os.path.abspath("../../../../..") +) # Adds the parent directory to the system path + +from litellm.llms.ovhcloud.chat.transformation import ( + OVHCloudChatCompletionStreamingHandler, + OVHCloudChatConfig, +) + +config = OVHCloudChatConfig() +model = "ovhcloud/Mistral-7B-Instruct-v0.3" + +class TestOvhCloudChatCompletionStreamingHandler: + def test_chunk_parser_successful(self): + handler = OVHCloudChatCompletionStreamingHandler( + streaming_response=None, sync_stream=True + ) + + chunk = { + "id": "test_id", + "created": 1234567890, + "model": "gpt-oss-20b", + "usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30}, + "choices": [ + {"delta": {"content": "test content", "reasoning": "test reasoning"}} + ], + } + + result = handler.chunk_parser(chunk) + + assert result.id == "test_id" + assert result.object == "chat.completion.chunk" + assert result.created == 1234567890 + assert result.model == "gpt-oss-20b" + assert result.usage.prompt_tokens == chunk["usage"]["prompt_tokens"] + assert result.usage.completion_tokens == chunk["usage"]["completion_tokens"] + assert result.usage.total_tokens == chunk["usage"]["total_tokens"] + assert len(result.choices) == 1 + assert result.choices[0]["delta"]["reasoning_content"] == "test reasoning" + + def test_chunk_parser_error_response(self): + handler = OVHCloudChatCompletionStreamingHandler( + streaming_response=None, sync_stream=True + ) + + error_chunk = { + "error": { + "message": "test error", + "code": 400, + } + } + + with pytest.raises(OVHCloudException) as exc_info: + handler.chunk_parser(error_chunk) + + assert "OVHCloud Error: test error" in str(exc_info.value) + assert exc_info.value.status_code == 400 + + def test_chunk_parser_key_error(self): + handler = OVHCloudChatCompletionStreamingHandler( + streaming_response=None, sync_stream=True + ) + + invalid_chunk = 
{"incomplete": "data"} + + with pytest.raises(OVHCloudException) as exc_info: + handler.chunk_parser(invalid_chunk) + + assert "KeyError" in str(exc_info.value) + assert exc_info.value.status_code == 400 + + +class TestOVHCloudConfig: + def test_transform_request_basic(self): + """Test basic request transformation""" + transformed_request = config.transform_request( + model, + messages=[ + {"role": "user", "content": "Hello, world!"} + ], + optional_params={}, + litellm_params={}, + headers={}, + ) + + assert transformed_request["model"] == model + assert transformed_request["messages"] == [ + {"role": "user", "content": "Hello, world!"} + ] + + def test_transform_request_with_extra_body(self): + """Test request transformation with extra_body parameters""" + transformed_request = config.transform_request( + model, + messages=[{"role": "user", "content": "Hello, world!"}], + optional_params={"extra_body": {"custom_param": "custom_value"}}, + litellm_params={}, + headers={}, + ) + + assert transformed_request["custom_param"] == "custom_value" + assert transformed_request["messages"] == [ + {"role": "user", "content": "Hello, world!"} + ] + + def test_map_openai_params(self): + """Test OpenAI parameter mapping""" + non_default_params = { + "temperature": 0.7, + "max_tokens": 100, + "top_p": 0.9, + } + + mapped_params = config.map_openai_params( + non_default_params=non_default_params, + optional_params={}, + model=model, + drop_params=False, + ) + + assert mapped_params["temperature"] == 0.7 + assert mapped_params["max_tokens"] == 100 + assert mapped_params["top_p"] == 0.9 + + def test_get_error_class(self): + """Test error class creation""" + error = config.get_error_class( + error_message="Test error", + status_code=400, + headers={"Content-Type": "application/json"} + ) + + assert isinstance(error, OVHCloudException) + assert error.message == "Test error" + assert error.status_code == 400 + + +def test_ovhcloud_integration(): + import os + from litellm import 
completion + + api_key = os.getenv("OVHCLOUD_API_KEY") + + if not api_key: + pytest.skip("OVHCLOUD_API_KEY not set, skipping test") + + response = completion( + model, + messages=[{"role": "user", "content": "Say hello in one word"}], + api_key=api_key, + max_tokens=10, + temperature=0.7 + ) + + assert response.choices[0].message.content + assert len(response.choices[0].message.content.strip()) > 0 + assert response.model + assert response.usage + assert response.usage.total_tokens > 0 + +def test_OVHCloud_streaming_integration(): + """ + Integration test for streaming - requires real API key + Run with: pytest -k test_OVHCloud_streaming_integration -s + """ + import os + from litellm import completion + + api_key = os.getenv("OVHCLOUD_API_KEY") + + if not api_key: + pytest.skip("OVHCLOUD_API_KEY not set, skipping test") + + try: + print(f"🔍 Testing streaming with API key: {api_key[:6]}...{api_key[-4:]} (length: {len(api_key)})") + print(f"🔍 API base URL: {os.getenv('OVHCLOUD_API_BASE')}") + + response = completion( + model, + messages=[{"role": "user", "content": "Count from 1 to 5"}], + api_key=api_key, + max_tokens=50, + stream=True + ) + + chunks = [] + content_parts = [] + + for chunk in response: + chunks.append(chunk) + if chunk.choices[0].delta.content: + content_parts.append(chunk.choices[0].delta.content) + + assert len(chunks) > 0, "Should receive at least one chunk" + assert len(content_parts) > 0, "Should receive content in chunks" + + full_content = "".join(content_parts) + assert len(full_content.strip()) > 0, "Should have non-empty content" + + print(f"✅ Received {len(chunks)} chunks") + print(f"✅ Full content: {full_content}") + + except Exception as e: + print(f"❌ Streaming integration test error details:") + print(f" Error type: {type(e).__name__}") + print(f" Error message: {str(e)}") + if hasattr(e, 'status_code'): + print(f" Status code: {e.status_code}") + if hasattr(e, 'response'): + print(f" Response: {e.response}") + + 
pytest.fail(f"Streaming integration test failed: {type(e).__name__}: {str(e)}") + +def test_ovhcloud_with_custom_base_url(): + """ + Test OVHCloud with custom base URL + """ + import os + from litellm import completion + + api_key = os.getenv("OVHCLOUD_API_KEY") + + if not api_key: + pytest.skip("OVHCLOUD_API_KEY not set, skipping test") + + custom_base_url = os.getenv("OVHCLOUD_API_BASE", "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1") + + try: + response = completion( + model, + messages=[{"role": "user", "content": "Hello"}], + api_key=api_key, + api_base=custom_base_url, + max_tokens=5 + ) + + assert response.choices[0].message.content + print(f"✅ Custom base URL test passed: {response.choices[0].message.content}") + + except Exception as e: + pytest.fail(f"Custom base URL test failed: {str(e)}") + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) \ No newline at end of file diff --git a/tests/test_litellm/llms/ovhcloud/test_ovhcloud_embeddings_transformation.py b/tests/test_litellm/llms/ovhcloud/test_ovhcloud_embeddings_transformation.py new file mode 100644 index 0000000000..b7e899f038 --- /dev/null +++ b/tests/test_litellm/llms/ovhcloud/test_ovhcloud_embeddings_transformation.py @@ -0,0 +1,37 @@ +from unittest.mock import patch + +import litellm + +model="ovhcloud/BGE-M3" + +def mock_embedding_response(*args, **kwargs): + class MockResponse: + def __init__(self): + self.data = [{"embedding": [0.1, 0.2, 0.3]}] + self.usage = litellm.Usage() + self.model = kwargs.get("model", model) + self.object = "embedding" + + def __getitem__(self, key): + return getattr(self, key) + + return MockResponse() + + +def test_ovhcloud_embeddings(): + with patch("litellm.embedding", side_effect=mock_embedding_response) as mock_embed: + response = litellm.embedding( + model, + input=["good morning from litellm"], + ) + + mock_embed.assert_called_once_with( + model, + input=["good morning from litellm"], + ) + + assert isinstance(response.data, list) + assert 
"embedding" in response.data[0] + assert isinstance(response.data[0]["embedding"], list) + assert response.model == model + assert response.object == "embedding"