From 8fada93fffa6398b796c3d7c8a514dacddcce65f Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 23 Aug 2024 17:57:49 -0700 Subject: [PATCH] docs on using vertex tts --- docs/my-website/docs/providers/vertex.md | 27 +++++++------------ litellm/llms/text_to_speech/vertex_ai.py | 6 +++-- litellm/main.py | 17 ++++++++---- .../proxy/tests/test_openai_tts_request.py | 11 ++++++++ 4 files changed, 36 insertions(+), 25 deletions(-) create mode 100644 litellm/proxy/tests/test_openai_tts_request.py diff --git a/docs/my-website/docs/providers/vertex.md b/docs/my-website/docs/providers/vertex.md index d87e8e814e..967f45b843 100644 --- a/docs/my-website/docs/providers/vertex.md +++ b/docs/my-website/docs/providers/vertex.md @@ -1812,9 +1812,9 @@ response.stream_to_file(speech_file_path) 1. Add model to config.yaml ```yaml model_list: - - model_name: multimodalembedding@001 + - model_name: vertex-tts litellm_params: - model: vertex_ai/multimodalembedding@001 + model: vertex_ai/ # Vertex AI does not support passing a `model` param - so passing `model=vertex_ai/` is the only required param vertex_project: "adroit-crow-413218" vertex_location: "us-central1" vertex_credentials: adroit-crow-413218-a956eef1a2a8.json @@ -1837,23 +1837,14 @@ import openai client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000") -# # request sent to model set on litellm proxy, `litellm --model` -response = client.embeddings.create( - model="multimodalembedding@001", - input = None, - extra_body = { - "instances": [ - { - "image": { - "bytesBase64Encoded": "base64" - }, - "text": "this is a unicorn", - }, - ], - } +# see supported values for "voice" on vertex here: +# https://console.cloud.google.com/vertex-ai/generative/speech/text-to-speech +response = client.audio.speech.create( + model = "vertex-tts", + input="the quick brown fox jumped over the lazy dogs", + voice={'languageCode': 'en-US', 'name': 'en-US-Studio-O'} ) - -print(response) +print("response from proxy", response) ``` diff --git a/litellm/llms/text_to_speech/vertex_ai.py b/litellm/llms/text_to_speech/vertex_ai.py index 139f3bbc2b..03120a8698 100644 --- a/litellm/llms/text_to_speech/vertex_ai.py +++ b/litellm/llms/text_to_speech/vertex_ai.py @@ -54,7 +54,7 @@ class VertexTextToSpeechAPI(VertexLLM): timeout: Union[float, httpx.Timeout], model: str, input: str, - voice: Optional[str] = None, + voice: Optional[dict] = None, _is_async: Optional[bool] = False, optional_params: Optional[dict] = None, **kwargs, @@ -87,7 +87,9 @@ class VertexTextToSpeechAPI(VertexLLM): vertex_input = VertexInput(text=input) # required param optional_params = optional_params or {} - if "voice" in optional_params: + if voice is not None: + vertex_voice = VertexVoice(**voice) + elif "voice" in optional_params: vertex_voice = VertexVoice(**optional_params["voice"]) else: # use defaults to not fail the request diff --git a/litellm/main.py b/litellm/main.py index f9ad007d6c..9c8f0a2d35 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -4699,7 +4699,7 @@ async def aspeech(*args, **kwargs) -> HttpxBinaryResponseContent: def speech( model: str, input: str, - voice: Optional[str] = None, + voice: Optional[Union[str, dict]] = None, api_key: Optional[str] = None, api_base: Optional[str] = None, api_version: Optional[str] = None, @@ -4735,9 +4735,9 @@ def speech( logging_obj = kwargs.get("litellm_logging_obj", None) response: Optional[HttpxBinaryResponseContent] = None if custom_llm_provider == "openai": - if voice is None: + if voice is None or not (isinstance(voice, str)): raise litellm.BadRequestError( - message="'voice' is required for OpenAI TTS", + message="'voice' is required to be passed as a string for OpenAI TTS", model=model, llm_provider=custom_llm_provider, ) @@ -4787,9 +4787,9 @@ def speech( ) elif custom_llm_provider == "azure": # azure configs - if voice is None: + if voice is None or not (isinstance(voice, str)): raise litellm.BadRequestError( - message="'voice' is required for Azure TTS", + message="'voice' is required to be passed as a string for Azure TTS", model=model, llm_provider=custom_llm_provider, ) @@ -4849,6 +4849,13 @@ def speech( vertex_credentials = generic_optional_params.vertex_credentials or get_secret( "VERTEXAI_CREDENTIALS" ) + + if voice is not None and not isinstance(voice, dict): + raise litellm.BadRequestError( + message=f"'voice' is required to be passed as a dict for Vertex AI TTS, passed in voice={voice}", + model=model, + llm_provider=custom_llm_provider, + ) response = vertex_text_to_speech.audio_speech( _is_async=aspeech, vertex_credentials=vertex_credentials, diff --git a/litellm/proxy/tests/test_openai_tts_request.py b/litellm/proxy/tests/test_openai_tts_request.py new file mode 100644 index 0000000000..91848947ae --- /dev/null +++ b/litellm/proxy/tests/test_openai_tts_request.py @@ -0,0 +1,11 @@ +import openai + +client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000") + +# # request sent to model set on litellm proxy, `litellm --model` +response = client.audio.speech.create( + model="vertex-tts", + input="the quick brown fox jumped over the lazy dogs", + voice={"languageCode": "en-US", "name": "en-US-Studio-O"}, # type: ignore +) +print("response from proxy", response) # noqa