mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-17 22:48:35 +00:00
feat(audio_transcription): add NVIDIA Riva STT provider (#27185)
* feat(audio_transcription): add NVIDIA Riva STT provider Adds nvidia_riva as a new audio transcription provider, supporting both NVCF-hosted and self-hosted Riva ASR deployments via gRPC streaming. - Auto-resamples input audio to 16 kHz mono LINEAR_PCM (soundfile + numpy, audioread fallback) so callers can send any common format. - Maps OpenAI params: language (en -> en-US), response_format (text/json/ verbose_json), timestamp_granularities=["word"] -> enable_word_time_offsets, word offsets converted ms -> s for verbose_json. - Auth: NVCF when nvcf_function_id is set (SSL on by default), self-hosted otherwise (SSL off by default), with explicit use_ssl override. - gRPC errors wrapped via NvidiaRivaException -> litellm exception classes. - Optional deps gated behind [stt-nvidia-riva] extra (nvidia-riva-client, soundfile, audioread, numpy). Co-authored-by: Cursor <cursoragent@cursor.com> * fix(nvidia_riva): address PR review feedback - handler: forward call-level `timeout` to streaming_response_generator (kwarg-detected via inspect for older riva-client compat) so a stalled Riva server cannot block the caller indefinitely. - audio_utils: spill bytes to a tempfile before audioread.audio_open; most audioread backends (FFmpeg, GStreamer) require a real filesystem path and previously raised TypeError on BytesIO, breaking the mp3/m4a fallback path. - audio_utils: prefer soxr / scipy.signal.resample_poly for resampling (anti-aliased polyphase) when installed, falling back to linear only as a last resort. Avoids aliasing on 44.1/48 kHz -> 16 kHz downsamples. - transformation: bare `es` now maps to es-ES (Castilian) instead of es-US, matching BCP-47 conventions. 
Co-authored-by: Cursor <cursoragent@cursor.com> * chore: trigger CI re-run [stabilize loop 1/3] * Update litellm/llms/nvidia_riva/audio_transcription/transformation.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * chore: trigger CI re-run [stabilize loop 1/3] * fix code qa * fix lint * fix mypy * fix mypy * Fix NVIDIA Riva ASR service lookup * Fix NVIDIA Riva transcription payload logging --------- Co-authored-by: Cursor <cursoragent@cursor.com> Co-authored-by: oss-pr-review-agent-shin[bot] <281797381+oss-pr-review-agent-shin[bot]@users.noreply.github.com> Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> Co-authored-by: mateo-berri <277851410+mateo-berri@users.noreply.github.com>
This commit is contained in:
@@ -586,6 +586,7 @@ anyscale_models: Set = set()
|
||||
cerebras_models: Set = set()
|
||||
galadriel_models: Set = set()
|
||||
nvidia_nim_models: Set = set()
|
||||
nvidia_riva_models: Set = set()
|
||||
sambanova_models: Set = set()
|
||||
sambanova_embedding_models: Set = set()
|
||||
novita_models: Set = set()
|
||||
@@ -812,6 +813,8 @@ def add_known_models(model_cost_map: Optional[Dict] = None):
|
||||
galadriel_models.add(key)
|
||||
elif value.get("litellm_provider") == "nvidia_nim":
|
||||
nvidia_nim_models.add(key)
|
||||
elif value.get("litellm_provider") == "nvidia_riva":
|
||||
nvidia_riva_models.add(key)
|
||||
elif value.get("litellm_provider") == "sambanova":
|
||||
sambanova_models.add(key)
|
||||
elif value.get("litellm_provider") == "sambanova-embedding-models":
|
||||
@@ -971,6 +974,7 @@ model_list = list(
|
||||
| cerebras_models
|
||||
| galadriel_models
|
||||
| nvidia_nim_models
|
||||
| nvidia_riva_models
|
||||
| sambanova_models
|
||||
| azure_text_models
|
||||
| novita_models
|
||||
@@ -1067,6 +1071,7 @@ models_by_provider: dict = {
|
||||
"cerebras": cerebras_models,
|
||||
"galadriel": galadriel_models,
|
||||
"nvidia_nim": nvidia_nim_models,
|
||||
"nvidia_riva": nvidia_riva_models,
|
||||
"sambanova": sambanova_models | sambanova_embedding_models,
|
||||
"novita": novita_models,
|
||||
"nebius": nebius_models | nebius_embedding_models,
|
||||
@@ -1618,6 +1623,9 @@ if TYPE_CHECKING:
|
||||
from .llms.deepgram.audio_transcription.transformation import (
|
||||
DeepgramAudioTranscriptionConfig as DeepgramAudioTranscriptionConfig,
|
||||
)
|
||||
from .llms.nvidia_riva.audio_transcription.transformation import (
|
||||
NvidiaRivaAudioTranscriptionConfig as NvidiaRivaAudioTranscriptionConfig,
|
||||
)
|
||||
from .llms.topaz.image_variations.transformation import (
|
||||
TopazImageVariationConfig as TopazImageVariationConfig,
|
||||
)
|
||||
|
||||
@@ -621,6 +621,18 @@ def _get_openai_compatible_provider_info( # noqa: PLR0915
|
||||
or "https://integrate.api.nvidia.com/v1"
|
||||
) # type: ignore
|
||||
dynamic_api_key = api_key or get_secret_str("NVIDIA_NIM_API_KEY")
|
||||
elif custom_llm_provider == "nvidia_riva":
|
||||
# NVIDIA Riva is gRPC-based; api_base must be a host:port like
|
||||
# `grpc.nvcf.nvidia.com:443` or `localhost:50051`. There is no
|
||||
# public-default endpoint, so we do not fill one in here.
|
||||
api_base = api_base or get_secret_str("NVIDIA_RIVA_API_BASE") # type: ignore
|
||||
# Fall back to NVIDIA_NIM_API_KEY because users running both NVCF
|
||||
# services typically reuse the same nvapi-* key.
|
||||
dynamic_api_key = (
|
||||
api_key
|
||||
or get_secret_str("NVIDIA_RIVA_API_KEY")
|
||||
or get_secret_str("NVIDIA_NIM_API_KEY")
|
||||
)
|
||||
elif custom_llm_provider == "cerebras":
|
||||
api_base = (
|
||||
api_base or get_secret("CEREBRAS_API_BASE") or "https://api.cerebras.ai/v1"
|
||||
|
||||
@@ -0,0 +1,232 @@
|
||||
"""
|
||||
Audio resampling utilities for the NVIDIA Riva STT provider.
|
||||
|
||||
We intentionally avoid a hard dependency on ``ffmpeg`` so this works in
|
||||
slim Python environments. Format coverage:
|
||||
|
||||
- ``soundfile`` handles wav / flac / ogg out of the box (libsndfile).
|
||||
- ``audioread`` is tried for everything ``soundfile`` cannot decode (mp3,
|
||||
m4a, mp4, webm, ...). This is a soft optional dependency.
|
||||
|
||||
If neither library can decode the input we raise a clear error instructing
|
||||
the caller to convert the audio upstream.
|
||||
"""
|
||||
|
||||
import io
|
||||
import os
|
||||
import tempfile
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Tuple, cast
|
||||
|
||||
from litellm.llms.nvidia_riva.audio_transcription.transformation import (
|
||||
RIVA_TARGET_NUM_CHANNELS,
|
||||
RIVA_TARGET_SAMPLE_RATE_HZ,
|
||||
)
|
||||
from litellm.llms.nvidia_riva.common_utils import NvidiaRivaException
|
||||
|
||||
# numpy is an optional dependency and is deliberately not imported when this
# module is loaded, so the array alias stays ``Any``. A conditional alias can
# degrade to "FloatArray?" under the project-wide mypy configuration.
FloatArray = Any

# Suffix appended to errors raised when the resampling extras are missing.
_INSTALL_HINT = (
    "Install Riva STT extras to enable automatic audio resampling: `pip install "
    "'litellm[stt-nvidia-riva]'`"
)
|
||||
|
||||
|
||||
@dataclass
class ResampledAudio:
    """Result of converting an input clip into the PCM layout sent to Riva."""

    # Raw PCM payload produced by the resampler.
    pcm_bytes: bytes
    # Length of the clip in seconds.
    duration_seconds: float
    # Sample rate of ``pcm_bytes`` in Hz.
    sample_rate_hz: int
    # Channel count of ``pcm_bytes``.
    num_channels: int
|
||||
|
||||
|
||||
def resample_to_riva_pcm(file_bytes: bytes) -> ResampledAudio:
    """
    Convert encoded audio into the PCM stream that is sent to Riva.

    ``file_bytes`` is decoded, collapsed to a single channel, resampled to
    ``RIVA_TARGET_SAMPLE_RATE_HZ`` when the source rate differs, clipped to
    [-1, 1] and serialized as int16 little-endian samples. The reported
    duration is derived from the number of output samples (used for cost
    calculation when Riva does not return usage).

    Raises:
        NvidiaRivaException: status 500 when numpy is not installed.
    """
    try:
        import numpy as np  # type: ignore
    except ImportError as e:
        raise NvidiaRivaException(
            status_code=500,
            message=f"numpy is required for Riva audio resampling. {_INSTALL_HINT}",
        ) from e

    decoded, source_rate = _decode_to_float32(file_bytes)

    # Collapse ``(n_samples, n_channels)`` input to one channel: average when
    # there are several channels, otherwise just drop the channel axis.
    if decoded.ndim == 2:
        if decoded.shape[1] > 1:
            decoded = decoded.mean(axis=1)
        else:
            decoded = decoded[:, 0]

    mono = np.asarray(decoded, dtype=np.float32).ravel()

    if source_rate != RIVA_TARGET_SAMPLE_RATE_HZ:
        mono = _resample(mono, source_rate, RIVA_TARGET_SAMPLE_RATE_HZ)

    # Clamp in place, then scale float [-1, 1] to int16 little-endian PCM.
    np.clip(mono, -1.0, 1.0, out=mono)
    pcm_int16 = (mono * 32767.0).astype("<i2")
    sample_count = float(pcm_int16.size)

    return ResampledAudio(
        pcm_bytes=pcm_int16.tobytes(),
        duration_seconds=sample_count / float(RIVA_TARGET_SAMPLE_RATE_HZ),
        sample_rate_hz=RIVA_TARGET_SAMPLE_RATE_HZ,
        num_channels=RIVA_TARGET_NUM_CHANNELS,
    )
|
||||
|
||||
|
||||
def _decode_to_float32(file_bytes: bytes) -> Tuple["FloatArray", int]:
    """
    Decode arbitrary audio bytes into a float32 array plus the source rate.

    The array is shaped ``(n_samples,)`` for mono or
    ``(n_samples, n_channels)`` for multi-channel input.

    ``soundfile`` is tried first (wav/flac/ogg). If it is missing or cannot
    decode the input, ``audioread`` is used for compressed formats.

    Raises:
        NvidiaRivaException: status 400 when ``audioread`` is not installed
            after ``soundfile`` failed, when decoding yields no samples, or
            when ``audioread`` itself fails to decode the input.
    """
    import numpy as np  # type: ignore

    sf_error: Exception | None = None
    try:
        import soundfile as sf  # type: ignore

        with io.BytesIO(file_bytes) as buf:
            data, source_rate = sf.read(buf, dtype="float32", always_2d=False)
            return cast("FloatArray", data), int(source_rate)
    except Exception as e:
        # Covers both a missing soundfile install (ImportError) and the
        # RuntimeError / LibsndfileError raised for formats it cannot decode
        # (mp3 on older libsndfile, m4a, webm, ...).
        sf_error = e

    try:
        import audioread  # type: ignore
    except ImportError as e:
        raise NvidiaRivaException(
            status_code=400,
            message=(
                "Could not decode audio for Riva STT. Install audio extras "
                "(`pip install 'litellm[stt-nvidia-riva]'`) or convert your "
                "audio to wav/flac/ogg before calling the API. "
                f"Underlying error: {sf_error}"
            ),
        ) from e

    # audioread backends (FFmpeg subprocess, GStreamer, Core Audio) require a
    # filesystem path, so spill the bytes to a temp file. mkstemp is portable
    # to Windows where re-opening a NamedTemporaryFile is not allowed.
    fd, tmp_path = tempfile.mkstemp(suffix=".audio")
    try:
        with os.fdopen(fd, "wb") as tmp_file:
            tmp_file.write(file_bytes)
        try:
            with audioread.audio_open(tmp_path) as src:
                source_rate = int(src.samplerate)
                channels = int(src.channels)
                # audioread yields raw 16-bit little-endian signed PCM, so
                # the byte order is spelled out instead of relying on the
                # host's native int16 layout.
                chunks = [np.frombuffer(buf, dtype="<i2") for buf in src]
            if not chunks:
                raise NvidiaRivaException(
                    status_code=400,
                    message="Audio decode produced no samples.",
                )
            interleaved = np.concatenate(chunks).astype(np.float32) / 32768.0
            if channels > 1:
                # De-interleave into (n_samples, n_channels).
                interleaved = interleaved.reshape(-1, channels)
            return cast("FloatArray", interleaved), source_rate
        except NvidiaRivaException:
            raise
        except Exception as e:
            raise NvidiaRivaException(
                status_code=400,
                message=(
                    "Could not decode audio for Riva STT. Convert your audio to "
                    f"wav/flac/ogg before calling the API. Underlying error: {e}"
                ),
            ) from e
    finally:
        # Best-effort removal of the spill file; a failed unlink must not
        # hide the decode result or the decode error.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
|
||||
|
||||
|
||||
def _resample(
    samples: "FloatArray", source_rate: int, target_rate: int
) -> "FloatArray":
    """
    Convert mono float32 ``samples`` from ``source_rate`` to ``target_rate``.

    Backends are tried in order: ``soxr``, then
    ``scipy.signal.resample_poly``, then the linear-interpolation fallback.
    The first two are anti-aliased, which matters when downsampling
    44.1/48 kHz -> 16 kHz, where naive interpolation folds high frequencies
    back into the speech band. The linear fallback is acceptable for
    speech-only mono input but lossy for wideband content.

    The input is returned untouched when the rates already match or there
    are no samples.
    """
    import numpy as np  # type: ignore

    if source_rate == target_rate or samples.size == 0:
        return samples

    try:
        import soxr  # type: ignore

        via_soxr = soxr.resample(samples, source_rate, target_rate)
        return cast("FloatArray", np.asarray(via_soxr, dtype=np.float32))
    except ImportError:
        pass

    try:
        from math import gcd

        from scipy.signal import resample_poly  # type: ignore

        # Reduce the rate ratio so the polyphase filter uses the smallest
        # integer up/down factors.
        src_hz, dst_hz = int(source_rate), int(target_rate)
        common = gcd(src_hz, dst_hz)
        via_scipy = resample_poly(samples, dst_hz // common, src_hz // common)
        return cast("FloatArray", np.asarray(via_scipy, dtype=np.float32))
    except ImportError:
        pass

    return _linear_resample(samples, source_rate, target_rate)
|
||||
|
||||
|
||||
def _linear_resample(
    samples: "FloatArray", source_rate: int, target_rate: int
) -> "FloatArray":
    """
    Linear-interpolation fallback. See :func:`_resample` for caveats.

    The output always has ``round(len(samples) / source_rate * target_rate)``
    samples and dtype float32. The first and last output samples line up
    with the first and last input samples.
    """
    import numpy as np  # type: ignore

    duration = samples.size / float(source_rate)
    target_length = int(round(duration * target_rate))
    if target_length <= 1:
        # Too short to interpolate. Keep only the leading sample(s) so the
        # output length still equals ``target_length``; handing back the
        # whole source-rate buffer would inflate the duration the caller
        # derives from the sample count.
        return samples[:target_length].astype(np.float32)

    # Fractional source positions for every output sample.
    src_indices = np.linspace(0, samples.size - 1, num=target_length, dtype=np.float64)
    left = np.floor(src_indices).astype(np.int64)
    # Clamp so the last position does not index past the end.
    right = np.minimum(left + 1, samples.size - 1)
    frac = (src_indices - left).astype(np.float32)

    return ((1.0 - frac) * samples[left] + frac * samples[right]).astype(np.float32)
|
||||
@@ -0,0 +1,444 @@
|
||||
"""
|
||||
NVIDIA Riva STT handler.
|
||||
|
||||
This module bridges litellm's transcription dispatch to NVIDIA Riva's gRPC
|
||||
streaming ASR API. We do *not* go through ``base_llm_http_handler`` because
|
||||
Riva is gRPC-only: HTTP-shaped abstractions (``httpx.Response``,
|
||||
``api_base/v1/...`` URLs, multipart bodies) do not apply.
|
||||
|
||||
The handler is intentionally a thin orchestration layer:
|
||||
|
||||
1. Resample the inbound audio to 16 kHz mono LINEAR_PCM (Riva's required
|
||||
wire format).
|
||||
2. Build ``RecognitionConfig`` / ``StreamingRecognitionConfig`` protobufs
|
||||
from the structured dict produced by
|
||||
:class:`NvidiaRivaAudioTranscriptionConfig`.
|
||||
3. Construct ``riva.client.Auth`` honoring NVCF (function-id metadata + TLS)
|
||||
vs self-hosted (any host:port, optional TLS) modes.
|
||||
4. Stream the audio through Riva's ``streaming_response_generator`` and
|
||||
aggregate ``is_final`` results into a single transcript.
|
||||
5. Return a normalized ``TranscriptionResponse`` with ``duration`` exposed
|
||||
on ``_hidden_params`` so cost calculation works.
|
||||
|
||||
``riva-client`` is imported lazily so ``litellm`` core remains usable
|
||||
without the optional STT extras installed.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import inspect
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
|
||||
|
||||
from litellm.litellm_core_utils.audio_utils.utils import (
|
||||
get_audio_file_name,
|
||||
process_audio_file,
|
||||
)
|
||||
from litellm.llms.nvidia_riva.audio_transcription.audio_utils import (
|
||||
resample_to_riva_pcm,
|
||||
)
|
||||
from litellm.llms.nvidia_riva.audio_transcription.transformation import (
|
||||
NvidiaRivaAudioTranscriptionConfig,
|
||||
RIVA_TARGET_NUM_CHANNELS,
|
||||
RIVA_TARGET_SAMPLE_RATE_HZ,
|
||||
)
|
||||
from litellm.llms.nvidia_riva.common_utils import (
|
||||
NvidiaRivaException,
|
||||
grpc_error_to_litellm_exception,
|
||||
)
|
||||
from litellm.types.utils import FileTypes, TranscriptionResponse
|
||||
from litellm.utils import convert_to_model_response_object
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from litellm.litellm_core_utils.litellm_logging import (
|
||||
Logging as LiteLLMLoggingObj,
|
||||
)
|
||||
|
||||
# Audio is streamed to Riva in ~50 ms slices (1600 samples at 16 kHz). This
# matches NVIDIA's recommended chunk size for streaming ASR: small enough for
# responsive endpointing, large enough to keep per-RPC overhead low.
_DEFAULT_CHUNK_SAMPLES = 1600
_DEFAULT_CHUNK_BYTES = 2 * _DEFAULT_CHUNK_SAMPLES  # 2 bytes per int16 sample

# Error text used when the optional ``riva.client`` package cannot be imported.
_RIVA_INSTALL_HINT = (
    "NVIDIA Riva client is not installed. Install with "
    "`pip install 'litellm[stt-nvidia-riva]'`."
)
|
||||
|
||||
|
||||
class NvidiaRivaAudioTranscription:
    """Sync + async entry point for Riva ASR."""

    def audio_transcriptions(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
        model_response: TranscriptionResponse,
        timeout: float,
        logging_obj: "LiteLLMLoggingObj",
        api_key: Optional[str],
        api_base: Optional[str],
        atranscription: bool = False,
        provider_config: Optional[NvidiaRivaAudioTranscriptionConfig] = None,
    ):
        """
        Dispatch a transcription request.

        Returns the coroutine from :meth:`async_audio_transcriptions` when
        ``atranscription`` is true (the caller awaits it); otherwise runs the
        blocking path and returns the ``TranscriptionResponse`` directly.
        A default ``NvidiaRivaAudioTranscriptionConfig`` is created when
        ``provider_config`` is not supplied.
        """
        if provider_config is None:
            provider_config = NvidiaRivaAudioTranscriptionConfig()

        if atranscription:
            return self.async_audio_transcriptions(
                model=model,
                audio_file=audio_file,
                optional_params=optional_params,
                litellm_params=litellm_params,
                model_response=model_response,
                timeout=timeout,
                logging_obj=logging_obj,
                api_key=api_key,
                api_base=api_base,
                provider_config=provider_config,
            )

        return self._run_sync(
            model=model,
            audio_file=audio_file,
            optional_params=optional_params,
            litellm_params=litellm_params,
            model_response=model_response,
            timeout=timeout,
            logging_obj=logging_obj,
            api_key=api_key,
            api_base=api_base,
            provider_config=provider_config,
            atranscription=atranscription,
        )

    async def async_audio_transcriptions(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
        model_response: TranscriptionResponse,
        timeout: float,
        logging_obj: "LiteLLMLoggingObj",
        api_key: Optional[str],
        api_base: Optional[str],
        provider_config: Optional[NvidiaRivaAudioTranscriptionConfig] = None,
    ) -> TranscriptionResponse:
        """Async variant: runs :meth:`_run_sync` in a worker thread."""
        # ``riva-client`` exposes a sync streaming generator, so we offload
        # the blocking call to a worker thread to keep the event loop free.
        return await asyncio.to_thread(
            self._run_sync,
            model=model,
            audio_file=audio_file,
            optional_params=optional_params,
            litellm_params=litellm_params,
            model_response=model_response,
            timeout=timeout,
            logging_obj=logging_obj,
            api_key=api_key,
            api_base=api_base,
            provider_config=provider_config or NvidiaRivaAudioTranscriptionConfig(),
            atranscription=True,
        )

    def _run_sync(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
        model_response: TranscriptionResponse,
        timeout: float,
        logging_obj: "LiteLLMLoggingObj",
        api_key: Optional[str],
        api_base: Optional[str],
        provider_config: NvidiaRivaAudioTranscriptionConfig,
        atranscription: bool = False,
    ) -> TranscriptionResponse:
        """
        Run one blocking transcription against Riva.

        Resamples the audio, builds the recognition config, streams the PCM
        chunks through ``streaming_response_generator``, collects the final
        results and converts them into a ``TranscriptionResponse``. The
        audio duration is placed in the hidden params under
        ``audio_transcription_duration``.

        Raises:
            NvidiaRivaException: status 400 when ``api_base`` is empty;
                status 500 when the provider config returns a non-dict
                payload. Any other exception raised while streaming is
                converted with ``grpc_error_to_litellm_exception``.
        """
        if not api_base:
            raise NvidiaRivaException(
                status_code=400,
                message=(
                    "NVIDIA Riva requires `api_base` (host:port for the gRPC "
                    "endpoint, e.g. `grpc.nvcf.nvidia.com:443` or "
                    "`localhost:50051`). Set it in litellm_params or via "
                    "NVIDIA_RIVA_API_BASE."
                ),
            )

        processed = process_audio_file(audio_file)
        resampled = resample_to_riva_pcm(processed.file_content)

        request_payload = provider_config.transform_audio_transcription_request(
            model=model,
            audio_file=audio_file,
            optional_params=optional_params,
            litellm_params={
                **litellm_params,
                "api_base": api_base,
                "api_key": api_key,
            },
        ).data
        if not isinstance(request_payload, dict):
            raise NvidiaRivaException(
                status_code=500,
                message="NvidiaRivaAudioTranscriptionConfig produced an unexpected request payload type.",
            )

        recognition_config_dict: Dict[str, Any] = request_payload["recognition_config"]
        # The wire format is fixed by our resampler; override anything stale
        # the caller passed in so the gRPC config matches the bytes we send.
        recognition_config_dict["sample_rate_hertz"] = RIVA_TARGET_SAMPLE_RATE_HZ
        recognition_config_dict["audio_channel_count"] = RIVA_TARGET_NUM_CHANNELS
        recognition_config_dict["encoding"] = "LINEAR_PCM"

        response_format = request_payload.get("response_format") or "json"
        timestamp_granularities = request_payload.get("timestamp_granularities")

        riva_module, riva_asr_module = _import_riva()
        auth_obj = self._construct_auth(
            riva_module=riva_module,
            api_base=api_base,
            api_key=api_key,
            optional_params=optional_params,
        )

        recognition_config = self._build_recognition_config_proto(
            riva_asr_module=riva_asr_module,
            recognition_config_dict=recognition_config_dict,
        )
        streaming_config = riva_asr_module.StreamingRecognitionConfig(
            config=recognition_config, interim_results=False
        )

        # Only the presence of the NVCF function id is logged, not its value.
        logging_obj.pre_call(
            input=None,
            api_key=api_key,
            additional_args={
                "api_base": api_base,
                "atranscription": atranscription,
                "complete_input_dict": {
                    "recognition_config": recognition_config_dict,
                    "nvcf_function_id_set": bool(
                        optional_params.get("nvcf_function_id")
                    ),
                    "use_ssl": optional_params.get("use_ssl"),
                },
            },
        )

        try:
            asr_service = riva_module.ASRService(auth_obj)
            audio_chunks = self._iter_audio_chunks(resampled.pcm_bytes)
            stream_kwargs: Dict[str, Any] = {
                "audio_chunks": audio_chunks,
                "streaming_config": streaming_config,
            }
            # Forward the deadline so the stream cannot block forever if the
            # server stalls. Older riva-client versions do not accept a
            # ``timeout`` kwarg, so pass it only when supported.
            if timeout is not None and self._supports_timeout_kwarg(
                asr_service.streaming_response_generator
            ):
                stream_kwargs["timeout"] = float(timeout)
            stream = asr_service.streaming_response_generator(**stream_kwargs)
            final_results = self._collect_final_results(stream)
        except NvidiaRivaException:
            raise
        except Exception as e:
            raise grpc_error_to_litellm_exception(e) from e
        finally:
            # A new auth object (and its channel) is built per request; the
            # stream is fully consumed above, so release the channel now
            # rather than waiting for garbage collection.
            self._close_channel(auth_obj)

        transcription = NvidiaRivaAudioTranscriptionConfig.build_transcription_response(
            final_results=final_results,
            response_format=response_format,
            duration_seconds=resampled.duration_seconds,
            timestamp_granularities=timestamp_granularities,
        )

        stringified_response = dict(transcription)

        logging_obj.post_call(
            input=get_audio_file_name(audio_file),
            api_key=api_key,
            additional_args={"complete_input_dict": recognition_config_dict},
            original_response=stringified_response,
        )

        hidden_params = {
            "model": model,
            "custom_llm_provider": "nvidia_riva",
            "audio_transcription_duration": resampled.duration_seconds,
        }

        final_response: TranscriptionResponse = convert_to_model_response_object(  # type: ignore
            response_object=stringified_response,
            model_response_object=model_response,
            hidden_params=hidden_params,
            response_type="audio_transcription",
        )

        return final_response

    @staticmethod
    def _coerce_bool(value: Any) -> bool:
        """
        Interpret ``value`` as a boolean flag.

        Strings are compared case-insensitively: ``""``, ``"0"``,
        ``"false"``, ``"no"`` and ``"off"`` are false and every other string
        is true. Non-string values use normal truthiness. This keeps a
        textual ``"false"`` from turning a flag on.
        """
        if isinstance(value, str):
            return value.strip().lower() not in {"", "0", "false", "no", "off"}
        return bool(value)

    @staticmethod
    def _close_channel(auth_obj: Any) -> None:
        """
        Close ``auth_obj.channel`` when it exists and has a ``close`` method.

        NOTE(review): assumes ``riva.client.Auth`` exposes its gRPC channel
        as ``.channel``; objects without that attribute are left untouched.
        """
        channel = getattr(auth_obj, "channel", None)
        close = getattr(channel, "close", None)
        if not callable(close):
            return
        try:
            close()
        except Exception:
            # Best-effort cleanup: a failing close must not replace the
            # transcription result or the original streaming error.
            pass

    def _construct_auth(
        self,
        riva_module: Any,
        api_base: str,
        api_key: Optional[str],
        optional_params: dict,
    ) -> Any:
        """
        Build a ``riva.client.Auth`` object.

        - When ``nvcf_function_id`` is provided we attach the NVCF
          ``function-id`` metadata and default ``use_ssl`` to True (NVCF
          endpoints are TLS-only).
        - Otherwise (self-hosted) we default ``use_ssl`` to False but still
          honor an explicit override — self-hosted Riva behind an ingress
          with TLS termination is a real deployment topology.
        - A bearer ``authorization`` entry is attached whenever ``api_key``
          is set.

        An explicit ``use_ssl`` may be a bool or a string; see
        :meth:`_coerce_bool` for how strings are read.
        """
        nvcf_function_id = optional_params.get("nvcf_function_id")
        use_ssl_override = optional_params.get("use_ssl")
        if use_ssl_override is not None:
            use_ssl = self._coerce_bool(use_ssl_override)
        else:
            use_ssl = bool(nvcf_function_id)

        metadata: List[Tuple[str, str]] = []
        if nvcf_function_id:
            metadata.append(("function-id", str(nvcf_function_id)))
        if api_key:
            metadata.append(("authorization", f"Bearer {api_key}"))

        try:
            return riva_module.Auth(
                uri=api_base, use_ssl=use_ssl, metadata_args=metadata
            )
        except TypeError:
            # Older riva-client signatures used positional-only args.
            return riva_module.Auth(None, use_ssl, api_base, metadata)

    def _build_recognition_config_proto(
        self, riva_asr_module: Any, recognition_config_dict: Dict[str, Any]
    ):
        """
        Build a ``RecognitionConfig`` message from ``recognition_config_dict``.

        ``sample_rate_hertz``, ``language_code`` and ``audio_channel_count``
        are required keys. The remaining fields fall back to the defaults
        written below. An unknown encoding name falls back to
        ``LINEAR_PCM``.
        """
        encoding_name = (
            recognition_config_dict.get("encoding") or "LINEAR_PCM"
        ).upper()
        encoding_enum = getattr(
            riva_asr_module.AudioEncoding,
            encoding_name,
            riva_asr_module.AudioEncoding.LINEAR_PCM,
        )

        config = riva_asr_module.RecognitionConfig(
            encoding=encoding_enum,
            sample_rate_hertz=int(recognition_config_dict["sample_rate_hertz"]),
            language_code=recognition_config_dict["language_code"],
            audio_channel_count=int(recognition_config_dict["audio_channel_count"]),
            enable_automatic_punctuation=bool(
                recognition_config_dict.get("enable_automatic_punctuation", True)
            ),
            enable_word_time_offsets=bool(
                recognition_config_dict.get("enable_word_time_offsets", False)
            ),
            max_alternatives=int(recognition_config_dict.get("max_alternatives", 1)),
            model=recognition_config_dict.get("model", "") or "",
            verbatim_transcripts=bool(
                recognition_config_dict.get("verbatim_transcripts", False)
            ),
            profanity_filter=bool(
                recognition_config_dict.get("profanity_filter", False)
            ),
        )

        endpointing = recognition_config_dict.get("endpointing_config")
        if isinstance(endpointing, dict) and endpointing:
            try:
                ep = riva_asr_module.EndpointingConfig(**endpointing)
                config.endpointing_config.CopyFrom(ep)
            except Exception:
                # If the user supplied an unknown EndpointingConfig field
                # (older Riva server), fall back to Riva's defaults rather
                # than failing the whole request.
                pass

        return config

    @staticmethod
    def _supports_timeout_kwarg(callable_obj: Any) -> bool:
        """
        Report whether ``callable_obj`` can be called with ``timeout=...``.

        True when the signature names a ``timeout`` parameter or accepts
        ``**kwargs``; False when the signature cannot be inspected.
        """
        try:
            sig = inspect.signature(callable_obj)
        except (TypeError, ValueError):
            return False
        params = sig.parameters
        if "timeout" in params:
            return True
        return any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params.values())

    @staticmethod
    def _iter_audio_chunks(pcm_bytes: bytes):
        """Yield ``pcm_bytes`` in consecutive ``_DEFAULT_CHUNK_BYTES`` slices."""
        # Every offset from ``range`` is below ``len(pcm_bytes)``, so each
        # slice is non-empty; empty input yields nothing.
        for offset in range(0, len(pcm_bytes), _DEFAULT_CHUNK_BYTES):
            yield pcm_bytes[offset : offset + _DEFAULT_CHUNK_BYTES]

    @staticmethod
    def _collect_final_results(stream) -> List[Dict[str, Any]]:
        """
        Walk the gRPC stream and return a list of normalized final results.

        Responses without ``results`` (streaming heartbeats), results that
        are not ``is_final``, and results without alternatives are skipped.
        Only the first alternative of each final result is kept. Each entry
        has a ``transcript`` string and a ``words`` list of dicts with
        ``word``, ``start_time_ms`` and ``end_time_ms``.
        """
        final_results: List[Dict[str, Any]] = []
        for response in stream:
            results = getattr(response, "results", None) or []
            for result in results:
                if not getattr(result, "is_final", False):
                    continue
                alternatives = getattr(result, "alternatives", None) or []
                if not alternatives:
                    continue
                top = alternatives[0]
                transcript = getattr(top, "transcript", "") or ""
                words_proto = getattr(top, "words", None) or []
                words = [
                    {
                        "word": getattr(word, "word", ""),
                        "start_time_ms": int(getattr(word, "start_time", 0) or 0),
                        "end_time_ms": int(getattr(word, "end_time", 0) or 0),
                    }
                    for word in words_proto
                ]
                final_results.append({"transcript": transcript, "words": words})
        return final_results
|
||||
|
||||
|
||||
def _import_riva():
    """
    Lazily import the Riva SDK and locate the ASR protobuf definitions.

    Returns a ``(riva_client, riva_asr_module)`` pair. When ``riva.client``
    itself exposes ``RecognitionConfig`` it is used for both entries;
    otherwise ``riva.client.proto.riva_asr_pb2`` is imported as the second
    entry, which covers SDK versions that package the protos separately.

    Raises:
        NvidiaRivaException: status 500 with the install hint when either
            import fails.
    """
    try:
        import riva.client as riva_client  # type: ignore
    except ImportError as e:
        raise NvidiaRivaException(status_code=500, message=_RIVA_INSTALL_HINT) from e

    # Preferred layout: the SDK re-exports the protobuf classes.
    if hasattr(riva_client, "RecognitionConfig"):
        return riva_client, riva_client

    try:
        import riva.client.proto.riva_asr_pb2 as riva_asr_pb2  # type: ignore
    except ImportError as e:
        raise NvidiaRivaException(
            status_code=500, message=_RIVA_INSTALL_HINT
        ) from e

    return riva_client, riva_asr_pb2
|
||||
@@ -0,0 +1,284 @@
|
||||
"""
|
||||
Translates from OpenAI's `/v1/audio/transcriptions` to NVIDIA Riva's gRPC
|
||||
streaming recognize API.
|
||||
|
||||
Riva is gRPC-only, so unlike most providers in this directory the request
|
||||
"transformation" produced here is a structured dict consumed directly by the
|
||||
gRPC handler (rather than HTTP form-data). The handler builds Riva
|
||||
``RecognitionConfig`` / ``StreamingRecognitionConfig`` protobufs from this
|
||||
dict at call time.
|
||||
|
||||
Reference: https://docs.nvidia.com/deeplearning/riva/user-guide/docs/asr/asr-overview.html
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
from httpx import Headers, Response
|
||||
|
||||
from litellm.llms.base_llm.chat.transformation import BaseLLMException
|
||||
from litellm.types.llms.openai import (
|
||||
AllMessageValues,
|
||||
OpenAIAudioTranscriptionOptionalParams,
|
||||
)
|
||||
from litellm.types.utils import FileTypes, TranscriptionResponse
|
||||
|
||||
from ...base_llm.audio_transcription.transformation import (
|
||||
AudioTranscriptionRequestData,
|
||||
BaseAudioTranscriptionConfig,
|
||||
)
|
||||
from ..common_utils import NvidiaRivaException
|
||||
|
||||
# Fixed wire format of the audio chunks streamed to Riva.
RIVA_TARGET_SAMPLE_RATE_HZ = 16000  # samples per second
RIVA_TARGET_NUM_CHANNELS = 1  # mono
RIVA_TARGET_ENCODING = "LINEAR_PCM"
|
||||
|
||||
|
||||
class NvidiaRivaAudioTranscriptionConfig(BaseAudioTranscriptionConfig):
    """
    Config for NVIDIA Riva ASR (gRPC).

    Supports both NVCF-hosted (``api_base=grpc.nvcf.nvidia.com:443`` +
    ``nvcf_function_id``) and self-hosted deployments (any ``host:port``,
    optional TLS via ``use_ssl``).
    """

    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAIAudioTranscriptionOptionalParams]:
        """Return the OpenAI transcription params this provider accepts."""
        # Riva natively understands language + word timestamps.
        # `response_format` is honored at response-shaping time in the handler.
        return ["language", "response_format", "timestamp_granularities"]

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        Copy caller-supplied params into ``optional_params`` using Riva names.

        ``optional_params`` is updated in place and also returned. Entries
        whose value is ``None`` are skipped; keys without a dedicated mapping
        are copied through unchanged.
        """
        for key, value in non_default_params.items():
            if value is None:
                continue

            if key == "language":
                optional_params["language_code"] = self._normalize_language_code(value)
            elif key == "timestamp_granularities":
                # OpenAI accepts ["word"], ["segment"], or both. Riva only
                # natively exposes word timing, so we toggle it on whenever
                # "word" is requested. ``build_transcription_response`` emits
                # word entries only; it does not produce segment entries.
                if isinstance(value, list) and "word" in value:
                    optional_params["enable_word_time_offsets"] = True
                optional_params["timestamp_granularities"] = value
            elif key == "response_format":
                # Stored verbatim; read back when the request payload is built.
                optional_params["response_format"] = value
            else:
                optional_params[key] = value

        return optional_params

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, Headers]
    ) -> BaseLLMException:
        """Wrap an error message/status/headers triple in ``NvidiaRivaException``."""
        return NvidiaRivaException(
            message=error_message, status_code=status_code, headers=headers
        )

    def transform_audio_transcription_request(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
    ) -> AudioTranscriptionRequestData:
        """
        Build a structured dict that the gRPC handler consumes. We do *not*
        construct protobufs here, so this module remains importable without
        ``nvidia-riva-client`` being installed (matching how other providers
        defer SDK imports to handler-call time).

        The returned payload carries ``recognition_config`` (with an optional
        nested ``endpointing_config``), ``response_format`` (defaulting to
        ``"json"``) and ``timestamp_granularities``. ``audio_file`` is not
        read here.
        """
        recognition_config = self._build_recognition_config_dict(
            model=model,
            optional_params=optional_params,
        )

        endpointing_config = self._build_endpointing_config_dict(optional_params)
        if endpointing_config is not None:
            recognition_config["endpointing_config"] = endpointing_config

        request_payload: Dict[str, Any] = {
            "recognition_config": recognition_config,
            "response_format": optional_params.get("response_format") or "json",
            "timestamp_granularities": optional_params.get("timestamp_granularities"),
        }

        return AudioTranscriptionRequestData(data=request_payload, files=None)

    def transform_audio_transcription_response(
        self,
        raw_response: Response,
    ) -> TranscriptionResponse:
        """Always raises ``NotImplementedError``; see the message for why."""
        # Not used: Riva responses come from a gRPC stream, not an httpx
        # response. The handler calls build_transcription_response directly.
        raise NotImplementedError(
            "NvidiaRivaAudioTranscriptionConfig.transform_audio_transcription_response "
            "is not used. The handler builds the TranscriptionResponse directly "
            "from Riva's gRPC streaming results."
        )

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        """Return ``headers`` untouched."""
        # gRPC auth is constructed in the handler, not via HTTP headers.
        return headers

    def _build_recognition_config_dict(
        self, model: str, optional_params: dict
    ) -> Dict[str, Any]:
        """
        Build the Riva ``RecognitionConfig`` shape as a plain dict.

        ``model`` is intentionally left empty when the user has not supplied
        ``riva_model_name``. Riva auto-selects the right deployment from
        ``language_code`` + ``sample_rate_hertz``. NVIDIA's internal
        deployment names (e.g. ``parakeet-1.1b-en-US-asr-streaming-...``)
        change across NIM versions, regions, and self-hosted builds, so
        hardcoding any name here would break unpredictably.
        """
        return {
            "language_code": optional_params.get("language_code", "en-US"),
            "sample_rate_hertz": optional_params.get(
                "sample_rate_hertz", RIVA_TARGET_SAMPLE_RATE_HZ
            ),
            "encoding": optional_params.get("encoding", RIVA_TARGET_ENCODING),
            "audio_channel_count": optional_params.get(
                "audio_channel_count", RIVA_TARGET_NUM_CHANNELS
            ),
            "enable_automatic_punctuation": optional_params.get(
                "enable_automatic_punctuation", True
            ),
            "enable_word_time_offsets": bool(
                optional_params.get("enable_word_time_offsets", False)
            ),
            "max_alternatives": optional_params.get("max_alternatives", 1),
            "model": optional_params.get("riva_model_name", ""),
            "verbatim_transcripts": optional_params.get("verbatim_transcripts", False),
            "profanity_filter": optional_params.get("profanity_filter", False),
        }

    def _build_endpointing_config_dict(
        self, optional_params: dict
    ) -> Optional[Dict[str, Any]]:
        """
        Translate an OpenAI-style ``chunking_strategy`` into Riva's
        ``EndpointingConfig`` shape, or pass through an explicit
        ``endpointing_config`` dict.

        An explicit ``endpointing_config`` dict wins and is returned as a
        shallow copy. Otherwise only ``{"type": "server_vad", ...}`` is
        translated; fields that are absent or ``None`` are left out.

        Returns ``None`` when nothing usable is provided so Riva uses its
        built-in VAD defaults.
        """
        explicit = optional_params.get("endpointing_config")
        if isinstance(explicit, dict):
            return dict(explicit)

        chunking = optional_params.get("chunking_strategy")
        if chunking in (None, "auto"):
            return None

        if isinstance(chunking, dict) and chunking.get("type") == "server_vad":
            config: Dict[str, Any] = {}

            # ``.get(...) is not None`` (rather than ``in``) so that a key
            # explicitly set to None is treated as "not provided" instead of
            # crashing in float()/int().
            raw_threshold = chunking.get("threshold")
            if raw_threshold is not None:
                threshold = float(raw_threshold)
                config["start_threshold"] = threshold
                config["stop_threshold"] = threshold

            silence_ms = chunking.get("silence_duration_ms")
            if silence_ms is not None:
                config["stop_history"] = int(silence_ms)

            padding_ms = chunking.get("prefix_padding_ms")
            if padding_ms is not None:
                config["stop_history_eou"] = int(padding_ms)

            return config or None

        return None

    @staticmethod
    def _normalize_language_code(language: str) -> str:
        """
        OpenAI accepts bare ISO-639 codes like ``en``; Riva requires BCP-47
        like ``en-US``. Normalize the most common bare codes; pass through
        anything that already looks like BCP-47.

        Surrounding whitespace is dropped and ``_`` separators are rewritten
        to ``-`` (``en_US`` -> ``en-US``) before the lookup. Non-string or
        empty input falls back to ``en-US``; unknown bare codes are returned
        as given (after that clean-up).
        """
        if not isinstance(language, str):
            return "en-US"

        # Locale-style tags use "_" but BCP-47 subtags are hyphen-separated.
        cleaned = language.strip().replace("_", "-")
        if not cleaned:
            return "en-US"
        if "-" in cleaned:
            return cleaned

        bare_to_bcp47 = {
            "en": "en-US",
            "es": "es-ES",
            "de": "de-DE",
            "fr": "fr-FR",
            "it": "it-IT",
            "pt": "pt-BR",
            "ja": "ja-JP",
            "ko": "ko-KR",
            "zh": "zh-CN",
            "ru": "ru-RU",
            "hi": "hi-IN",
            "ar": "ar-SA",
        }
        return bare_to_bcp47.get(cleaned.lower(), cleaned)

    @staticmethod
    def build_transcription_response(
        final_results: List[Dict[str, Any]],
        response_format: str,
        duration_seconds: Optional[float],
        timestamp_granularities: Optional[List[str]],
    ) -> TranscriptionResponse:
        """
        Aggregate a list of normalized "final result" dicts into a
        ``TranscriptionResponse`` shaped for the requested ``response_format``.

        Each entry in ``final_results`` is expected to look like::

            {
                "transcript": str,
                "words": [{"word": str, "start_time_ms": int, "end_time_ms": int}, ...],
            }

        which the handler produces by walking the gRPC stream and keeping
        only ``result.is_final`` entries (empty/non-final chunks are
        ignored).

        Transcripts are concatenated without a separator and the result is
        stripped. ``words`` and ``duration`` are attached only for
        ``response_format == "verbose_json"``; ``words`` additionally
        requires ``"word"`` in ``timestamp_granularities``. Word offsets are
        converted from milliseconds to seconds, with missing or ``None``
        offsets treated as 0.
        """
        full_transcript = "".join(
            (item.get("transcript") or "") for item in final_results
        ).strip()

        response = TranscriptionResponse(text=full_transcript)
        response["task"] = "transcribe"

        if response_format == "verbose_json":
            words: List[Dict[str, Any]] = []
            if timestamp_granularities and "word" in timestamp_granularities:
                for item in final_results:
                    for word in item.get("words") or []:
                        words.append(
                            {
                                "word": word.get("word", ""),
                                # ``or 0`` guards against an explicit None,
                                # which float() would reject.
                                "start": float(word.get("start_time_ms") or 0) / 1000.0,
                                "end": float(word.get("end_time_ms") or 0) / 1000.0,
                            }
                        )
            if words:
                response["words"] = words
            if duration_seconds is not None:
                response["duration"] = duration_seconds

        return response
|
||||
@@ -0,0 +1,92 @@
|
||||
"""
|
||||
Common utilities and exceptions for the NVIDIA Riva STT provider
|
||||
"""
|
||||
|
||||
from typing import Any, Optional
|
||||
|
||||
from litellm.llms.base_llm.chat.transformation import BaseLLMException
|
||||
|
||||
|
||||
class NvidiaRivaException(BaseLLMException):
    """
    Error type for failures reported by NVIDIA Riva (gRPC).

    The ``status_code`` carried by an instance is an HTTP-equivalent code
    derived from the underlying gRPC ``StatusCode`` (when one is available),
    which lets litellm's existing error classifiers (RateLimitError,
    AuthenticationError, etc.) keep working.
    """
|
||||
|
||||
|
||||
# Mapping from grpc.StatusCode.name -> equivalent HTTP status code.
# Kept as a plain dict (rather than importing grpc enums) so this module is
# importable without grpc installed.
# Names missing from this table are not an error: the lookup in
# ``grpc_error_to_litellm_exception`` falls back to 500.
_GRPC_STATUS_CODE_TO_HTTP: dict = {
    "OK": 200,
    "CANCELLED": 499,
    "UNKNOWN": 500,
    "INVALID_ARGUMENT": 400,
    "DEADLINE_EXCEEDED": 504,
    "NOT_FOUND": 404,
    "ALREADY_EXISTS": 409,
    "PERMISSION_DENIED": 403,
    "RESOURCE_EXHAUSTED": 429,
    "FAILED_PRECONDITION": 400,
    "ABORTED": 409,
    "OUT_OF_RANGE": 400,
    "UNIMPLEMENTED": 501,
    "INTERNAL": 500,
    "UNAVAILABLE": 503,
    "DATA_LOSS": 500,
    "UNAUTHENTICATED": 401,
}
|
||||
|
||||
|
||||
def _extract_grpc_status_name(error: Any) -> Optional[str]:
|
||||
"""
|
||||
Best-effort extraction of a gRPC StatusCode name from an arbitrary error.
|
||||
|
||||
Works for ``grpc.RpcError`` instances (which expose ``.code()``) as well
|
||||
as plain exceptions whose string representation contains a status name.
|
||||
"""
|
||||
code_fn = getattr(error, "code", None)
|
||||
if callable(code_fn):
|
||||
try:
|
||||
code = code_fn()
|
||||
except Exception:
|
||||
code = None
|
||||
name = getattr(code, "name", None)
|
||||
if isinstance(name, str):
|
||||
return name
|
||||
return None
|
||||
|
||||
|
||||
def _extract_grpc_details(error: Any) -> Optional[str]:
|
||||
"""Best-effort extraction of a human-readable detail string from a gRPC error."""
|
||||
details_fn = getattr(error, "details", None)
|
||||
if callable(details_fn):
|
||||
try:
|
||||
details = details_fn()
|
||||
except Exception:
|
||||
details = None
|
||||
if isinstance(details, str) and details:
|
||||
return details
|
||||
return None
|
||||
|
||||
|
||||
def grpc_error_to_litellm_exception(error: Exception) -> NvidiaRivaException:
    """
    Wrap a gRPC error (or any exception raised from the Riva client) in a
    ``NvidiaRivaException`` carrying an HTTP-equivalent status code.

    The status code comes from ``_GRPC_STATUS_CODE_TO_HTTP`` and defaults to
    500 when no status name can be extracted or the name is not in the
    table. The message uses the gRPC detail string when available, else
    ``str(error)``.
    """
    status_name = _extract_grpc_status_name(error)
    http_status = _GRPC_STATUS_CODE_TO_HTTP.get(status_name or "", 500)
    detail = _extract_grpc_details(error) or str(error)

    if status_name:
        message = f"NVIDIA Riva gRPC error ({status_name}): {detail}"
    else:
        message = f"NVIDIA Riva error: {detail}"

    return NvidiaRivaException(status_code=http_status, message=message)
|
||||
@@ -211,6 +211,12 @@ from .llms.oobabooga.chat import oobabooga
|
||||
from .llms.openai.completion.handler import OpenAITextCompletion
|
||||
from .llms.openai.image_variations.handler import OpenAIImageVariationsHandler
|
||||
from .llms.openai.openai import OpenAIChatCompletion
|
||||
from .llms.nvidia_riva.audio_transcription.handler import (
|
||||
NvidiaRivaAudioTranscription,
|
||||
)
|
||||
from .llms.nvidia_riva.audio_transcription.transformation import (
|
||||
NvidiaRivaAudioTranscriptionConfig,
|
||||
)
|
||||
from .llms.openai.transcriptions.handler import OpenAIAudioTranscription
|
||||
from .llms.openai_like.chat.handler import OpenAILikeChatHandler
|
||||
from .llms.openai_like.embedding.handler import OpenAILikeEmbeddingHandler
|
||||
@@ -266,6 +272,7 @@ from .types.utils import (
|
||||
openai_chat_completions = OpenAIChatCompletion()
|
||||
openai_text_completions = OpenAITextCompletion()
|
||||
openai_audio_transcriptions = OpenAIAudioTranscription()
|
||||
nvidia_riva_audio_transcriptions = NvidiaRivaAudioTranscription()
|
||||
openai_image_variations = OpenAIImageVariationsHandler()
|
||||
groq_chat_completions = GroqChatCompletion()
|
||||
sap_gen_ai_hub_chat_completions = GenAIHubOrchestration()
|
||||
@@ -6605,6 +6612,26 @@ def transcription(
|
||||
litellm_params=litellm_params_dict,
|
||||
shared_session=shared_session,
|
||||
)
|
||||
elif custom_llm_provider == "nvidia_riva":
|
||||
# NVIDIA Riva is gRPC-based, not HTTP. It has its own dedicated handler
|
||||
# rather than going through base_llm_http_handler.
|
||||
response = nvidia_riva_audio_transcriptions.audio_transcriptions(
|
||||
model=model,
|
||||
audio_file=file,
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params_dict,
|
||||
model_response=model_response,
|
||||
atranscription=atranscription,
|
||||
timeout=timeout,
|
||||
logging_obj=litellm_logging_obj,
|
||||
api_base=api_base,
|
||||
api_key=api_key,
|
||||
provider_config=(
|
||||
provider_config
|
||||
if isinstance(provider_config, NvidiaRivaAudioTranscriptionConfig)
|
||||
else None
|
||||
),
|
||||
)
|
||||
elif provider_config is not None:
|
||||
response = base_llm_http_handler.audio_transcriptions(
|
||||
model=model,
|
||||
|
||||
@@ -3247,6 +3247,7 @@ class LlmProviders(str, Enum):
|
||||
A2A = "a2a"
|
||||
GIGACHAT = "gigachat"
|
||||
NVIDIA_NIM = "nvidia_nim"
|
||||
NVIDIA_RIVA = "nvidia_riva"
|
||||
CEREBRAS = "cerebras"
|
||||
AI21_CHAT = "ai21_chat"
|
||||
VOLCENGINE = "volcengine"
|
||||
|
||||
@@ -8545,6 +8545,12 @@ class ProviderConfigManager:
|
||||
)
|
||||
|
||||
return MistralAudioTranscriptionConfig()
|
||||
elif litellm.LlmProviders.NVIDIA_RIVA == provider:
|
||||
from litellm.llms.nvidia_riva.audio_transcription.transformation import (
|
||||
NvidiaRivaAudioTranscriptionConfig,
|
||||
)
|
||||
|
||||
return NvidiaRivaAudioTranscriptionConfig()
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -1610,6 +1610,22 @@
|
||||
"interactions": true
|
||||
}
|
||||
},
|
||||
"nvidia_riva": {
|
||||
"display_name": "Nvidia Riva (`nvidia_riva`)",
|
||||
"url": "https://docs.litellm.ai/docs/providers/nvidia_riva",
|
||||
"endpoints": {
|
||||
"chat_completions": false,
|
||||
"messages": false,
|
||||
"responses": false,
|
||||
"embeddings": false,
|
||||
"image_generations": false,
|
||||
"audio_transcriptions": true,
|
||||
"audio_speech": false,
|
||||
"moderations": false,
|
||||
"batches": false,
|
||||
"rerank": false
|
||||
}
|
||||
},
|
||||
"oci": {
|
||||
"display_name": "OCI (`oci`)",
|
||||
"url": "https://docs.litellm.ai/docs/providers/oci",
|
||||
|
||||
@@ -86,6 +86,14 @@ grpc = [
|
||||
# Newest non-yanked release older than the 30-day cutoff.
|
||||
"grpcio==1.78.0",
|
||||
]
|
||||
stt-nvidia-riva = [
|
||||
# NVIDIA Riva STT provider (gRPC). These are imported lazily inside the
|
||||
# provider handler so litellm core remains usable without them.
|
||||
"nvidia-riva-client>=2.15.0",
|
||||
"soundfile>=0.12.1",
|
||||
"audioread>=3.0.1",
|
||||
"numpy>=1.26.0",
|
||||
]
|
||||
google = ["google-cloud-aiplatform==1.133.0"]
|
||||
proxy-runtime = [
|
||||
# Historically bundled in the proxy Docker images via requirements.txt.
|
||||
|
||||
@@ -126,6 +126,7 @@ sentry_sdk: >=2.21.0 # Unknown license
|
||||
cryptography: >=43.0.1 # Unknown license
|
||||
tzdata: >=2025.1 # Unknown license
|
||||
urllib3: >=2.0.0 # MIT license - https://github.com/urllib3/urllib3
|
||||
audioread: >=3.0.1 # MIT license manually verified - https://github.com/beetbox/audioread
|
||||
python-dotenv: >=1.0.0 # Unknown license
|
||||
tiktoken: >=0.8.0 # Unknown license
|
||||
click: >=8.1.7 # Unknown license
|
||||
|
||||
@@ -0,0 +1,130 @@
|
||||
"""
|
||||
Tests for the NVIDIA Riva audio resampling utility.
|
||||
|
||||
The resampler turns arbitrary inbound audio (mp3/wav/m4a/...) into the wire
|
||||
format Riva's gRPC ASR expects: 16 kHz mono LINEAR_PCM (int16 LE).
|
||||
"""
|
||||
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import soundfile as sf
|
||||
|
||||
sys.path.insert(0, os.path.abspath("../../../../.."))
|
||||
|
||||
from litellm.llms.nvidia_riva.audio_transcription.audio_utils import (
|
||||
resample_to_riva_pcm,
|
||||
)
|
||||
from litellm.llms.nvidia_riva.common_utils import NvidiaRivaException
|
||||
|
||||
|
||||
def _wav_bytes(samples: np.ndarray, sample_rate: int) -> bytes:
    """Encode ``samples`` as an in-memory 16-bit PCM WAV and return its bytes."""
    with io.BytesIO() as buffer:
        sf.write(buffer, samples, sample_rate, format="WAV", subtype="PCM_16")
        return buffer.getvalue()
|
||||
|
||||
|
||||
def test_resample_24khz_stereo_to_16khz_mono_int16():
    """A 1 s 24 kHz stereo WAV comes back as 1 s of 16 kHz mono 2-byte samples."""
    source_rate = 24000
    seconds = 1.0
    frame_count = int(source_rate * seconds)
    timeline = np.linspace(0, seconds, frame_count, endpoint=False)

    # A different tone per channel, so the two channels are not identical.
    channels = [0.5 * np.sin(2 * np.pi * freq * timeline) for freq in (440.0, 660.0)]
    stereo = np.stack(channels, axis=1).astype(np.float32)

    result = resample_to_riva_pcm(_wav_bytes(stereo, source_rate))

    assert result.sample_rate_hz == 16000
    assert result.num_channels == 1
    # int16 = 2 bytes per sample
    sample_count = int(round(seconds * 16000))
    assert len(result.pcm_bytes) == sample_count * 2
    assert result.duration_seconds == pytest.approx(seconds, abs=0.005)
|
||||
|
||||
|
||||
def test_resample_16khz_mono_passes_through_int16_bytes_match_length():
    """Input already at 16 kHz mono keeps its sample count and 1 s duration."""
    rate = 16000
    count = rate  # one second of audio
    phase = np.linspace(0, 2 * np.pi * 200, count)
    tone = (0.1 * np.sin(phase)).astype(np.float32)

    result = resample_to_riva_pcm(_wav_bytes(tone, rate))

    assert result.sample_rate_hz == 16000
    # Two bytes per sample in the output buffer.
    assert len(result.pcm_bytes) == count * 2
    assert result.duration_seconds == pytest.approx(1.0, abs=0.001)
|
||||
|
||||
|
||||
def test_resample_preserves_int16_clip_range():
    """Samples outside [-1, 1] must decode to values within the int16 clip range."""
    rate = 16000
    out_of_range = np.array([2.0, -2.0, 0.0, 1.0], dtype=np.float32)

    result = resample_to_riva_pcm(_wav_bytes(out_of_range, rate))

    # Output is read back as little-endian signed 16-bit integers.
    pcm = np.frombuffer(result.pcm_bytes, dtype="<i2")
    # Anything outside [-1, 1] should clip to int16 boundary.
    # NOTE(review): the upper-bound check can never fail for an int16 array;
    # only the lower bound (-32767 rather than -32768) is a real constraint.
    highest, lowest = pcm.max(), pcm.min()
    assert highest <= 32767
    assert lowest >= -32767
|
||||
|
||||
|
||||
def test_unknown_format_raises_clear_error():
    """Undecodable input raises ``NvidiaRivaException`` mentioning "Riva STT"."""
    # Four arbitrary bytes: not valid audio in any container we can decode.
    garbage = b"\x00\x01\x02\x03"

    with pytest.raises(NvidiaRivaException) as excinfo:
        resample_to_riva_pcm(garbage)

    # Message must hint at what to do next.
    error_message = excinfo.value.message
    assert "Riva STT" in error_message
|
||||
|
||||
|
||||
def test_audioread_fallback_writes_to_tempfile_path(monkeypatch):
    """
    The audioread fallback handles compressed formats (mp3, m4a, ...). Most
    audioread backends call into a subprocess (FFmpeg, GStreamer) and
    require a real filesystem path — passing a BytesIO blows up with a
    TypeError in subprocess.Popen. This test guards against that: it checks
    that ``audio_open`` receives a string path pointing at a file holding
    exactly the input bytes, and that the file is gone afterwards.
    """
    payload = b"\xff\xfbfake-mp3-bytes-not-actually-decodable"
    seen_paths = []

    class FakeAudioSource:
        """Minimal stand-in for the object ``audioread.audio_open`` returns."""

        samplerate = 22050
        channels = 1

        def __enter__(self):
            return self

        def __exit__(self, *args):
            return False

        def __iter__(self):
            # One block of four silent int16 samples.
            yield np.zeros(4, dtype=np.int16).tobytes()

    def fake_audio_open(path):
        assert isinstance(path, str), "audioread requires a filesystem path"
        seen_paths.append(path)
        with open(path, "rb") as handle:
            on_disk = handle.read()
        assert on_disk == payload
        return FakeAudioSource()

    # Make the soundfile decode fail so the audioread path is exercised.
    fake_sf = MagicMock()
    fake_sf.read.side_effect = RuntimeError("libsndfile cannot decode mp3")
    monkeypatch.setitem(sys.modules, "soundfile", fake_sf)

    fake_audioread = SimpleNamespace(audio_open=fake_audio_open)
    monkeypatch.setitem(sys.modules, "audioread", fake_audioread)

    result = resample_to_riva_pcm(payload)

    assert result.sample_rate_hz == 16000
    assert seen_paths
    temp_path = seen_paths[0]
    assert temp_path.endswith(".audio")
    # Tempfile must be cleaned up after decode.
    assert not os.path.exists(temp_path)
|
||||
@@ -0,0 +1,419 @@
|
||||
"""
|
||||
End-to-end-ish tests for NvidiaRivaAudioTranscription.
|
||||
|
||||
We mock ``riva.client`` so the test does not need the real gRPC SDK or a
|
||||
running Riva server. The mock also lets us assert how Auth metadata is
|
||||
constructed (NVCF vs self-hosted) and how the streaming generator output
|
||||
is aggregated.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import soundfile as sf
|
||||
|
||||
sys.path.insert(0, os.path.abspath("../../../../.."))
|
||||
|
||||
from litellm.llms.nvidia_riva.audio_transcription import handler as handler_mod
|
||||
from litellm.llms.nvidia_riva.audio_transcription.handler import (
|
||||
NvidiaRivaAudioTranscription,
|
||||
)
|
||||
from litellm.llms.nvidia_riva.common_utils import NvidiaRivaException
|
||||
from litellm.types.utils import TranscriptionResponse
|
||||
|
||||
|
||||
def _make_wav_bytes(seconds: float = 1.0, sample_rate: int = 16000) -> bytes:
    """Return the bytes of a 16-bit PCM WAV holding a quiet sine tone."""
    frame_count = int(sample_rate * seconds)
    phase = np.linspace(0, 2 * np.pi * 220 * seconds, frame_count)
    tone = (0.05 * np.sin(phase)).astype(np.float32)

    with io.BytesIO() as buffer:
        sf.write(buffer, tone, sample_rate, format="WAV", subtype="PCM_16")
        return buffer.getvalue()
|
||||
|
||||
|
||||
def _fake_word(word: str, start_ms: int, end_ms: int):
|
||||
return SimpleNamespace(word=word, start_time=start_ms, end_time=end_ms)
|
||||
|
||||
|
||||
def _fake_alternative(transcript: str, words=None):
|
||||
return SimpleNamespace(transcript=transcript, words=words or [])
|
||||
|
||||
|
||||
def _fake_result(is_final: bool, alternatives):
|
||||
return SimpleNamespace(is_final=is_final, alternatives=alternatives)
|
||||
|
||||
|
||||
def _fake_response(results):
|
||||
return SimpleNamespace(results=results)
|
||||
|
||||
|
||||
@pytest.fixture
def mock_riva(monkeypatch):
    """
    Replace the handler's ``_import_riva`` with fakes for the parts of
    ``riva.client`` it touches:

    - ``Auth`` (constructor; records how it was called)
    - ``ASRService`` with ``streaming_response_generator``
    - ``RecognitionConfig``, ``StreamingRecognitionConfig``, ``EndpointingConfig``
    - ``AudioEncoding`` namespace with ``LINEAR_PCM``

    Yields nothing; returns a namespace with ``auth_calls`` (recorded Auth
    arguments), ``responses`` (a ``{"value": [...]}`` holder the tests fill
    with the stream to replay) and ``client`` (the fake module).
    """
    auth_calls = {}
    streaming_responses_holder = {"value": []}

    class FakeAuth:
        def __init__(self, *args, **kwargs):
            # Support both keyword and positional Auth constructors.
            if kwargs:
                uri = kwargs.get("uri")
                use_ssl = kwargs.get("use_ssl")
                metadata_args = kwargs.get("metadata_args")
            else:
                # positional: (None, use_ssl, uri, metadata); pad so that
                # missing trailing arguments read as None.
                padded = (tuple(args) + (None,) * 4)[:4]
                _, use_ssl, uri, metadata_args = padded
            auth_calls["uri"] = uri
            auth_calls["use_ssl"] = use_ssl
            auth_calls["metadata_args"] = metadata_args

    class FakeRecognitionConfig:
        def __init__(self, **kwargs):
            self._kwargs = kwargs
            # ``CopyFrom`` accepts one argument and does nothing.
            self.endpointing_config = SimpleNamespace(CopyFrom=lambda _: None)

    class FakeStreamingRecognitionConfig:
        def __init__(self, config, interim_results):
            self.config = config
            self.interim_results = interim_results

    class FakeEndpointingConfig:
        def __init__(self, **kwargs):
            self._kwargs = kwargs

    class FakeAudioEncoding:
        LINEAR_PCM = "LINEAR_PCM"

    class FakeASRService:
        def __init__(self, auth):
            self.auth = auth

        def streaming_response_generator(self, audio_chunks, streaming_config):
            # Drain audio_chunks generator so we exercise the chunking path.
            for _ in audio_chunks:
                pass
            for canned in streaming_responses_holder["value"]:
                yield canned

    fake_riva_client = SimpleNamespace(
        Auth=FakeAuth,
        ASRService=FakeASRService,
        RecognitionConfig=FakeRecognitionConfig,
        StreamingRecognitionConfig=FakeStreamingRecognitionConfig,
        EndpointingConfig=FakeEndpointingConfig,
        AudioEncoding=FakeAudioEncoding,
    )

    # The same fake serves as both elements of the pair ``_import_riva`` returns.
    monkeypatch.setattr(
        handler_mod, "_import_riva", lambda: (fake_riva_client, fake_riva_client)
    )

    return SimpleNamespace(
        auth_calls=auth_calls,
        responses=streaming_responses_holder,
        client=fake_riva_client,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def logging_obj():
    """Provide a fresh ``MagicMock`` to pass as the handler's ``logging_obj``."""
    fake_logger = MagicMock()
    return fake_logger
|
||||
|
||||
|
||||
def test_sync_path_aggregates_only_final_results(mock_riva, logging_obj):
    """Empty and interim chunks are dropped; final chunks are concatenated."""

    def final_chunk(transcript, word, start_ms, end_ms):
        # One response holding a single final result with one timed word.
        alternative = _fake_alternative(
            transcript, words=[_fake_word(word, start_ms, end_ms)]
        )
        return _fake_response(
            results=[_fake_result(is_final=True, alternatives=[alternative])]
        )

    # Empty heartbeat chunk: ignore.
    heartbeat = _fake_response(results=[])
    # Interim chunk (not final): ignore.
    interim = _fake_response(
        results=[
            _fake_result(is_final=False, alternatives=[_fake_alternative("partial...")])
        ]
    )
    # Two final chunks aggregated.
    mock_riva.responses["value"] = [
        heartbeat,
        interim,
        final_chunk("Hello,", "Hello,", 0, 320),
        final_chunk(" world.", "world.", 480, 870),
    ]

    request_params = {
        "language_code": "en-US",
        "enable_word_time_offsets": True,
        "response_format": "verbose_json",
        "timestamp_granularities": ["word"],
    }
    impl = NvidiaRivaAudioTranscription()
    response: TranscriptionResponse = impl.audio_transcriptions(
        model="nvidia/parakeet-ctc-1_1b-asr",
        audio_file=_make_wav_bytes(),
        optional_params=request_params,
        litellm_params={},
        model_response=TranscriptionResponse(),
        timeout=60,
        logging_obj=logging_obj,
        api_key="nvapi-xxx",
        api_base="grpc.nvcf.nvidia.com:443",
    )

    assert response.text == "Hello, world."
    # duration is propagated from the resampler.
    reported_duration = response._hidden_params["audio_transcription_duration"]
    assert reported_duration == pytest.approx(1.0, abs=0.05)
    # word timestamps converted from ms to seconds.
    words = response["words"]
    assert words[0]["start"] == pytest.approx(0.0)
    assert words[1]["end"] == pytest.approx(0.87)
    # The sync entry point reports itself as non-async in the pre-call log.
    logged_args = logging_obj.pre_call.call_args.kwargs["additional_args"]
    assert logged_args["atranscription"] is False
|
||||
|
||||
|
||||
def test_auth_nvcf_defaults_use_ssl_and_attaches_function_id(mock_riva, logging_obj):
    """With ``nvcf_function_id`` set, Auth gets SSL on plus function-id and bearer metadata."""
    final_ok = _fake_result(is_final=True, alternatives=[_fake_alternative("ok")])
    mock_riva.responses["value"] = [_fake_response(results=[final_ok])]

    nvcf_params = {
        "nvcf_function_id": "abc-123",
        "language_code": "en-US",
    }
    NvidiaRivaAudioTranscription().audio_transcriptions(
        model="m",
        audio_file=_make_wav_bytes(),
        optional_params=nvcf_params,
        litellm_params={},
        model_response=TranscriptionResponse(),
        timeout=60,
        logging_obj=logging_obj,
        api_key="nvapi-xxx",
        api_base="grpc.nvcf.nvidia.com:443",
    )

    recorded = mock_riva.auth_calls
    assert recorded["uri"] == "grpc.nvcf.nvidia.com:443"
    assert recorded["use_ssl"] is True
    metadata = dict(recorded["metadata_args"])
    assert metadata["function-id"] == "abc-123"
    assert metadata["authorization"] == "Bearer nvapi-xxx"
|
||||
|
||||
|
||||
def test_auth_self_hosted_defaults_no_ssl_and_no_function_id(mock_riva, logging_obj):
    """Without a function id or API key, Auth gets SSL off and no auth metadata."""
    final_ok = _fake_result(is_final=True, alternatives=[_fake_alternative("ok")])
    mock_riva.responses["value"] = [_fake_response(results=[final_ok])]

    NvidiaRivaAudioTranscription().audio_transcriptions(
        model="m",
        audio_file=_make_wav_bytes(),
        optional_params={"language_code": "en-US"},
        litellm_params={},
        model_response=TranscriptionResponse(),
        timeout=60,
        logging_obj=logging_obj,
        api_key=None,
        api_base="localhost:50051",
    )

    recorded = mock_riva.auth_calls
    assert recorded["uri"] == "localhost:50051"
    assert recorded["use_ssl"] is False
    metadata = dict(recorded["metadata_args"])
    # No function-id, no authorization metadata.
    for absent_key in ("function-id", "authorization"):
        assert absent_key not in metadata
|
||||
|
||||
|
||||
def test_explicit_use_ssl_override_wins(mock_riva, logging_obj):
    """
    Self-hosted Riva behind an ingress with TLS termination is a real
    deployment topology. ``use_ssl=True`` must be honored even without an
    NVCF function id.
    """
    final_ok = _fake_result(is_final=True, alternatives=[_fake_alternative("ok")])
    mock_riva.responses["value"] = [_fake_response(results=[final_ok])]

    tls_params = {"use_ssl": True, "language_code": "en-US"}
    NvidiaRivaAudioTranscription().audio_transcriptions(
        model="m",
        audio_file=_make_wav_bytes(),
        optional_params=tls_params,
        litellm_params={},
        model_response=TranscriptionResponse(),
        timeout=60,
        logging_obj=logging_obj,
        api_key=None,
        api_base="riva.internal.company.com:443",
    )

    assert mock_riva.auth_calls["use_ssl"] is True
|
||||
|
||||
|
||||
def test_missing_api_base_raises_clear_error(mock_riva, logging_obj):
    """Calling without ``api_base`` raises an error whose message names ``api_base``."""
    call_kwargs = dict(
        model="m",
        audio_file=_make_wav_bytes(),
        optional_params={},
        litellm_params={},
        model_response=TranscriptionResponse(),
        timeout=60,
        logging_obj=logging_obj,
        api_key=None,
        api_base=None,
    )
    impl = NvidiaRivaAudioTranscription()

    with pytest.raises(NvidiaRivaException) as excinfo:
        impl.audio_transcriptions(**call_kwargs)

    error_message = excinfo.value.message
    assert "api_base" in error_message
|
||||
|
||||
|
||||
def test_async_path_uses_to_thread(mock_riva, logging_obj):
    """
    Drive the async entry point to completion and check both the returned
    transcript and the ``atranscription`` flag recorded by the pre-call log.
    """
    final_result = _fake_result(
        is_final=True, alternatives=[_fake_alternative("async ok")]
    )
    mock_riva.responses["value"] = [_fake_response(results=[final_result])]

    handler = NvidiaRivaAudioTranscription()
    pending = handler.async_audio_transcriptions(
        model="m",
        audio_file=_make_wav_bytes(),
        optional_params={"language_code": "en-US"},
        litellm_params={},
        model_response=TranscriptionResponse(),
        timeout=60,
        logging_obj=logging_obj,
        api_key=None,
        api_base="localhost:50051",
    )
    response = asyncio.run(pending)

    assert response.text == "async ok"

    pre_call_kwargs = logging_obj.pre_call.call_args.kwargs
    assert pre_call_kwargs["additional_args"]["atranscription"] is True
|
||||
|
||||
|
||||
def test_timeout_is_forwarded_to_streaming_generator_when_supported(
    mock_riva, logging_obj
):
    """
    The call-level ``timeout`` must be handed to
    ``streaming_response_generator`` when that method accepts a ``timeout``
    keyword; otherwise a stalled Riva server could block the gRPC stream
    indefinitely.
    """
    observed = {}

    def streaming_with_timeout(self, audio_chunks, streaming_config, timeout=None):
        # Record the deadline we were given, drain the input stream, then
        # emit one final recognition result.
        observed["timeout"] = timeout
        for _ in audio_chunks:
            pass
        final_result = _fake_result(
            is_final=True, alternatives=[_fake_alternative("ok")]
        )
        yield _fake_response(results=[final_result])

    mock_riva.client.ASRService.streaming_response_generator = streaming_with_timeout

    handler = NvidiaRivaAudioTranscription()
    request = {
        "model": "m",
        "audio_file": _make_wav_bytes(),
        "optional_params": {"language_code": "en-US"},
        "litellm_params": {},
        "model_response": TranscriptionResponse(),
        "timeout": 12.5,
        "logging_obj": logging_obj,
        "api_key": None,
        "api_base": "localhost:50051",
    }
    handler.audio_transcriptions(**request)

    assert observed["timeout"] == pytest.approx(12.5)
|
||||
|
||||
|
||||
def test_grpc_error_is_wrapped_as_nvidia_riva_exception(mock_riva, logging_obj):
    """
    An exception exposing ``code()`` / ``details()`` that is raised from the
    streaming call must surface as ``NvidiaRivaException`` with status 401 and
    the ``UNAUTHENTICATED`` code name in its message.
    """

    class FakeGrpcError(Exception):
        # Stand-in exposing only the two accessors this test relies on.
        def code(self):
            status = SimpleNamespace(name="UNAUTHENTICATED")
            return status

        def details(self):
            return "bad token"

    def raising_streaming_response_generator(self, audio_chunks, streaming_config):
        # Consume the request stream first, then fail the RPC.
        for _ in audio_chunks:
            pass
        raise FakeGrpcError("rpc fail")

    mock_riva.client.ASRService.streaming_response_generator = (
        raising_streaming_response_generator
    )

    handler = NvidiaRivaAudioTranscription()
    with pytest.raises(NvidiaRivaException) as raised:
        request = {
            "model": "m",
            "audio_file": _make_wav_bytes(),
            "optional_params": {"language_code": "en-US"},
            "litellm_params": {},
            "model_response": TranscriptionResponse(),
            "timeout": 60,
            "logging_obj": logging_obj,
            "api_key": "nvapi-xxx",
            "api_base": "grpc.nvcf.nvidia.com:443",
        }
        handler.audio_transcriptions(**request)

    wrapped = raised.value
    assert wrapped.status_code == 401
    assert "UNAUTHENTICATED" in wrapped.message
|
||||
@@ -0,0 +1,275 @@
|
||||
"""
|
||||
Unit tests for NvidiaRivaAudioTranscriptionConfig.
|
||||
|
||||
These tests do not require ``nvidia-riva-client`` or any audio libs to be
|
||||
installed; the transformation layer is intentionally pure-Python on dicts.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, os.path.abspath("../../../../.."))
|
||||
|
||||
from litellm.llms.base_llm.audio_transcription.transformation import (
|
||||
AudioTranscriptionRequestData,
|
||||
)
|
||||
from litellm.llms.nvidia_riva.audio_transcription.transformation import (
|
||||
NvidiaRivaAudioTranscriptionConfig,
|
||||
)
|
||||
from litellm.llms.nvidia_riva.common_utils import NvidiaRivaException
|
||||
|
||||
|
||||
@pytest.fixture
def cfg():
    """Build the ``NvidiaRivaAudioTranscriptionConfig`` instance under test."""
    config = NvidiaRivaAudioTranscriptionConfig()
    return config
|
||||
|
||||
|
||||
def test_supported_openai_params(cfg):
    """The config advertises the three OpenAI params this provider maps."""
    supported = cfg.get_supported_openai_params(model="nvidia/parakeet-ctc-1_1b-asr")
    for expected_name in ("language", "response_format", "timestamp_granularities"):
        assert expected_name in supported
|
||||
|
||||
|
||||
def test_map_language_normalizes_bare_codes(cfg):
    """A bare ``en`` language is expanded to the ``en-US`` language code."""
    requested = {"language": "en"}
    mapped = cfg.map_openai_params(
        non_default_params=requested,
        optional_params={},
        model="m",
        drop_params=False,
    )
    language_code = mapped["language_code"]
    assert language_code == "en-US"
|
||||
|
||||
|
||||
def test_map_language_passes_through_bcp47(cfg):
    """A language already carrying a region (``de-DE``) is left untouched."""
    requested = {"language": "de-DE"}
    mapped = cfg.map_openai_params(
        non_default_params=requested,
        optional_params={},
        model="m",
        drop_params=False,
    )
    language_code = mapped["language_code"]
    assert language_code == "de-DE"
|
||||
|
||||
|
||||
def test_map_language_es_defaults_to_castilian_spain(cfg):
    """
    Bare ``es`` (ISO-639 Spanish) must map to ``es-ES`` rather than ``es-US``;
    sending every Spanish caller to a US-tuned Riva model would silently
    degrade accuracy.
    """
    requested = {"language": "es"}
    mapped = cfg.map_openai_params(
        non_default_params=requested,
        optional_params={},
        model="m",
        drop_params=False,
    )
    language_code = mapped["language_code"]
    assert language_code == "es-ES"
|
||||
|
||||
|
||||
def test_map_timestamp_granularities_word_enables_word_offsets(cfg):
    """``["word"]`` granularity turns on word offsets and is itself preserved."""
    requested = {"timestamp_granularities": ["word"]}
    mapped = cfg.map_openai_params(
        non_default_params=requested,
        optional_params={},
        model="m",
        drop_params=False,
    )
    word_offsets_flag = mapped["enable_word_time_offsets"]
    assert word_offsets_flag is True
    granularities = mapped["timestamp_granularities"]
    assert granularities == ["word"]
|
||||
|
||||
|
||||
def test_map_timestamp_granularities_segment_only_does_not_enable_word_offsets(cfg):
    """``["segment"]`` alone must not add ``enable_word_time_offsets``."""
    requested = {"timestamp_granularities": ["segment"]}
    mapped = cfg.map_openai_params(
        non_default_params=requested,
        optional_params={},
        model="m",
        drop_params=False,
    )
    assert "enable_word_time_offsets" not in mapped
|
||||
|
||||
|
||||
def test_transform_request_builds_recognition_config(cfg):
    """
    The request transform returns ``AudioTranscriptionRequestData`` whose
    ``recognition_config`` carries the language, fixed audio format, word
    offset flag and Riva model name, and whose payload omits ``audio_file``
    and ``auth``.
    """
    riva_model = "parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer"
    provider_params = {
        "language_code": "en-US",
        "enable_word_time_offsets": True,
        "nvcf_function_id": "abc-123",
        "use_ssl": True,
        "riva_model_name": riva_model,
    }
    connection_params = {
        "api_base": "grpc.nvcf.nvidia.com:443",
        "api_key": "nvapi-xxx",
    }
    result = cfg.transform_audio_transcription_request(
        model="nvidia/parakeet-ctc-1_1b-asr",
        audio_file=b"fake-audio",
        optional_params=provider_params,
        litellm_params=connection_params,
    )

    assert isinstance(result, AudioTranscriptionRequestData)
    payload = result.data
    assert payload["recognition_config"]["language_code"] == "en-US"
    assert payload["recognition_config"]["sample_rate_hertz"] == 16000
    assert payload["recognition_config"]["audio_channel_count"] == 1
    assert payload["recognition_config"]["encoding"] == "LINEAR_PCM"
    assert payload["recognition_config"]["enable_word_time_offsets"] is True
    assert payload["recognition_config"]["model"] == riva_model
    for excluded_key in ("audio_file", "auth"):
        assert excluded_key not in payload
|
||||
|
||||
|
||||
def test_transform_request_default_riva_model_is_empty_for_auto_select(cfg):
    """
    With no ``riva_model_name`` supplied, ``recognition_config["model"]`` is
    the empty string so that Riva picks the deployed model itself; internal
    NVIDIA deployment names vary across versions/regions.
    """
    connection_params = {"api_base": "grpc.nvcf.nvidia.com:443"}
    result = cfg.transform_audio_transcription_request(
        model="nvidia/parakeet-ctc-1_1b-asr",
        audio_file=b"fake-audio",
        optional_params={"language_code": "en-US"},
        litellm_params=connection_params,
    )
    selected_model = result.data["recognition_config"]["model"]
    assert selected_model == ""
|
||||
|
||||
|
||||
def test_chunking_strategy_server_vad_maps_to_endpointing_config(cfg):
    """
    A ``server_vad`` chunking strategy is translated into an
    ``endpointing_config``: threshold -> start/stop thresholds,
    ``silence_duration_ms`` -> ``stop_history``,
    ``prefix_padding_ms`` -> ``stop_history_eou``.
    """
    vad_strategy = {
        "type": "server_vad",
        "threshold": 0.5,
        "silence_duration_ms": 700,
        "prefix_padding_ms": 250,
    }
    result = cfg.transform_audio_transcription_request(
        model="m",
        audio_file=b"x",
        optional_params={"chunking_strategy": vad_strategy},
        litellm_params={"api_base": "localhost:50051"},
    )

    endpointing = result.data["recognition_config"].get("endpointing_config")
    assert endpointing is not None
    expected_fields = (
        ("start_threshold", 0.5),
        ("stop_threshold", 0.5),
        ("stop_history", 700),
        ("stop_history_eou", 250),
    )
    for field_name, expected_value in expected_fields:
        assert endpointing[field_name] == expected_value
|
||||
|
||||
|
||||
def test_chunking_strategy_auto_leaves_endpointing_config_unset(cfg):
    """``chunking_strategy="auto"`` must not produce an ``endpointing_config``."""
    result = cfg.transform_audio_transcription_request(
        model="m",
        audio_file=b"x",
        optional_params={"chunking_strategy": "auto"},
        litellm_params={"api_base": "localhost:50051"},
    )
    recognition_config = result.data["recognition_config"]
    assert "endpointing_config" not in recognition_config
|
||||
|
||||
|
||||
def test_explicit_endpointing_config_pass_through(cfg):
    """A caller-supplied ``endpointing_config`` is forwarded unchanged."""
    provider_params = {
        "endpointing_config": {"stop_history": 1200, "start_threshold": 0.3}
    }
    result = cfg.transform_audio_transcription_request(
        model="m",
        audio_file=b"x",
        optional_params=provider_params,
        litellm_params={"api_base": "localhost:50051"},
    )
    forwarded = result.data["recognition_config"]["endpointing_config"]
    assert forwarded == {"stop_history": 1200, "start_threshold": 0.3}
|
||||
|
||||
|
||||
def test_build_transcription_response_text_format():
    """
    Final transcripts are concatenated into ``response.text``.

    Note: despite the test name, this exercises ``response_format="json"``.
    """
    chunks = [
        {"transcript": "Hello,", "words": []},
        {"transcript": " this is parakeet.", "words": []},
    ]
    build = NvidiaRivaAudioTranscriptionConfig.build_transcription_response
    response = build(
        final_results=chunks,
        response_format="json",
        duration_seconds=2.4,
        timestamp_granularities=None,
    )

    assert response.text == "Hello, this is parakeet."
    assert response["task"] == "transcribe"
    # Only verbose_json responses carry a duration.
    assert "duration" not in response
|
||||
|
||||
|
||||
def test_build_transcription_response_skips_empty_chunks():
    """Empty transcripts around real content leave only the content in ``text``."""
    blank_chunk = {"transcript": "", "words": []}
    chunks = [
        dict(blank_chunk, words=[]),
        {"transcript": "actual content", "words": []},
        dict(blank_chunk, words=[]),
    ]
    build = NvidiaRivaAudioTranscriptionConfig.build_transcription_response
    response = build(
        final_results=chunks,
        response_format="json",
        duration_seconds=1.0,
        timestamp_granularities=None,
    )
    assert response.text == "actual content"
|
||||
|
||||
|
||||
def test_build_transcription_response_verbose_json_with_words():
    """
    ``verbose_json`` with word granularity returns the joined text, the
    duration, and per-word ``start``/``end`` values converted from
    milliseconds to seconds.
    """
    hello_chunk = {
        "transcript": "Hello,",
        "words": [{"word": "Hello,", "start_time_ms": 0, "end_time_ms": 320}],
    }
    world_chunk = {
        "transcript": " world.",
        "words": [{"word": "world.", "start_time_ms": 480, "end_time_ms": 870}],
    }
    build = NvidiaRivaAudioTranscriptionConfig.build_transcription_response
    response = build(
        final_results=[hello_chunk, world_chunk],
        response_format="verbose_json",
        duration_seconds=2.475,
        timestamp_granularities=["word"],
    )

    assert response.text == "Hello, world."
    assert response["duration"] == 2.475

    words = response["words"]
    assert words[0]["word"] == "Hello,"
    # Input offsets are in ms; the response exposes seconds.
    assert words[0]["start"] == pytest.approx(0.0)
    assert words[0]["end"] == pytest.approx(0.32)
    assert words[1]["start"] == pytest.approx(0.48)
    assert words[1]["end"] == pytest.approx(0.87)
|
||||
|
||||
|
||||
def test_build_transcription_response_verbose_json_without_word_granularity_omits_words():
    """With only ``segment`` granularity, ``words`` is absent even if word data exists."""
    only_chunk = {
        "transcript": "Hi.",
        "words": [{"word": "Hi.", "start_time_ms": 0, "end_time_ms": 200}],
    }
    build = NvidiaRivaAudioTranscriptionConfig.build_transcription_response
    response = build(
        final_results=[only_chunk],
        response_format="verbose_json",
        duration_seconds=0.2,
        timestamp_granularities=["segment"],
    )
    assert "words" not in response
|
||||
|
||||
|
||||
def test_transform_response_not_used_raises_clear_error(cfg):
    """``transform_audio_transcription_response`` raises ``NotImplementedError``."""
    unused_transform = cfg.transform_audio_transcription_response
    with pytest.raises(NotImplementedError):
        unused_transform(raw_response=None)  # type: ignore[arg-type]
|
||||
|
||||
|
||||
def test_get_error_class_returns_nvidia_riva_exception(cfg):
    """``get_error_class`` yields an ``NvidiaRivaException`` keeping the status code."""
    error = cfg.get_error_class(error_message="bad", status_code=401, headers={})
    assert isinstance(error, NvidiaRivaException)
    returned_status = error.status_code
    assert returned_status == 401
|
||||
@@ -9,7 +9,7 @@ resolution-markers = [
|
||||
]
|
||||
|
||||
[options]
|
||||
exclude-newer = "0001-01-01T00:00:00Z" # This has no effect and is included for backwards compatibility when using relative exclude-newer values.
|
||||
exclude-newer = "2026-05-02T11:18:44.200141Z"
|
||||
exclude-newer-span = "P3D"
|
||||
|
||||
[manifest]
|
||||
@@ -339,6 +339,59 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/64/b4/17d4b0b2a2dc85a6df63d1157e028ed19f90d4cd97c36717afef2bc2f395/attrs-26.1.0-py3-none-any.whl", hash = "sha256:c647aa4a12dfbad9333ca4e71fe62ddc36f4e63b2d260a37a8b83d2f043ac309", size = 67548, upload-time = "2026-03-19T14:22:23.645Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "audioop-lts"
|
||||
version = "0.2.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/38/53/946db57842a50b2da2e0c1e34bd37f36f5aadba1a929a3971c5d7841dbca/audioop_lts-0.2.2.tar.gz", hash = "sha256:64d0c62d88e67b98a1a5e71987b7aa7b5bcffc7dcee65b635823dbdd0a8dbbd0", size = 30686, upload-time = "2025-08-05T16:43:17.409Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/de/d4/94d277ca941de5a507b07f0b592f199c22454eeaec8f008a286b3fbbacd6/audioop_lts-0.2.2-cp313-abi3-macosx_10_13_universal2.whl", hash = "sha256:fd3d4602dc64914d462924a08c1a9816435a2155d74f325853c1f1ac3b2d9800", size = 46523, upload-time = "2025-08-05T16:42:20.836Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f8/5a/656d1c2da4b555920ce4177167bfeb8623d98765594af59702c8873f60ec/audioop_lts-0.2.2-cp313-abi3-macosx_10_13_x86_64.whl", hash = "sha256:550c114a8df0aafe9a05442a1162dfc8fec37e9af1d625ae6060fed6e756f303", size = 27455, upload-time = "2025-08-05T16:42:22.283Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1b/83/ea581e364ce7b0d41456fb79d6ee0ad482beda61faf0cab20cbd4c63a541/audioop_lts-0.2.2-cp313-abi3-macosx_11_0_arm64.whl", hash = "sha256:9a13dc409f2564de15dd68be65b462ba0dde01b19663720c68c1140c782d1d75", size = 26997, upload-time = "2025-08-05T16:42:23.849Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b8/3b/e8964210b5e216e5041593b7d33e97ee65967f17c282e8510d19c666dab4/audioop_lts-0.2.2-cp313-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:51c916108c56aa6e426ce611946f901badac950ee2ddaf302b7ed35d9958970d", size = 85844, upload-time = "2025-08-05T16:42:25.208Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c7/2e/0a1c52faf10d51def20531a59ce4c706cb7952323b11709e10de324d6493/audioop_lts-0.2.2-cp313-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:47eba38322370347b1c47024defbd36374a211e8dd5b0dcbce7b34fdb6f8847b", size = 85056, upload-time = "2025-08-05T16:42:26.559Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/75/e8/cd95eef479656cb75ab05dfece8c1f8c395d17a7c651d88f8e6e291a63ab/audioop_lts-0.2.2-cp313-abi3-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ba7c3a7e5f23e215cb271516197030c32aef2e754252c4c70a50aaff7031a2c8", size = 93892, upload-time = "2025-08-05T16:42:27.902Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5c/1e/a0c42570b74f83efa5cca34905b3eef03f7ab09fe5637015df538a7f3345/audioop_lts-0.2.2-cp313-abi3-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:def246fe9e180626731b26e89816e79aae2276f825420a07b4a647abaa84becc", size = 96660, upload-time = "2025-08-05T16:42:28.9Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/50/d5/8a0ae607ca07dbb34027bac8db805498ee7bfecc05fd2c148cc1ed7646e7/audioop_lts-0.2.2-cp313-abi3-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e160bf9df356d841bb6c180eeeea1834085464626dc1b68fa4e1d59070affdc3", size = 79143, upload-time = "2025-08-05T16:42:29.929Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/12/17/0d28c46179e7910bfb0bb62760ccb33edb5de973052cb2230b662c14ca2e/audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4b4cd51a57b698b2d06cb9993b7ac8dfe89a3b2878e96bc7948e9f19ff51dba6", size = 84313, upload-time = "2025-08-05T16:42:30.949Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/84/ba/bd5d3806641564f2024e97ca98ea8f8811d4e01d9b9f9831474bc9e14f9e/audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_ppc64le.whl", hash = "sha256:4a53aa7c16a60a6857e6b0b165261436396ef7293f8b5c9c828a3a203147ed4a", size = 93044, upload-time = "2025-08-05T16:42:31.959Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f9/5e/435ce8d5642f1f7679540d1e73c1c42d933331c0976eb397d1717d7f01a3/audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_riscv64.whl", hash = "sha256:3fc38008969796f0f689f1453722a0f463da1b8a6fbee11987830bfbb664f623", size = 78766, upload-time = "2025-08-05T16:42:33.302Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ae/3b/b909e76b606cbfd53875693ec8c156e93e15a1366a012f0b7e4fb52d3c34/audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_s390x.whl", hash = "sha256:15ab25dd3e620790f40e9ead897f91e79c0d3ce65fe193c8ed6c26cffdd24be7", size = 87640, upload-time = "2025-08-05T16:42:34.854Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/30/e7/8f1603b4572d79b775f2140d7952f200f5e6c62904585d08a01f0a70393a/audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:03f061a1915538fd96272bac9551841859dbb2e3bf73ebe4a23ef043766f5449", size = 86052, upload-time = "2025-08-05T16:42:35.839Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b5/96/c37846df657ccdda62ba1ae2b6534fa90e2e1b1742ca8dcf8ebd38c53801/audioop_lts-0.2.2-cp313-abi3-win32.whl", hash = "sha256:3bcddaaf6cc5935a300a8387c99f7a7fbbe212a11568ec6cf6e4bc458c048636", size = 26185, upload-time = "2025-08-05T16:42:37.04Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/34/a5/9d78fdb5b844a83da8a71226c7bdae7cc638861085fff7a1d707cb4823fa/audioop_lts-0.2.2-cp313-abi3-win_amd64.whl", hash = "sha256:a2c2a947fae7d1062ef08c4e369e0ba2086049a5e598fda41122535557012e9e", size = 30503, upload-time = "2025-08-05T16:42:38.427Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/34/25/20d8fde083123e90c61b51afb547bb0ea7e77bab50d98c0ab243d02a0e43/audioop_lts-0.2.2-cp313-abi3-win_arm64.whl", hash = "sha256:5f93a5db13927a37d2d09637ccca4b2b6b48c19cd9eda7b17a2e9f77edee6a6f", size = 24173, upload-time = "2025-08-05T16:42:39.704Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/58/a7/0a764f77b5c4ac58dc13c01a580f5d32ae8c74c92020b961556a43e26d02/audioop_lts-0.2.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:73f80bf4cd5d2ca7814da30a120de1f9408ee0619cc75da87d0641273d202a09", size = 47096, upload-time = "2025-08-05T16:42:40.684Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/aa/ed/ebebedde1a18848b085ad0fa54b66ceb95f1f94a3fc04f1cd1b5ccb0ed42/audioop_lts-0.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:106753a83a25ee4d6f473f2be6b0966fc1c9af7e0017192f5531a3e7463dce58", size = 27748, upload-time = "2025-08-05T16:42:41.992Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/cb/6e/11ca8c21af79f15dbb1c7f8017952ee8c810c438ce4e2b25638dfef2b02c/audioop_lts-0.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fbdd522624141e40948ab3e8cdae6e04c748d78710e9f0f8d4dae2750831de19", size = 27329, upload-time = "2025-08-05T16:42:42.987Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/84/52/0022f93d56d85eec5da6b9da6a958a1ef09e80c39f2cc0a590c6af81dcbb/audioop_lts-0.2.2-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:143fad0311e8209ece30a8dbddab3b65ab419cbe8c0dde6e8828da25999be911", size = 92407, upload-time = "2025-08-05T16:42:44.336Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/87/1d/48a889855e67be8718adbc7a01f3c01d5743c325453a5e81cf3717664aad/audioop_lts-0.2.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dfbbc74ec68a0fd08cfec1f4b5e8cca3d3cd7de5501b01c4b5d209995033cde9", size = 91811, upload-time = "2025-08-05T16:42:45.325Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/98/a6/94b7213190e8077547ffae75e13ed05edc488653c85aa5c41472c297d295/audioop_lts-0.2.2-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cfcac6aa6f42397471e4943e0feb2244549db5c5d01efcd02725b96af417f3fe", size = 100470, upload-time = "2025-08-05T16:42:46.468Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e9/e9/78450d7cb921ede0cfc33426d3a8023a3bda755883c95c868ee36db8d48d/audioop_lts-0.2.2-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:752d76472d9804ac60f0078c79cdae8b956f293177acd2316cd1e15149aee132", size = 103878, upload-time = "2025-08-05T16:42:47.576Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4f/e2/cd5439aad4f3e34ae1ee852025dc6aa8f67a82b97641e390bf7bd9891d3e/audioop_lts-0.2.2-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:83c381767e2cc10e93e40281a04852facc4cd9334550e0f392f72d1c0a9c5753", size = 84867, upload-time = "2025-08-05T16:42:49.003Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/68/4b/9d853e9076c43ebba0d411e8d2aa19061083349ac695a7d082540bad64d0/audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c0022283e9556e0f3643b7c3c03f05063ca72b3063291834cca43234f20c60bb", size = 90001, upload-time = "2025-08-05T16:42:50.038Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/58/26/4bae7f9d2f116ed5593989d0e521d679b0d583973d203384679323d8fa85/audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:a2d4f1513d63c795e82948e1305f31a6d530626e5f9f2605408b300ae6095093", size = 99046, upload-time = "2025-08-05T16:42:51.111Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b2/67/a9f4fb3e250dda9e9046f8866e9fa7d52664f8985e445c6b4ad6dfb55641/audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:c9c8e68d8b4a56fda8c025e538e639f8c5953f5073886b596c93ec9b620055e7", size = 84788, upload-time = "2025-08-05T16:42:52.198Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/70/f7/3de86562db0121956148bcb0fe5b506615e3bcf6e63c4357a612b910765a/audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:96f19de485a2925314f5020e85911fb447ff5fbef56e8c7c6927851b95533a1c", size = 94472, upload-time = "2025-08-05T16:42:53.59Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f1/32/fd772bf9078ae1001207d2df1eef3da05bea611a87dd0e8217989b2848fa/audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e541c3ef484852ef36545f66209444c48b28661e864ccadb29daddb6a4b8e5f5", size = 92279, upload-time = "2025-08-05T16:42:54.632Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4f/41/affea7181592ab0ab560044632571a38edaf9130b84928177823fbf3176a/audioop_lts-0.2.2-cp313-cp313t-win32.whl", hash = "sha256:d5e73fa573e273e4f2e5ff96f9043858a5e9311e94ffefd88a3186a910c70917", size = 26568, upload-time = "2025-08-05T16:42:55.627Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/28/2b/0372842877016641db8fc54d5c88596b542eec2f8f6c20a36fb6612bf9ee/audioop_lts-0.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:9191d68659eda01e448188f60364c7763a7ca6653ed3f87ebb165822153a8547", size = 30942, upload-time = "2025-08-05T16:42:56.674Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ee/ca/baf2b9cc7e96c179bb4a54f30fcd83e6ecb340031bde68f486403f943768/audioop_lts-0.2.2-cp313-cp313t-win_arm64.whl", hash = "sha256:c174e322bb5783c099aaf87faeb240c8d210686b04bd61dfd05a8e5a83d88969", size = 24603, upload-time = "2025-08-05T16:42:57.571Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "audioread"
|
||||
version = "3.1.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "standard-aifc", marker = "python_full_version >= '3.13'" },
|
||||
{ name = "standard-sunau", marker = "python_full_version >= '3.13'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a1/4a/874ecf9b472f998130c2b5e145dcdb9f6131e84786111489103b66772143/audioread-3.1.0.tar.gz", hash = "sha256:1c4ab2f2972764c896a8ac61ac53e261c8d29f0c6ccd652f84e18f08a4cab190", size = 20082, upload-time = "2025-10-26T19:44:13.484Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/7e/16/fbe8e1e185a45042f7cd3a282def5bb8d95bb69ab9e9ef6a5368aa17e426/audioread-3.1.0-py3-none-any.whl", hash = "sha256:b30d1df6c5d3de5dcef0fb0e256f6ea17bdcf5f979408df0297d8a408e2971b4", size = 23143, upload-time = "2025-10-26T19:44:12.016Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aurelio-sdk"
|
||||
version = "0.0.19"
|
||||
@@ -2176,6 +2229,59 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/67/58/317b0134129b556a93a3b0afe00ee675b5657f0155509e22fcb853bafe2d/grpcio_status-1.71.2-py3-none-any.whl", hash = "sha256:803c98cb6a8b7dc6dbb785b1111aed739f241ab5e9da0bba96888aa74704cfd3", size = 14424, upload-time = "2025-06-28T04:23:42.136Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grpcio-tools"
|
||||
version = "1.71.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "grpcio" },
|
||||
{ name = "protobuf" },
|
||||
{ name = "setuptools" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/ad/9a/edfefb47f11ef6b0f39eea4d8f022c5bb05ac1d14fcc7058e84a51305b73/grpcio_tools-1.71.2.tar.gz", hash = "sha256:b5304d65c7569b21270b568e404a5a843cf027c66552a6a0978b23f137679c09", size = 5330655, upload-time = "2025-06-28T04:22:00.308Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/dd/ad/e74a4d1cffff628c2ef1ec5b9944fb098207cc4af6eb8db4bc52e6d99236/grpcio_tools-1.71.2-cp310-cp310-linux_armv7l.whl", hash = "sha256:ab8a28c2e795520d6dc6ffd7efaef4565026dbf9b4f5270de2f3dd1ce61d2318", size = 2385557, upload-time = "2025-06-28T04:20:38.833Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/63/bf/30b63418279d6fdc4fd4a3781a7976c40c7e8ee052333b9ce6bd4ce63f30/grpcio_tools-1.71.2-cp310-cp310-macosx_10_14_universal2.whl", hash = "sha256:654ecb284a592d39a85556098b8c5125163435472a20ead79b805cf91814b99e", size = 5446915, upload-time = "2025-06-28T04:20:40.947Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/83/cd/2994e0a0a67714fdb00c207c4bec60b9b356fbd6b0b7a162ecaabe925155/grpcio_tools-1.71.2-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:b49aded2b6c890ff690d960e4399a336c652315c6342232c27bd601b3705739e", size = 2348301, upload-time = "2025-06-28T04:20:42.766Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5b/8b/4f2315927af306af1b35793b332b9ca9dc5b5a2cde2d55811c9577b5f03f/grpcio_tools-1.71.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7811a6fc1c4b4e5438e5eb98dbd52c2dc4a69d1009001c13356e6636322d41a", size = 2742159, upload-time = "2025-06-28T04:20:44.206Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8d/98/d513f6c09df405c82583e7083c20718ea615ed0da69ec42c80ceae7ebdc5/grpcio_tools-1.71.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:393a9c80596aa2b3f05af854e23336ea8c295593bbb35d9adae3d8d7943672bd", size = 2473444, upload-time = "2025-06-28T04:20:45.5Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/fa/fe/00af17cc841916d5e4227f11036bf443ce006629212c876937c7904b0ba3/grpcio_tools-1.71.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:823e1f23c12da00f318404c4a834bb77cd150d14387dee9789ec21b335249e46", size = 2850339, upload-time = "2025-06-28T04:20:46.758Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7d/59/745fc50dfdbed875fcfd6433883270d39d23fb1aa4ecc9587786f772dce3/grpcio_tools-1.71.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9bfbea79d6aec60f2587133ba766ede3dc3e229641d1a1e61d790d742a3d19eb", size = 3300795, upload-time = "2025-06-28T04:20:48.327Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/62/3e/d9d0fb2df78e601c28d02ef0cd5d007f113c1b04fc21e72bf56e8c3df66b/grpcio_tools-1.71.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:32f3a67b10728835b5ffb63fbdbe696d00e19a27561b9cf5153e72dbb93021ba", size = 2913729, upload-time = "2025-06-28T04:20:49.641Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/09/ae/ddb264b4a10c6c10336a7c177f8738b230c2c473d0c91dd5d8ce8ea1b857/grpcio_tools-1.71.2-cp310-cp310-win32.whl", hash = "sha256:7fcf9d92c710bfc93a1c0115f25e7d49a65032ff662b38b2f704668ce0a938df", size = 945997, upload-time = "2025-06-28T04:20:50.9Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ad/8d/5efd93698fe359f63719d934ebb2d9337e82d396e13d6bf00f4b06793e37/grpcio_tools-1.71.2-cp310-cp310-win_amd64.whl", hash = "sha256:914b4275be810290266e62349f2d020bb7cc6ecf9edb81da3c5cddb61a95721b", size = 1117474, upload-time = "2025-06-28T04:20:52.54Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/17/e4/0568d38b8da6237ea8ea15abb960fb7ab83eb7bb51e0ea5926dab3d865b1/grpcio_tools-1.71.2-cp311-cp311-linux_armv7l.whl", hash = "sha256:0acb8151ea866be5b35233877fbee6445c36644c0aa77e230c9d1b46bf34b18b", size = 2385557, upload-time = "2025-06-28T04:20:54.323Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/76/fb/700d46f72b0f636cf0e625f3c18a4f74543ff127471377e49a071f64f1e7/grpcio_tools-1.71.2-cp311-cp311-macosx_10_14_universal2.whl", hash = "sha256:b28f8606f4123edb4e6da281547465d6e449e89f0c943c376d1732dc65e6d8b3", size = 5447590, upload-time = "2025-06-28T04:20:55.836Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/12/69/d9bb2aec3de305162b23c5c884b9f79b1a195d42b1e6dabcc084cc9d0804/grpcio_tools-1.71.2-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:cbae6f849ad2d1f5e26cd55448b9828e678cb947fa32c8729d01998238266a6a", size = 2348495, upload-time = "2025-06-28T04:20:57.33Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d5/83/f840aba1690461b65330efbca96170893ee02fae66651bcc75f28b33a46c/grpcio_tools-1.71.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e4d1027615cfb1e9b1f31f2f384251c847d68c2f3e025697e5f5c72e26ed1316", size = 2742333, upload-time = "2025-06-28T04:20:59.051Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/30/34/c02cd9b37de26045190ba665ee6ab8597d47f033d098968f812d253bbf8c/grpcio_tools-1.71.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9bac95662dc69338edb9eb727cc3dd92342131b84b12b3e8ec6abe973d4cbf1b", size = 2473490, upload-time = "2025-06-28T04:21:00.614Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4d/c7/375718ae091c8f5776828ce97bdcb014ca26244296f8b7f70af1a803ed2f/grpcio_tools-1.71.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c50250c7248055040f89eb29ecad39d3a260a4b6d3696af1575945f7a8d5dcdc", size = 2850333, upload-time = "2025-06-28T04:21:01.95Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/19/37/efc69345bd92a73b2bc80f4f9e53d42dfdc234b2491ae58c87da20ca0ea5/grpcio_tools-1.71.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6ab1ad955e69027ef12ace4d700c5fc36341bdc2f420e87881e9d6d02af3d7b8", size = 3300748, upload-time = "2025-06-28T04:21:03.451Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d2/1f/15f787eb25ae42086f55ed3e4260e85f385921c788debf0f7583b34446e3/grpcio_tools-1.71.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dd75dde575781262b6b96cc6d0b2ac6002b2f50882bf5e06713f1bf364ee6e09", size = 2913178, upload-time = "2025-06-28T04:21:04.879Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/12/aa/69cb3a9dff7d143a05e4021c3c9b5cde07aacb8eb1c892b7c5b9fb4973e3/grpcio_tools-1.71.2-cp311-cp311-win32.whl", hash = "sha256:9a3cb244d2bfe0d187f858c5408d17cb0e76ca60ec9a274c8fd94cc81457c7fc", size = 946256, upload-time = "2025-06-28T04:21:06.518Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1e/df/fb951c5c87eadb507a832243942e56e67d50d7667b0e5324616ffd51b845/grpcio_tools-1.71.2-cp311-cp311-win_amd64.whl", hash = "sha256:00eb909997fd359a39b789342b476cbe291f4dd9c01ae9887a474f35972a257e", size = 1117661, upload-time = "2025-06-28T04:21:08.18Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9c/d3/3ed30a9c5b2424627b4b8411e2cd6a1a3f997d3812dbc6a8630a78bcfe26/grpcio_tools-1.71.2-cp312-cp312-linux_armv7l.whl", hash = "sha256:bfc0b5d289e383bc7d317f0e64c9dfb59dc4bef078ecd23afa1a816358fb1473", size = 2385479, upload-time = "2025-06-28T04:21:10.413Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/54/61/e0b7295456c7e21ef777eae60403c06835160c8d0e1e58ebfc7d024c51d3/grpcio_tools-1.71.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:b4669827716355fa913b1376b1b985855d5cfdb63443f8d18faf210180199006", size = 5431521, upload-time = "2025-06-28T04:21:12.261Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/75/d7/7bcad6bcc5f5b7fab53e6bce5db87041f38ef3e740b1ec2d8c49534fa286/grpcio_tools-1.71.2-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:d4071f9b44564e3f75cdf0f05b10b3e8c7ea0ca5220acbf4dc50b148552eef2f", size = 2350289, upload-time = "2025-06-28T04:21:13.625Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b2/8a/e4c1c4cb8c9ff7f50b7b2bba94abe8d1e98ea05f52a5db476e7f1c1a3c70/grpcio_tools-1.71.2-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a28eda8137d587eb30081384c256f5e5de7feda34776f89848b846da64e4be35", size = 2743321, upload-time = "2025-06-28T04:21:15.007Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/fd/aa/95bc77fda5c2d56fb4a318c1b22bdba8914d5d84602525c99047114de531/grpcio_tools-1.71.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b19c083198f5eb15cc69c0a2f2c415540cbc636bfe76cea268e5894f34023b40", size = 2474005, upload-time = "2025-06-28T04:21:16.443Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c9/ff/ca11f930fe1daa799ee0ce1ac9630d58a3a3deed3dd2f465edb9a32f299d/grpcio_tools-1.71.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:784c284acda0d925052be19053d35afbf78300f4d025836d424cf632404f676a", size = 2851559, upload-time = "2025-06-28T04:21:18.139Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/64/10/c6fc97914c7e19c9bb061722e55052fa3f575165da9f6510e2038d6e8643/grpcio_tools-1.71.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:381e684d29a5d052194e095546eef067201f5af30fd99b07b5d94766f44bf1ae", size = 3300622, upload-time = "2025-06-28T04:21:20.291Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e5/d6/965f36cfc367c276799b730d5dd1311b90a54a33726e561393b808339b04/grpcio_tools-1.71.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3e4b4801fabd0427fc61d50d09588a01b1cfab0ec5e8a5f5d515fbdd0891fd11", size = 2913863, upload-time = "2025-06-28T04:21:22.196Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8d/f0/c05d5c3d0c1d79ac87df964e9d36f1e3a77b60d948af65bec35d3e5c75a3/grpcio_tools-1.71.2-cp312-cp312-win32.whl", hash = "sha256:84ad86332c44572305138eafa4cc30040c9a5e81826993eae8227863b700b490", size = 945744, upload-time = "2025-06-28T04:21:23.463Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e2/e9/c84c1078f0b7af7d8a40f5214a9bdd8d2a567ad6c09975e6e2613a08d29d/grpcio_tools-1.71.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e1108d37eecc73b1c4a27350a6ed921b5dda25091700c1da17cfe30761cd462", size = 1117695, upload-time = "2025-06-28T04:21:25.22Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/60/9c/bdf9c5055a1ad0a09123402d73ecad3629f75b9cf97828d547173b328891/grpcio_tools-1.71.2-cp313-cp313-linux_armv7l.whl", hash = "sha256:b0f0a8611614949c906e25c225e3360551b488d10a366c96d89856bcef09f729", size = 2384758, upload-time = "2025-06-28T04:21:26.712Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/49/d0/6aaee4940a8fb8269c13719f56d69c8d39569bee272924086aef81616d4a/grpcio_tools-1.71.2-cp313-cp313-macosx_10_14_universal2.whl", hash = "sha256:7931783ea7ac42ac57f94c5047d00a504f72fbd96118bf7df911bb0e0435fc0f", size = 5443127, upload-time = "2025-06-28T04:21:28.383Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d9/11/50a471dcf301b89c0ed5ab92c533baced5bd8f796abfd133bbfadf6b60e5/grpcio_tools-1.71.2-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:d188dc28e069aa96bb48cb11b1338e47ebdf2e2306afa58a8162cc210172d7a8", size = 2349627, upload-time = "2025-06-28T04:21:30.254Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bb/66/e3dc58362a9c4c2fbe98a7ceb7e252385777ebb2bbc7f42d5ab138d07ace/grpcio_tools-1.71.2-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f36c4b3cc42ad6ef67430639174aaf4a862d236c03c4552c4521501422bfaa26", size = 2742932, upload-time = "2025-06-28T04:21:32.325Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b7/1e/1e07a07ed8651a2aa9f56095411198385a04a628beba796f36d98a5a03ec/grpcio_tools-1.71.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4bd9ed12ce93b310f0cef304176049d0bc3b9f825e9c8c6a23e35867fed6affd", size = 2473627, upload-time = "2025-06-28T04:21:33.752Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d3/f9/3b7b32e4acb419f3a0b4d381bc114fe6cd48e3b778e81273fc9e4748caad/grpcio_tools-1.71.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7ce27e76dd61011182d39abca38bae55d8a277e9b7fe30f6d5466255baccb579", size = 2850879, upload-time = "2025-06-28T04:21:35.241Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1e/99/cd9e1acd84315ce05ad1fcdfabf73b7df43807cf00c3b781db372d92b899/grpcio_tools-1.71.2-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:dcc17bf59b85c3676818f2219deacac0156492f32ca165e048427d2d3e6e1157", size = 3300216, upload-time = "2025-06-28T04:21:36.826Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9f/c0/66eab57b14550c5b22404dbf60635c9e33efa003bd747211981a9859b94b/grpcio_tools-1.71.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:706360c71bdd722682927a1fb517c276ccb816f1e30cb71f33553e5817dc4031", size = 2913521, upload-time = "2025-06-28T04:21:38.347Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/05/9b/7c90af8f937d77005625d705ab1160bc42a7e7b021ee5c788192763bccd6/grpcio_tools-1.71.2-cp313-cp313-win32.whl", hash = "sha256:bcf751d5a81c918c26adb2d6abcef71035c77d6eb9dd16afaf176ee096e22c1d", size = 945322, upload-time = "2025-06-28T04:21:39.864Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5f/80/6db6247f767c94fe551761772f89ceea355ff295fd4574cb8efc8b2d1199/grpcio_tools-1.71.2-cp313-cp313-win_amd64.whl", hash = "sha256:b1581a1133552aba96a730178bc44f6f1a071f0eb81c5b6bc4c0f89f5314e2b8", size = 1117234, upload-time = "2025-06-28T04:21:41.893Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gunicorn"
|
||||
version = "23.0.0"
|
||||
@@ -3174,6 +3280,13 @@ semantic-router = [
|
||||
{ name = "aurelio-sdk" },
|
||||
{ name = "semantic-router" },
|
||||
]
|
||||
stt-nvidia-riva = [
|
||||
{ name = "audioread" },
|
||||
{ name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" },
|
||||
{ name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" },
|
||||
{ name = "nvidia-riva-client" },
|
||||
{ name = "soundfile" },
|
||||
]
|
||||
utils = [
|
||||
{ name = "numpydoc" },
|
||||
]
|
||||
@@ -3264,6 +3377,7 @@ requires-dist = [
|
||||
{ name = "aiohttp", specifier = "==3.13.4" },
|
||||
{ name = "anthropic", extras = ["vertex"], marker = "extra == 'proxy-runtime'", specifier = "==0.84.0" },
|
||||
{ name = "apscheduler", marker = "extra == 'proxy'", specifier = "==3.11.2" },
|
||||
{ name = "audioread", marker = "extra == 'stt-nvidia-riva'", specifier = ">=3.0.1" },
|
||||
{ name = "aurelio-sdk", marker = "python_full_version < '3.14' and extra == 'semantic-router'", specifier = "==0.0.19" },
|
||||
{ name = "azure-ai-contentsafety", marker = "extra == 'proxy-runtime'", specifier = "==1.0.0" },
|
||||
{ name = "azure-identity", marker = "extra == 'extra-proxy'", specifier = "==1.25.2" },
|
||||
@@ -3300,7 +3414,9 @@ requires-dist = [
|
||||
{ name = "mangum", marker = "extra == 'proxy-runtime'", specifier = "==0.17.0" },
|
||||
{ name = "mcp", marker = "extra == 'proxy'", specifier = "==1.26.0" },
|
||||
{ name = "mlflow", marker = "extra == 'mlflow'", specifier = "==3.11.1" },
|
||||
{ name = "numpy", marker = "extra == 'stt-nvidia-riva'", specifier = ">=1.26.0" },
|
||||
{ name = "numpydoc", marker = "extra == 'utils'", specifier = "==1.8.0" },
|
||||
{ name = "nvidia-riva-client", marker = "extra == 'stt-nvidia-riva'", specifier = ">=2.15.0" },
|
||||
{ name = "openai", specifier = "==2.33.0" },
|
||||
{ name = "opentelemetry-api", marker = "extra == 'proxy-runtime'", specifier = "==1.28.0" },
|
||||
{ name = "opentelemetry-exporter-otlp", marker = "extra == 'proxy-runtime'", specifier = "==1.28.0" },
|
||||
@@ -3325,13 +3441,14 @@ requires-dist = [
|
||||
{ name = "semantic-router", marker = "python_full_version < '3.14' and extra == 'semantic-router'", specifier = "==0.1.12" },
|
||||
{ name = "sentry-sdk", marker = "extra == 'proxy-runtime'", specifier = "==2.21.0" },
|
||||
{ name = "soundfile", marker = "extra == 'proxy'", specifier = "==0.12.1" },
|
||||
{ name = "soundfile", marker = "extra == 'stt-nvidia-riva'", specifier = ">=0.12.1" },
|
||||
{ name = "tiktoken", specifier = "==0.12.0" },
|
||||
{ name = "tokenizers", specifier = "==0.23.1" },
|
||||
{ name = "uvicorn", marker = "extra == 'proxy'", specifier = "==0.33.0" },
|
||||
{ name = "uvloop", marker = "sys_platform != 'win32' and extra == 'proxy'", specifier = "==0.21.0" },
|
||||
{ name = "websockets", marker = "extra == 'proxy'", specifier = "==15.0.1" },
|
||||
]
|
||||
provides-extras = ["proxy", "extra-proxy", "utils", "caching", "semantic-router", "mlflow", "grpc", "google", "proxy-runtime"]
|
||||
provides-extras = ["proxy", "extra-proxy", "utils", "caching", "semantic-router", "mlflow", "grpc", "stt-nvidia-riva", "google", "proxy-runtime"]
|
||||
|
||||
[package.metadata.requires-dev]
|
||||
ci = [
|
||||
@@ -4156,6 +4273,18 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/6c/45/56d99ba9366476cd8548527667f01869279cedb9e66b28eb4dfb27701679/numpydoc-1.8.0-py3-none-any.whl", hash = "sha256:72024c7fd5e17375dec3608a27c03303e8ad00c81292667955c6fea7a3ccf541", size = 64003, upload-time = "2024-08-09T15:52:37.276Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nvidia-riva-client"
|
||||
version = "2.16.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "grpcio-tools" },
|
||||
{ name = "setuptools" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/9d/82/0484c225bebe7ed37334474fba5c6ac7228638e692b84da0a0e7f2395672/nvidia_riva_client-2.16.0-py3-none-any.whl", hash = "sha256:99ef37b8f487d75a70c053736848221e09b728e5c910fb476333d375bd4347a3", size = 45491, upload-time = "2024-07-02T14:54:22.63Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "oauthlib"
|
||||
version = "3.3.1"
|
||||
@@ -7068,6 +7197,40 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ff/07/45c21ed03d708c477367305726b89919b020a3a2a01f72aaf5ad941caf35/sse_starlette-3.4.1-py3-none-any.whl", hash = "sha256:6b43cf21f1d574d582a6e1b0cfbde1c94dc86a32a701a7168c99c4475c6bd1d0", size = 16487, upload-time = "2026-04-26T13:32:30.819Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "standard-aifc"
|
||||
version = "3.13.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "audioop-lts", marker = "python_full_version >= '3.13'" },
|
||||
{ name = "standard-chunk", marker = "python_full_version >= '3.13'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/c4/53/6050dc3dde1671eb3db592c13b55a8005e5040131f7509cef0215212cb84/standard_aifc-3.13.0.tar.gz", hash = "sha256:64e249c7cb4b3daf2fdba4e95721f811bde8bdfc43ad9f936589b7bb2fae2e43", size = 15240, upload-time = "2024-10-30T16:01:31.772Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c3/52/5fbb203394cc852334d1575cc020f6bcec768d2265355984dfd361968f36/standard_aifc-3.13.0-py3-none-any.whl", hash = "sha256:f7ae09cc57de1224a0dd8e3eb8f73830be7c3d0bc485de4c1f82b4a7f645ac66", size = 10492, upload-time = "2024-10-30T16:01:07.071Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "standard-chunk"
|
||||
version = "3.13.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/43/06/ce1bb165c1f111c7d23a1ad17204d67224baa69725bb6857a264db61beaf/standard_chunk-3.13.0.tar.gz", hash = "sha256:4ac345d37d7e686d2755e01836b8d98eda0d1a3ee90375e597ae43aaf064d654", size = 4672, upload-time = "2024-10-30T16:18:28.326Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/7a/90/a5c1084d87767d787a6caba615aa50dc587229646308d9420c960cb5e4c0/standard_chunk-3.13.0-py3-none-any.whl", hash = "sha256:17880a26c285189c644bd5bd8f8ed2bdb795d216e3293e6dbe55bbd848e2982c", size = 4944, upload-time = "2024-10-30T16:18:26.694Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "standard-sunau"
|
||||
version = "3.13.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "audioop-lts", marker = "python_full_version >= '3.13'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/66/e3/ce8d38cb2d70e05ffeddc28bb09bad77cfef979eb0a299c9117f7ed4e6a9/standard_sunau-3.13.0.tar.gz", hash = "sha256:b319a1ac95a09a2378a8442f403c66f4fd4b36616d6df6ae82b8e536ee790908", size = 9368, upload-time = "2024-10-30T16:01:41.626Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/34/ae/e3707f6c1bc6f7aa0df600ba8075bfb8a19252140cd595335be60e25f9ee/standard_sunau-3.13.0-py3-none-any.whl", hash = "sha256:53af624a9529c41062f4c2fd33837f297f3baa196b0cfceffea6555654602622", size = 7364, upload-time = "2024-10-30T16:01:28.003Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "starlette"
|
||||
version = "0.50.0"
|
||||
|
||||
Reference in New Issue
Block a user