# Mirror of https://github.com/tiennm99/litellm.git
# Synced 2026-06-17 22:48:35 +00:00
# 279 lines, 9.6 KiB, Python
"""
Integration tests for SageMaker Nova provider.

These tests require a live SageMaker Nova endpoint and AWS credentials.
They are skipped by default — run manually with:

    pytest tests/test_litellm/llms/sagemaker/test_sagemaker_nova_integration.py -v --no-header -rN

Prerequisites:
    export AWS_PROFILE=<your-profile>  # or set AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
    export AWS_REGION_NAME=us-east-1
    export SAGEMAKER_NOVA_ENDPOINT=<your-endpoint-name>
"""
|
|
|
|
import base64
|
|
import io
|
|
import json
|
|
import os
|
|
import struct
|
|
import zlib
|
|
|
|
import pytest
|
|
|
|
import litellm
|
|
|
|
# Name of the live endpoint under test; an empty string when the variable is unset.
ENDPOINT = os.getenv("SAGEMAKER_NOVA_ENDPOINT", "")
# Model string handed to litellm: the provider prefix followed by the endpoint name.
MODEL = "sagemaker_nova/" + ENDPOINT

# Marker that skips whatever it decorates when no endpoint name is configured.
skip_if_no_endpoint = pytest.mark.skipif(
    ENDPOINT == "",
    reason="SAGEMAKER_NOVA_ENDPOINT not set — skipping live integration tests",
)
|
|
|
|
|
|
def _make_test_png() -> str:
    """Create a minimal 4x4 PNG (red border, blue center) and return base64.

    The image is assembled by hand from three chunks (IHDR, IDAT, IEND) so no
    imaging library is needed.

    Returns:
        The PNG file bytes, base64-encoded and decoded to a ``str``.
    """

    def chunk(ctype, data):
        """Frame one chunk: big-endian data length, type + data, CRC-32 of type + data."""
        body = ctype + data
        return (
            struct.pack(">I", len(data))
            + body
            + struct.pack(">I", zlib.crc32(body) & 0xFFFFFFFF)
        )

    width, height = 4, 4
    red = bytes((255, 0, 0))
    blue = bytes((0, 0, 255))

    def pixel(x, y):
        """Return the RGB triple at (x, y): blue in the central 2x2, red elsewhere."""
        return blue if 1 <= x <= 2 and 1 <= y <= 2 else red

    # Every scanline is prefixed with a zero byte (filter type "None").
    # Joining once avoids repeated bytes concatenation inside nested loops.
    raw = b"".join(
        b"\x00" + b"".join(pixel(x, y) for x in range(width))
        for y in range(height)
    )

    png = (
        b"\x89PNG\r\n\x1a\n"
        # IHDR fields: width, height, bit depth 8, colour type 2 (RGB),
        # then compression, filter and interlace methods all 0.
        + chunk(b"IHDR", struct.pack(">IIBBBBB", width, height, 8, 2, 0, 0, 0))
        + chunk(b"IDAT", zlib.compress(raw))
        + chunk(b"IEND", b"")
    )
    return base64.b64encode(png).decode()
|
|
|
|
|
|
@skip_if_no_endpoint
class TestSagemakerNovaIntegration:
    """Live integration tests for sagemaker_nova provider.

    Each test issues a real ``litellm.completion`` call against ``MODEL``.
    The class is skipped as a whole when ``SAGEMAKER_NOVA_ENDPOINT`` is not set.
    """

    def test_should_complete_basic_single_turn(self):
        """Basic single-turn chat completion returns text, a stop reason and usage."""
        response = litellm.completion(
            model=MODEL,
            messages=[{"role": "user", "content": "What is 2+2? Reply in one word."}],
            max_tokens=32,
            temperature=0.1,
        )
        content = response.choices[0].message.content
        assert content is not None
        assert len(content.strip()) > 0
        assert response.choices[0].finish_reason == "stop"
        assert response.usage.prompt_tokens > 0
        assert response.usage.completion_tokens > 0
        assert response.usage.total_tokens == (
            response.usage.prompt_tokens + response.usage.completion_tokens
        )

    def test_should_complete_multi_turn_conversation(self):
        """Multi-turn conversation maintains context."""
        messages = [
            {"role": "user", "content": "My name is Alice."},
        ]
        response1 = litellm.completion(
            model=MODEL,
            messages=messages,
            max_tokens=64,
            temperature=0.1,
        )
        assistant_msg = response1.choices[0].message.content
        assert assistant_msg is not None

        # Second turn — model should remember the name
        messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": "What is my name?"})

        response2 = litellm.completion(
            model=MODEL,
            messages=messages,
            max_tokens=64,
            temperature=0.1,
        )
        second_reply = response2.choices[0].message.content
        # Check for None before lowercasing so a missing reply is reported as
        # an assertion failure rather than an AttributeError.
        assert second_reply is not None, "Expected content in second-turn response"
        answer = second_reply.lower()
        assert "alice" in answer, f"Expected 'alice' in response, got: {answer}"

    def test_should_stream_response(self):
        """Streaming yields several chunks with content and a finish_reason.

        Usage is requested through ``stream_options`` but is not asserted here.
        """
        response = litellm.completion(
            model=MODEL,
            messages=[{"role": "user", "content": "Count from 1 to 5."}],
            max_tokens=64,
            stream=True,
            stream_options={"include_usage": True},
        )

        chunks = []
        full_content = ""
        for chunk in response:
            chunks.append(chunk)
            # A chunk may arrive with an empty ``choices`` list (the
            # finish_reason filter below allows for that too); it carries no
            # text, so skip it rather than index into it.
            if not chunk.choices:
                continue
            full_content += chunk.choices[0].delta.content or ""

        assert len(chunks) > 1, "Expected multiple streaming chunks"
        assert len(full_content.strip()) > 0, "Expected non-empty streamed content"

        # At least one chunk should carry a finish_reason
        final_chunks_with_finish = [
            c for c in chunks if c.choices and c.choices[0].finish_reason is not None
        ]
        assert (
            len(final_chunks_with_finish) > 0
        ), "Expected at least one chunk with finish_reason"

    def test_should_return_logprobs(self):
        """Logprobs are returned when requested."""
        response = litellm.completion(
            model=MODEL,
            messages=[{"role": "user", "content": "Say hello."}],
            max_tokens=16,
            temperature=0.1,
            logprobs=True,
            top_logprobs=3,
        )
        lp = response.choices[0].logprobs
        assert lp is not None, "Expected logprobs in response"

        # The logprobs payload is handled both as an attribute-style object
        # and as a plain mapping.
        content = lp.content if hasattr(lp, "content") else lp.get("content")
        assert content is not None and len(content) > 0, "Expected logprobs content"

        first_token = content[0]
        assert "token" in first_token or hasattr(first_token, "token")
        assert "logprob" in first_token or hasattr(first_token, "logprob")

        top = (
            first_token.get("top_logprobs")
            if isinstance(first_token, dict)
            else first_token.top_logprobs
        )
        # top_logprobs=3 was requested above, so exactly three are expected.
        assert top is not None and len(top) == 3, "Expected 3 top_logprobs"

    def test_should_handle_multimodal_image_input(self):
        """Multimodal with base64 image in content array."""
        b64_image = _make_test_png()
        response = litellm.completion(
            model=MODEL,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "What colors do you see in this image? List them.",
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/png;base64,{b64_image}"},
                        },
                    ],
                }
            ],
            max_tokens=128,
        )
        # The None check must run before .lower(); otherwise it can never fail.
        assert response.choices[0].message.content is not None
        content = response.choices[0].message.content.lower()
        assert len(content) > 0
        # The image has red and blue — model should mention at least one
        assert (
            "red" in content or "blue" in content
        ), f"Expected 'red' or 'blue' in multimodal response, got: {content}"

    def test_should_pass_nova_specific_params(self):
        """Nova-specific parameters (top_k) are accepted."""
        response = litellm.completion(
            model=MODEL,
            messages=[{"role": "user", "content": "Say hello."}],
            max_tokens=32,
            top_k=40,
            temperature=0.7,
        )
        assert response.choices[0].message.content is not None
        assert response.usage.total_tokens > 0

    def test_should_respect_system_message(self):
        """System message should influence the response."""
        response = litellm.completion(
            model=MODEL,
            messages=[
                {
                    "role": "system",
                    "content": "You are a pirate. Always respond in pirate speak.",
                },
                {"role": "user", "content": "How are you today?"},
            ],
            max_tokens=128,
            temperature=0.7,
        )
        # The None check must run before .lower(); otherwise it can never fail.
        assert response.choices[0].message.content is not None
        content = response.choices[0].message.content.lower()
        # Pirate-themed words likely in response. This is a substring match,
        # so short entries such as "ye" also match inside longer words.
        pirate_words = ["arr", "ahoy", "matey", "ye", "sail", "sea", "cap"]
        assert any(
            w in content for w in pirate_words
        ), f"Expected pirate speak, got: {content}"
|
|
|
|
|
|
# Name of the Nova 2 Lite endpoint; an empty string when the variable is unset.
NOVA2_ENDPOINT = os.getenv("SAGEMAKER_NOVA2_LITE_ENDPOINT", "")
# Model string handed to litellm: the provider prefix followed by the endpoint name.
NOVA2_MODEL = "sagemaker_nova/" + NOVA2_ENDPOINT

# Marker that skips whatever it decorates when no Nova 2 Lite endpoint is configured.
skip_if_no_nova2_endpoint = pytest.mark.skipif(
    NOVA2_ENDPOINT == "",
    reason="SAGEMAKER_NOVA2_LITE_ENDPOINT not set — requires Nova 2 Lite endpoint",
)
|
|
|
|
|
|
@skip_if_no_nova2_endpoint
class TestSagemakerNova2LiteIntegration:
    """
    Live tests that need a Nova 2 Lite endpoint (reasoning_effort support).

    Run with:
        export SAGEMAKER_NOVA2_LITE_ENDPOINT=<your-nova-2-lite-endpoint>
        pytest tests/test_litellm/llms/sagemaker/test_sagemaker_nova_integration.py::TestSagemakerNova2LiteIntegration -v
    """

    def test_should_accept_reasoning_effort_low(self):
        """A request with reasoning_effort='low' completes with content and usage."""
        prompt = [{"role": "user", "content": "What is 2+2?"}]
        result = litellm.completion(
            model=NOVA2_MODEL,
            messages=prompt,
            max_tokens=32,
            reasoning_effort="low",
        )
        reply = result.choices[0].message.content
        assert reply is not None
        assert result.usage.total_tokens > 0

    def test_should_accept_reasoning_effort_high(self):
        """A request with reasoning_effort='high' completes with non-empty content."""
        prompt = [{"role": "user", "content": "Explain why the sky is blue."}]
        result = litellm.completion(
            model=NOVA2_MODEL,
            messages=prompt,
            max_tokens=256,
            reasoning_effort="high",
        )
        reply = result.choices[0].message.content
        assert reply is not None
        assert len(reply) > 0
        assert result.usage.completion_tokens > 0
|