# litellm/tests/local_testing/test_sagemaker_nova_integration.py

"""
Integration tests for SageMaker Nova provider.

These tests require a live SageMaker Nova endpoint and AWS credentials.
They are skipped unless SAGEMAKER_NOVA_ENDPOINT is set — run manually with:

    pytest tests/test_litellm/llms/sagemaker/test_sagemaker_nova_integration.py -v --no-header -rN

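Prerequisites:
    export AWS_PROFILE=<your-profile>      # or set AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
    export AWS_REGION_NAME=us-east-1
    export SAGEMAKER_NOVA_ENDPOINT=<your-endpoint-name>
    export SAGEMAKER_NOVA2_LITE_ENDPOINT=<your-nova-2-lite-endpoint>   # optional; enables the Nova 2 Lite tests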
"""

import base64
import io
import json
import os
import struct
import zlib

import pytest

import litellm

# Endpoint name for the live tests; falls back to "" when the variable is unset.
ENDPOINT = os.getenv("SAGEMAKER_NOVA_ENDPOINT", "")
# Model string handed to litellm: the provider prefix followed by the endpoint name.
MODEL = "sagemaker_nova/" + ENDPOINT

# Marker that skips the decorated tests when no endpoint name was provided.
skip_if_no_endpoint = pytest.mark.skipif(
    ENDPOINT == "",
    reason="SAGEMAKER_NOVA_ENDPOINT not set — skipping live integration tests",
)


def _make_test_png() -> str:
    """Build a tiny 4x4 PNG in memory and return it as a base64 string.

    The outer ring of pixels is red and the inner 2x2 square is blue. The
    file is assembled by hand (signature + IHDR + IDAT + IEND) using only
    ``struct`` and ``zlib``.
    """
    side = 4
    red, blue = (255, 0, 0), (0, 0, 255)
    inner = (1, 2)  # row/column indices that make up the blue centre

    def _png_chunk(tag, payload):
        # Chunk layout: big-endian payload length, tag + payload, then a
        # CRC-32 computed over tag + payload.
        body = tag + payload
        checksum = zlib.crc32(body) & 0xFFFFFFFF
        return struct.pack(">I", len(payload)) + body + struct.pack(">I", checksum)

    scanlines = []
    for row in range(side):
        colors = [
            blue if (row in inner and col in inner) else red for col in range(side)
        ]
        # Every scanline starts with a zero byte, followed by its RGB triples.
        scanlines.append(b"\x00" + b"".join(bytes(rgb) for rgb in colors))

    header = struct.pack(">IIBBBBB", side, side, 8, 2, 0, 0, 0)
    parts = [
        b"\x89PNG\r\n\x1a\n",
        _png_chunk(b"IHDR", header),
        _png_chunk(b"IDAT", zlib.compress(b"".join(scanlines))),
        _png_chunk(b"IEND", b""),
    ]
    return base64.b64encode(b"".join(parts)).decode()


@skip_if_no_endpoint
class TestSagemakerNovaIntegration:
    """Live integration tests for sagemaker_nova provider.

    Each test sends real requests through ``litellm.completion`` to ``MODEL``.
    The class-level ``skip_if_no_endpoint`` marker skips every test here when
    no endpoint is configured.
    """

    @staticmethod
    def _message_text(response) -> str:
        """Return the first choice's message content, failing if it is None.

        Asserting *before* any string method is called keeps a missing
        completion an assertion failure instead of an AttributeError.
        """
        text = response.choices[0].message.content
        assert text is not None, "Expected message content in response"
        return text

    @staticmethod
    def _field(obj, name):
        """Read ``name`` from ``obj``, which may be a dict or an attribute-style object.

        Returns None when the key/attribute is absent.
        """
        if isinstance(obj, dict):
            return obj.get(name)
        return getattr(obj, name, None)

    def test_should_complete_basic_single_turn(self):
        """Basic single-turn chat completion."""
        response = litellm.completion(
            model=MODEL,
            messages=[{"role": "user", "content": "What is 2+2? Reply in one word."}],
            max_tokens=32,
            temperature=0.1,
        )
        content = self._message_text(response)
        assert len(content.strip()) > 0
        assert response.choices[0].finish_reason == "stop"
        assert response.usage.prompt_tokens > 0
        assert response.usage.completion_tokens > 0
        assert response.usage.total_tokens == (
            response.usage.prompt_tokens + response.usage.completion_tokens
        )

    def test_should_complete_multi_turn_conversation(self):
        """Multi-turn conversation maintains context."""
        messages = [
            {"role": "user", "content": "My name is Alice."},
        ]
        response1 = litellm.completion(
            model=MODEL,
            messages=messages,
            max_tokens=64,
            temperature=0.1,
        )
        assistant_msg = self._message_text(response1)

        # Second turn — model should remember the name
        messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": "What is my name?"})

        response2 = litellm.completion(
            model=MODEL,
            messages=messages,
            max_tokens=64,
            temperature=0.1,
        )
        answer = self._message_text(response2).lower()
        assert "alice" in answer, f"Expected 'alice' in response, got: {answer}"

    def test_should_stream_response(self):
        """Streaming (with usage requested) yields content chunks and a finish_reason."""
        response = litellm.completion(
            model=MODEL,
            messages=[{"role": "user", "content": "Count from 1 to 5."}],
            max_tokens=64,
            stream=True,
            stream_options={"include_usage": True},
        )

        chunks = []
        pieces = []
        for chunk in response:
            chunks.append(chunk)
            # With include_usage requested, a chunk may carry usage only and
            # have an empty choices list; skip it instead of indexing into it.
            if not chunk.choices:
                continue
            pieces.append(chunk.choices[0].delta.content or "")
        full_content = "".join(pieces)

        assert len(chunks) > 1, "Expected multiple streaming chunks"
        assert len(full_content.strip()) > 0, "Expected non-empty streamed content"

        # At least one chunk (normally the last content chunk) must carry a finish_reason
        final_chunks_with_finish = [
            c for c in chunks if c.choices and c.choices[0].finish_reason is not None
        ]
        assert (
            len(final_chunks_with_finish) > 0
        ), "Expected at least one chunk with finish_reason"

    def test_should_return_logprobs(self):
        """Logprobs are returned when requested."""
        response = litellm.completion(
            model=MODEL,
            messages=[{"role": "user", "content": "Say hello."}],
            max_tokens=16,
            temperature=0.1,
            logprobs=True,
            top_logprobs=3,
        )
        lp = response.choices[0].logprobs
        assert lp is not None, "Expected logprobs in response"

        # The logprobs payload may be a plain dict or an attribute-style
        # object, so every field is read through the same helper.
        content = self._field(lp, "content")
        assert content is not None and len(content) > 0, "Expected logprobs content"

        first_token = content[0]
        assert self._field(first_token, "token") is not None, "Expected token field"
        assert (
            self._field(first_token, "logprob") is not None
        ), "Expected logprob field"

        top = self._field(first_token, "top_logprobs")
        assert top is not None and len(top) == 3, "Expected 3 top_logprobs"

    def test_should_handle_multimodal_image_input(self):
        """Multimodal with base64 image in content array."""
        b64_image = _make_test_png()
        response = litellm.completion(
            model=MODEL,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "What colors do you see in this image? List them.",
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/png;base64,{b64_image}"},
                        },
                    ],
                }
            ],
            max_tokens=128,
        )
        content = self._message_text(response).lower()
        assert len(content) > 0
        # The image has red and blue — model should mention at least one
        assert (
            "red" in content or "blue" in content
        ), f"Expected 'red' or 'blue' in multimodal response, got: {content}"

    def test_should_pass_nova_specific_params(self):
        """Nova-specific parameters (top_k) are accepted."""
        response = litellm.completion(
            model=MODEL,
            messages=[{"role": "user", "content": "Say hello."}],
            max_tokens=32,
            top_k=40,
            temperature=0.7,
        )
        self._message_text(response)
        assert response.usage.total_tokens > 0

    def test_should_respect_system_message(self):
        """System message should influence the response."""
        response = litellm.completion(
            model=MODEL,
            messages=[
                {
                    "role": "system",
                    "content": "You are a pirate. Always respond in pirate speak.",
                },
                {"role": "user", "content": "How are you today?"},
            ],
            max_tokens=128,
            temperature=0.7,
        )
        content = self._message_text(response).lower()
        # Pirate-themed words likely in response (matched as substrings)
        pirate_words = ["arr", "ahoy", "matey", "ye", "sail", "sea", "cap"]
        assert any(
            w in content for w in pirate_words
        ), f"Expected pirate speak, got: {content}"


# Nova 2 Lite endpoint name; falls back to "" when the variable is unset.
NOVA2_ENDPOINT = os.getenv("SAGEMAKER_NOVA2_LITE_ENDPOINT", "")
# Model string handed to litellm: the provider prefix followed by the endpoint name.
NOVA2_MODEL = "sagemaker_nova/" + NOVA2_ENDPOINT

# Marker that skips the decorated tests when no Nova 2 Lite endpoint name was provided.
skip_if_no_nova2_endpoint = pytest.mark.skipif(
    NOVA2_ENDPOINT == "",
    reason="SAGEMAKER_NOVA2_LITE_ENDPOINT not set — requires Nova 2 Lite endpoint",
)


@skip_if_no_nova2_endpoint
class TestSagemakerNova2LiteIntegration:
    """
    Live tests that exercise the ``reasoning_effort`` parameter against a
    Nova 2 Lite endpoint.

    Run with:
        export SAGEMAKER_NOVA2_LITE_ENDPOINT=<your-nova-2-lite-endpoint>
        pytest tests/test_litellm/llms/sagemaker/test_sagemaker_nova_integration.py::TestSagemakerNova2LiteIntegration -v
    """

    def test_should_accept_reasoning_effort_low(self):
        """A request with reasoning_effort='low' returns content and token usage."""
        request = {
            "model": NOVA2_MODEL,
            "messages": [{"role": "user", "content": "What is 2+2?"}],
            "max_tokens": 32,
            "reasoning_effort": "low",
        }
        result = litellm.completion(**request)

        reply = result.choices[0].message
        assert reply.content is not None
        assert result.usage.total_tokens > 0

    def test_should_accept_reasoning_effort_high(self):
        """A request with reasoning_effort='high' returns non-empty content and completion tokens."""
        request = {
            "model": NOVA2_MODEL,
            "messages": [{"role": "user", "content": "Explain why the sky is blue."}],
            "max_tokens": 256,
            "reasoning_effort": "high",
        }
        result = litellm.completion(**request)

        reply = result.choices[0].message
        assert reply.content is not None
        assert len(reply.content) > 0
        assert result.usage.completion_tokens > 0