mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-17 22:48:35 +00:00
3c595f6fd2
The old test assumed ArizePhoenixLogger reused the global TracerProvider. With the nested traces fix, Phoenix now creates its own dedicated provider and produces litellm_proxy_request + litellm_request + raw_gen_ai_request spans independently. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
319 lines
10 KiB
Python
319 lines
10 KiB
Python
import json
|
|
import os
|
|
import sys
|
|
from datetime import datetime
|
|
from unittest.mock import AsyncMock
|
|
|
|
sys.path.insert(
|
|
0, os.path.abspath("../..")
|
|
) # Adds the parent directory to the system-path
|
|
|
|
import pytest
|
|
import litellm
|
|
import asyncio
|
|
import logging
|
|
from opentelemetry import trace
|
|
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
|
|
from litellm._logging import verbose_logger
|
|
from litellm.integrations.arize.arize_phoenix import ArizePhoenixLogger
|
|
from litellm.integrations._types.open_inference import (
|
|
OpenInferenceSpanKindValues,
|
|
SpanAttributes as OISpanAttributes,
|
|
)
|
|
from litellm.integrations.opentelemetry import (
|
|
LITELLM_PROXY_REQUEST_SPAN_NAME,
|
|
LITELLM_TRACER_NAME,
|
|
LITELLM_REQUEST_SPAN_NAME,
|
|
OpenTelemetry,
|
|
OpenTelemetryConfig,
|
|
RAW_REQUEST_SPAN_NAME,
|
|
Span,
|
|
)
|
|
from litellm.proxy._types import SpanAttributes
|
|
|
|
verbose_logger.setLevel(logging.DEBUG)
|
|
|
|
EXPECTED_SPAN_NAMES = ["litellm_request", "raw_gen_ai_request"]
|
|
exporter = InMemorySpanExporter()
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
@pytest.mark.parametrize("streaming", [True, False])
|
|
async def test_async_otel_callback(streaming):
|
|
litellm.set_verbose = True
|
|
|
|
# Clear exporter at the start to ensure clean state
|
|
exporter.clear()
|
|
|
|
litellm.callbacks = [OpenTelemetry(config=OpenTelemetryConfig(exporter=exporter))]
|
|
|
|
response = await litellm.acompletion(
|
|
model="gpt-3.5-turbo",
|
|
messages=[{"role": "user", "content": "hi"}],
|
|
temperature=0.1,
|
|
user="OTEL_USER",
|
|
stream=streaming,
|
|
)
|
|
|
|
if streaming is True:
|
|
async for chunk in response:
|
|
print("chunk", chunk)
|
|
|
|
await asyncio.sleep(4)
|
|
spans = exporter.get_finished_spans()
|
|
print("spans", spans)
|
|
assert len(spans) == 2
|
|
|
|
_span_names = [span.name for span in spans]
|
|
print("recorded span names", _span_names)
|
|
assert set(_span_names) == set(EXPECTED_SPAN_NAMES)
|
|
|
|
# print the value of a span
|
|
for span in spans:
|
|
print("span name", span.name)
|
|
print("span attributes", span.attributes)
|
|
|
|
if span.name == "litellm_request":
|
|
validate_litellm_request(span)
|
|
# Additional specific checks
|
|
assert span._attributes["gen_ai.request.model"] == "gpt-3.5-turbo"
|
|
assert span._attributes["gen_ai.system"] == "openai"
|
|
assert span._attributes["gen_ai.request.temperature"] == 0.1
|
|
assert span._attributes["llm.is_streaming"] == str(streaming)
|
|
assert span._attributes["llm.user"] == "OTEL_USER"
|
|
elif span.name == "raw_gen_ai_request":
|
|
if streaming is True:
|
|
validate_raw_gen_ai_request_openai_streaming(span)
|
|
else:
|
|
validate_raw_gen_ai_request_openai_non_streaming(span)
|
|
|
|
# clear in memory exporter
|
|
exporter.clear()
|
|
|
|
|
|
def validate_litellm_request(span):
|
|
expected_attributes = [
|
|
"gen_ai.request.model",
|
|
"gen_ai.system",
|
|
"gen_ai.request.temperature",
|
|
"llm.is_streaming",
|
|
"llm.user",
|
|
"gen_ai.response.id",
|
|
"gen_ai.response.model",
|
|
"gen_ai.usage.total_tokens",
|
|
"gen_ai.usage.output_tokens",
|
|
"gen_ai.usage.input_tokens",
|
|
]
|
|
|
|
# get the str of all the span attributes
|
|
print("span attributes", span._attributes)
|
|
|
|
for attr in expected_attributes:
|
|
value = span._attributes[attr]
|
|
print("value", value)
|
|
assert value is not None, f"Attribute {attr} has None value"
|
|
|
|
|
|
def validate_raw_gen_ai_request_openai_non_streaming(span):
|
|
expected_attributes = [
|
|
"llm.openai.messages",
|
|
"llm.openai.temperature",
|
|
"llm.openai.user",
|
|
"llm.openai.extra_body",
|
|
"llm.openai.id",
|
|
"llm.openai.choices",
|
|
"llm.openai.created",
|
|
"llm.openai.model",
|
|
"llm.openai.object",
|
|
"llm.openai.service_tier",
|
|
"llm.openai.system_fingerprint",
|
|
"llm.openai.usage",
|
|
]
|
|
|
|
print("span attributes", span._attributes)
|
|
for attr in span._attributes:
|
|
print(attr)
|
|
|
|
for attr in expected_attributes:
|
|
assert span._attributes[attr] is not None, f"Attribute {attr} has None"
|
|
|
|
|
|
def validate_raw_gen_ai_request_openai_streaming(span):
|
|
expected_attributes = [
|
|
"llm.openai.messages",
|
|
"llm.openai.temperature",
|
|
"llm.openai.user",
|
|
"llm.openai.extra_body",
|
|
"llm.openai.model",
|
|
]
|
|
|
|
print("span attributes", span._attributes)
|
|
for attr in span._attributes:
|
|
print(attr)
|
|
|
|
for attr in expected_attributes:
|
|
assert span._attributes[attr] is not None, f"Attribute {attr} has None"
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
@pytest.mark.parametrize("streaming", [True, False])
|
|
@pytest.mark.parametrize("global_redact", [True, False])
|
|
async def test_awesome_otel_with_message_logging_off(streaming, global_redact):
|
|
"""
|
|
No content should be logged when message logging is off
|
|
|
|
tests when litellm.turn_off_message_logging is set to True
|
|
tests when OpenTelemetry(message_logging=False) is set
|
|
"""
|
|
litellm.set_verbose = True
|
|
|
|
# Clear exporter at the start to ensure clean state
|
|
exporter.clear()
|
|
|
|
litellm.callbacks = [OpenTelemetry(config=OpenTelemetryConfig(exporter=exporter))]
|
|
if global_redact is False:
|
|
otel_logger = OpenTelemetry(
|
|
message_logging=False, config=OpenTelemetryConfig(exporter="console")
|
|
)
|
|
else:
|
|
# use global redaction
|
|
litellm.turn_off_message_logging = True
|
|
otel_logger = OpenTelemetry(config=OpenTelemetryConfig(exporter="console"))
|
|
|
|
litellm.callbacks = [otel_logger]
|
|
litellm.success_callback = []
|
|
litellm.failure_callback = []
|
|
|
|
response = await litellm.acompletion(
|
|
model="gpt-3.5-turbo",
|
|
messages=[{"role": "user", "content": "hi"}],
|
|
mock_response="hi",
|
|
stream=streaming,
|
|
)
|
|
print("response", response)
|
|
|
|
if streaming is True:
|
|
async for chunk in response:
|
|
print("chunk", chunk)
|
|
|
|
await asyncio.sleep(1)
|
|
spans = exporter.get_finished_spans()
|
|
print("spans", spans)
|
|
assert len(spans) == 1
|
|
|
|
_span = spans[0]
|
|
print("span attributes", _span.attributes)
|
|
|
|
validate_redacted_message_span_attributes(_span)
|
|
|
|
# clear in memory exporter
|
|
exporter.clear()
|
|
|
|
if global_redact is True:
|
|
litellm.turn_off_message_logging = False
|
|
|
|
|
|
def validate_redacted_message_span_attributes(span):
|
|
# Required non-metadata attributes that must be present
|
|
required_attributes = [
|
|
"gen_ai.request.model",
|
|
"gen_ai.system",
|
|
"llm.is_streaming",
|
|
"llm.request.type",
|
|
"gen_ai.response.id",
|
|
"gen_ai.response.model",
|
|
"gen_ai.usage.total_tokens",
|
|
"gen_ai.usage.output_tokens",
|
|
"gen_ai.usage.input_tokens",
|
|
]
|
|
|
|
_all_attributes = set(
|
|
[
|
|
name.value if isinstance(name, SpanAttributes) else name
|
|
for name in span.attributes.keys()
|
|
]
|
|
)
|
|
print("all_attributes", _all_attributes)
|
|
|
|
for attr in _all_attributes:
|
|
print(f"attr: {attr}, type: {type(attr)}")
|
|
|
|
# Check that all required attributes are present
|
|
required_set = set(required_attributes)
|
|
assert required_set.issubset(
|
|
_all_attributes
|
|
), f"Missing required attributes: {required_set - _all_attributes}"
|
|
|
|
# Check that any additional attributes are metadata fields (start with "metadata.") or cost fields
|
|
non_required_attrs = _all_attributes - required_set
|
|
for attr in non_required_attrs:
|
|
assert (
|
|
attr.startswith("metadata.")
|
|
or attr.startswith("hidden_params")
|
|
or attr.startswith("gen_ai.cost.")
|
|
or attr.startswith("gen_ai.operation.")
|
|
or attr.startswith("gen_ai.request.")
|
|
), f"Non-metadata attribute found: {attr}"
|
|
|
|
pass
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_arize_phoenix_creates_nested_spans_on_dedicated_provider():
|
|
"""
|
|
ArizePhoenixLogger creates its own dedicated TracerProvider so it can
|
|
coexist with the generic ``otel`` callback. In proxy mode it creates a
|
|
``litellm_proxy_request`` parent span and a ``litellm_request`` child span
|
|
on its *own* provider — completely independent of the global provider.
|
|
|
|
This test verifies:
|
|
1. Phoenix creates both parent and child spans on its dedicated exporter.
|
|
2. The spans form a proper parent-child hierarchy (same trace ID).
|
|
3. A raw_gen_ai_request sub-span is also produced.
|
|
"""
|
|
from opentelemetry.sdk.trace import TracerProvider as SDKTracerProvider
|
|
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
|
|
|
|
phoenix_exporter = InMemorySpanExporter()
|
|
|
|
litellm.logging_callback_manager._reset_all_callbacks()
|
|
|
|
# ArizePhoenixLogger builds its own TracerProvider internally.
|
|
# We pass our in-memory exporter so we can inspect spans.
|
|
phoenix_logger = ArizePhoenixLogger(
|
|
config=OpenTelemetryConfig(exporter=phoenix_exporter),
|
|
callback_name="arize_phoenix",
|
|
)
|
|
|
|
litellm.callbacks = [phoenix_logger]
|
|
litellm.success_callback = []
|
|
litellm.failure_callback = []
|
|
|
|
# Simulate a proxy request by injecting proxy_server_request as a top-level kwarg.
|
|
# This triggers ArizePhoenixLogger._get_phoenix_context to create its own parent span.
|
|
await litellm.acompletion(
|
|
model="gpt-3.5-turbo",
|
|
messages=[{"role": "user", "content": "ping"}],
|
|
mock_response="pong",
|
|
proxy_server_request={"url": "/chat/completions", "method": "POST", "headers": {}},
|
|
)
|
|
|
|
# Flush async span processing
|
|
await asyncio.sleep(1)
|
|
|
|
spans = phoenix_exporter.get_finished_spans()
|
|
span_names = [s.name for s in spans]
|
|
|
|
# Phoenix creates its own span names on its dedicated TracerProvider:
|
|
# - "litellm_proxy_request" (parent) — created by _get_phoenix_context
|
|
# - "litellm_request" (child) — the LLM call span
|
|
# - "raw_gen_ai_request" — raw request sub-span
|
|
assert "litellm_proxy_request" in span_names, f"Expected proxy parent span, got: {span_names}"
|
|
assert LITELLM_REQUEST_SPAN_NAME in span_names, f"Expected request child span, got: {span_names}"
|
|
assert RAW_REQUEST_SPAN_NAME in span_names, f"Expected raw request span, got: {span_names}"
|
|
|
|
# All spans should share the same trace ID (proper hierarchy)
|
|
trace_ids = {s.context.trace_id for s in spans}
|
|
assert len(trace_ids) == 1, f"Expected single trace, got {len(trace_ids)} traces"
|
|
|
|
phoenix_exporter.clear()
|