Enable OpenTelemetry context propagation by external tracers (#15940)

* Fix: Enable OpenTelemetry context propagation with external tracers

* Add test for handling external tracers

* change priority to get tracer

---------

Co-authored-by: eycjur <eycjur@example.com>
This commit is contained in:
Katsuhiro Muto
2025-10-27 02:09:54 +09:00
committed by GitHub
parent a75e75ae1a
commit d8b44f4dbf
3 changed files with 131 additions and 15 deletions
+63 -15
View File
@@ -198,12 +198,43 @@ class OpenTelemetry(CustomLogger):
# use provided tracer or create a new one
if tracer_provider is None:
tracer_provider = TracerProvider(resource=_get_litellm_resource())
# Only add OTLP span processor if we created the tracer provider ourselves
tracer_provider.add_span_processor(self._get_span_processor())
# Check if a TracerProvider is already set globally (e.g., by Langfuse SDK)
try:
from opentelemetry.trace import ProxyTracerProvider
existing_provider = trace.get_tracer_provider()
# register global provider and grab our tracer
trace.set_tracer_provider(tracer_provider)
# If an actual provider exists (not the default proxy), use it
if not isinstance(existing_provider, ProxyTracerProvider):
verbose_logger.debug(
"OpenTelemetry: Using existing TracerProvider: %s",
type(existing_provider).__name__
)
tracer_provider = existing_provider
# Don't call set_tracer_provider to preserve existing context
else:
# No real provider exists yet, create our own
verbose_logger.debug("OpenTelemetry: Creating new TracerProvider")
tracer_provider = TracerProvider(resource=_get_litellm_resource())
tracer_provider.add_span_processor(self._get_span_processor())
trace.set_tracer_provider(tracer_provider)
except Exception as e:
# Fallback: create a new provider if something goes wrong
verbose_logger.debug(
"OpenTelemetry: Exception checking existing provider, creating new one: %s",
str(e)
)
tracer_provider = TracerProvider(resource=_get_litellm_resource())
tracer_provider.add_span_processor(self._get_span_processor())
trace.set_tracer_provider(tracer_provider)
else:
# Tracer provider explicitly provided (e.g., for testing)
verbose_logger.debug(
"OpenTelemetry: Using provided TracerProvider: %s",
type(tracer_provider).__name__
)
trace.set_tracer_provider(tracer_provider)
# grab our tracer
self.tracer = trace.get_tracer(LITELLM_TRACER_NAME)
self.span_kind = SpanKind
@@ -524,7 +555,6 @@ class OpenTelemetry(CustomLogger):
#########################################################
def _handle_success(self, kwargs, response_obj, start_time, end_time):
verbose_logger.debug(
"OpenTelemetry Logger: Logging kwargs: %s, OTEL config settings=%s",
kwargs,
@@ -1232,7 +1262,7 @@ class OpenTelemetry(CustomLogger):
return _parent_context
def _get_span_context(self, kwargs):
from opentelemetry import trace
from opentelemetry import context, trace
from opentelemetry.trace.propagation.tracecontext import (
TraceContextTextMapPropagator,
)
@@ -1244,20 +1274,38 @@ class OpenTelemetry(CustomLogger):
_metadata = litellm_params.get("metadata", {}) or {}
parent_otel_span = _metadata.get("litellm_parent_otel_span", None)
"""
Two ways to use parents in OpenTelemetry
- using the traceparent header
- using the parent_otel_span in the [metadata][parent_otel_span]
"""
# Priority 1: Explicit parent span from metadata
if parent_otel_span is not None:
verbose_logger.debug("OpenTelemetry: Using explicit parent span from metadata")
return trace.set_span_in_context(parent_otel_span), parent_otel_span
if traceparent is None:
return None, None
else:
# Priority 2: HTTP traceparent header
if traceparent is not None:
verbose_logger.debug("OpenTelemetry: Using traceparent header for context propagation")
carrier = {"traceparent": traceparent}
return TraceContextTextMapPropagator().extract(carrier=carrier), None
# Priority 3: Active span from global context (auto-detection)
try:
current_span = trace.get_current_span()
if current_span is not None:
span_context = current_span.get_span_context()
if span_context.is_valid:
verbose_logger.debug(
"OpenTelemetry: Using active span from global context: %s (trace_id=%s, span_id=%s, is_recording=%s)",
current_span,
format(span_context.trace_id, '032x'),
format(span_context.span_id, '016x'),
current_span.is_recording()
)
return context.get_current(), current_span
except Exception as e:
verbose_logger.debug("OpenTelemetry: Error getting current span: %s", str(e))
# Priority 4: No parent context
verbose_logger.debug("OpenTelemetry: No parent context found, creating root span")
return None, None
def _get_span_processor(self, dynamic_headers: Optional[dict] = None):
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
OTLPSpanExporter as OTLPSpanExporterGRPC,
+5
View File
@@ -9,6 +9,7 @@
import ast
import asyncio
import contextvars
import base64
import binascii
import copy
@@ -1299,7 +1300,11 @@ def client(original_function): # noqa: PLR0915
# LOG SUCCESS - handle streaming success logging in the _next_ object, remove `handle_success` once it's deprecated
verbose_logger.info("Wrapper: Completed Call, calling success_handler")
# Copy the current context to propagate it to the background thread
# This is essential for OpenTelemetry span context propagation
ctx = contextvars.copy_context()
executor.submit(
ctx.run,
logging_obj.success_handler,
result,
start_time,
@@ -56,3 +56,66 @@ class TestOpentelemetryUnitTests(BaseLoggingCallbackTest):
await asyncio.sleep(1)
parent_otel_span.end.assert_called_once()
def test_init_tracing_respects_existing_tracer_provider(self):
    """
    Unit test: _init_tracing() should respect an existing TracerProvider.

    When a TracerProvider is already registered globally (e.g., set by the
    Langfuse SDK), LiteLLM should use it instead of creating and registering
    a new one.
    """
    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider

    from litellm.integrations.opentelemetry import OpenTelemetry

    # Setup: register a real TracerProvider, then read the global provider
    # back through the API. The value read back (not the instance created
    # here) is the reference point, so the check stays meaningful even if a
    # provider had already been registered before this test ran.
    trace.set_tracer_provider(TracerProvider())
    existing_provider = trace.get_tracer_provider()

    # Act: initialize the integration without passing a tracer provider; it
    # should detect the existing one. Only the constructor's side effect on
    # the global provider matters, so the instance is not kept.
    OpenTelemetry()

    # Assert: the globally registered provider is still the same object.
    current_provider = trace.get_tracer_provider()
    assert current_provider is existing_provider, (
        "Existing TracerProvider should be respected and not overridden"
    )
def test_get_span_context_detects_active_span(self):
    """
    Unit test: _get_span_context() should pick up the currently active span.

    With no explicit parent in the request metadata, the span that is
    current in the global context should be returned as the parent.
    """
    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider

    from litellm.integrations.opentelemetry import OpenTelemetry

    # Setup: register a provider and obtain a tracer from it
    trace.set_tracer_provider(TracerProvider())
    tracer = trace.get_tracer(__name__)

    # Integration under test
    otel_logger = OpenTelemetry()

    # Act: open a span and ask the integration for the parent context
    with tracer.start_as_current_span("test_parent") as active_span:
        expected_ctx = active_span.get_span_context()

        # Request kwargs carrying no explicit parent span in the metadata
        request_kwargs = {"litellm_params": {"metadata": {}}}
        _otel_context, found_span = otel_logger._get_span_context(request_kwargs)

        # Assert: the active span itself is what was detected
        assert found_span is not None, "Should detect active span from global context"
        assert found_span is active_span, "Detected span should be the active parent span"

        # Assert: the identifiers match those of the active span
        found_ctx = found_span.get_span_context()
        assert found_ctx.trace_id == expected_ctx.trace_id, (
            "Detected span should have same trace_id as parent"
        )
        assert found_ctx.span_id == expected_ctx.span_id, (
            "Detected span should have same span_id as parent"
        )