From d8b44f4dbf183b2bb591a29362feaec4a1f5fcce Mon Sep 17 00:00:00 2001 From: Katsuhiro Muto <63308909+eycjur@users.noreply.github.com> Date: Mon, 27 Oct 2025 02:09:54 +0900 Subject: [PATCH] Enable OpenTelemetry context propagation by external tracers (#15940) * Fix: Enable OpenTelemetry context propagation with external tracers * Add test for handling external tracers * change priority to get tracer --------- Co-authored-by: eycjur --- litellm/integrations/opentelemetry.py | 78 +++++++++++++++---- litellm/utils.py | 5 ++ .../test_opentelemetry_unit_tests.py | 63 +++++++++++++++ 3 files changed, 131 insertions(+), 15 deletions(-) diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py index 6e8fc6d0cc..267a63be5c 100644 --- a/litellm/integrations/opentelemetry.py +++ b/litellm/integrations/opentelemetry.py @@ -198,12 +198,43 @@ class OpenTelemetry(CustomLogger): # use provided tracer or create a new one if tracer_provider is None: - tracer_provider = TracerProvider(resource=_get_litellm_resource()) - # Only add OTLP span processor if we created the tracer provider ourselves - tracer_provider.add_span_processor(self._get_span_processor()) + # Check if a TracerProvider is already set globally (e.g., by Langfuse SDK) + try: + from opentelemetry.trace import ProxyTracerProvider + existing_provider = trace.get_tracer_provider() - # register global provider and grab our tracer - trace.set_tracer_provider(tracer_provider) + # If an actual provider exists (not the default proxy), use it + if not isinstance(existing_provider, ProxyTracerProvider): + verbose_logger.debug( + "OpenTelemetry: Using existing TracerProvider: %s", + type(existing_provider).__name__ + ) + tracer_provider = existing_provider + # Don't call set_tracer_provider to preserve existing context + else: + # No real provider exists yet, create our own + verbose_logger.debug("OpenTelemetry: Creating new TracerProvider") + tracer_provider = TracerProvider(resource=_get_litellm_resource()) + tracer_provider.add_span_processor(self._get_span_processor()) + trace.set_tracer_provider(tracer_provider) + except Exception as e: + # Fallback: create a new provider if something goes wrong + verbose_logger.debug( + "OpenTelemetry: Exception checking existing provider, creating new one: %s", + str(e) + ) + tracer_provider = TracerProvider(resource=_get_litellm_resource()) + tracer_provider.add_span_processor(self._get_span_processor()) + trace.set_tracer_provider(tracer_provider) + else: + # Tracer provider explicitly provided (e.g., for testing) + verbose_logger.debug( + "OpenTelemetry: Using provided TracerProvider: %s", + type(tracer_provider).__name__ + ) + trace.set_tracer_provider(tracer_provider) + + # grab our tracer self.tracer = trace.get_tracer(LITELLM_TRACER_NAME) self.span_kind = SpanKind @@ -524,7 +555,6 @@ class OpenTelemetry(CustomLogger): ######################################################### def _handle_success(self, kwargs, response_obj, start_time, end_time): - verbose_logger.debug( "OpenTelemetry Logger: Logging kwargs: %s, OTEL config settings=%s", kwargs, @@ -1232,7 +1262,7 @@ class OpenTelemetry(CustomLogger): return _parent_context def _get_span_context(self, kwargs): - from opentelemetry import trace + from opentelemetry import context, trace from opentelemetry.trace.propagation.tracecontext import ( TraceContextTextMapPropagator, ) @@ -1244,20 +1274,38 @@ class OpenTelemetry(CustomLogger): _metadata = litellm_params.get("metadata", {}) or {} parent_otel_span = _metadata.get("litellm_parent_otel_span", None) - """ - Two way to use parents in opentelemetry - - using the traceparent header - - using the parent_otel_span in the [metadata][parent_otel_span] - """ + # Priority 1: Explicit parent span from metadata if parent_otel_span is not None: + verbose_logger.debug("OpenTelemetry: Using explicit parent span from metadata") return trace.set_span_in_context(parent_otel_span), parent_otel_span - if traceparent is None: - return None, None - else: + # Priority 2: HTTP traceparent header + if traceparent is not None: + verbose_logger.debug("OpenTelemetry: Using traceparent header for context propagation") carrier = {"traceparent": traceparent} return TraceContextTextMapPropagator().extract(carrier=carrier), None + # Priority 3: Active span from global context (auto-detection) + try: + current_span = trace.get_current_span() + if current_span is not None: + span_context = current_span.get_span_context() + if span_context.is_valid: + verbose_logger.debug( + "OpenTelemetry: Using active span from global context: %s (trace_id=%s, span_id=%s, is_recording=%s)", + current_span, + format(span_context.trace_id, '032x'), + format(span_context.span_id, '016x'), + current_span.is_recording() + ) + return context.get_current(), current_span + except Exception as e: + verbose_logger.debug("OpenTelemetry: Error getting current span: %s", str(e)) + + # Priority 4: No parent context + verbose_logger.debug("OpenTelemetry: No parent context found, creating root span") + return None, None + def _get_span_processor(self, dynamic_headers: Optional[dict] = None): from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import ( OTLPSpanExporter as OTLPSpanExporterGRPC, diff --git a/litellm/utils.py b/litellm/utils.py index 87936d919c..94889d23d5 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -9,6 +9,7 @@ import ast import asyncio +import contextvars import base64 import binascii import copy @@ -1299,7 +1300,11 @@ def client(original_function): # noqa: PLR0915 # LOG SUCCESS - handle streaming success logging in the _next_ object, remove `handle_success` once it's deprecated verbose_logger.info("Wrapper: Completed Call, calling success_handler") + # Copy the current context to propagate it to the background thread + # This is essential for OpenTelemetry span context propagation + ctx = contextvars.copy_context() executor.submit( + ctx.run, logging_obj.success_handler, result, start_time, diff --git a/tests/logging_callback_tests/test_opentelemetry_unit_tests.py b/tests/logging_callback_tests/test_opentelemetry_unit_tests.py index 018447f00b..a1b80e0d1d 100644 --- a/tests/logging_callback_tests/test_opentelemetry_unit_tests.py +++ b/tests/logging_callback_tests/test_opentelemetry_unit_tests.py @@ -56,3 +56,66 @@ class TestOpentelemetryUnitTests(BaseLoggingCallbackTest): await asyncio.sleep(1) parent_otel_span.end.assert_called_once() + + def test_init_tracing_respects_existing_tracer_provider(self): + """ + Unit test: _init_tracing() should respect existing TracerProvider. + + When a TracerProvider already exists (e.g., set by Langfuse SDK), + LiteLLM should use it instead of creating a new one. + """ + from opentelemetry import trace + from opentelemetry.sdk.trace import TracerProvider + from litellm.integrations.opentelemetry import OpenTelemetry + + # Setup: Create and set an existing TracerProvider + tracer_provider = TracerProvider() + trace.set_tracer_provider(tracer_provider) + existing_provider = trace.get_tracer_provider() + + # Act: Initialize OpenTelemetry integration (should detect existing provider) + otel_integration = OpenTelemetry() + + # Assert: The existing provider should still be active + current_provider = trace.get_tracer_provider() + assert current_provider is existing_provider, ( + "Existing TracerProvider should be respected and not overridden" + ) + + def test_get_span_context_detects_active_span(self): + """ + Unit test: _get_span_context() should auto-detect active spans from global context. + + Active spans should be automatically detected without explicit metadata + """ + from opentelemetry import trace + from opentelemetry.sdk.trace import TracerProvider + from litellm.integrations.opentelemetry import OpenTelemetry + + # Setup: Create TracerProvider and tracer + tracer_provider = TracerProvider() + trace.set_tracer_provider(tracer_provider) + tracer = trace.get_tracer(__name__) + + # Create OpenTelemetry integration + otel_integration = OpenTelemetry() + + # Act: Create an active span and test detection + with tracer.start_as_current_span("test_parent") as parent_span: + parent_span_context = parent_span.get_span_context() + + # Call _get_span_context without explicit parent in metadata + kwargs = {"litellm_params": {"metadata": {}}} + detected_context, detected_span = otel_integration._get_span_context(kwargs) + + # Assert: Should detect the active span + assert detected_span is not None, "Should detect active span from global context" + assert detected_span is parent_span, "Detected span should be the active parent span" + + detected_span_context = detected_span.get_span_context() + assert detected_span_context.trace_id == parent_span_context.trace_id, ( + "Detected span should have same trace_id as parent" + ) + assert detected_span_context.span_id == parent_span_context.span_id, ( + "Detected span should have same span_id as parent" + )