Files
litellm/tests/test_litellm/integrations/test_langfuse.py
T
Harshit28j cac041c944 Align Langfuse trace_id fallback with DB session_id for failed requests
When standard_logging_object is None (failure case), Langfuse was falling
back to litellm_call_id while the DB used litellm_trace_id as session_id.
This caused the Session ID in LiteLLM logs to not match the trace in
Langfuse. Now Langfuse checks litellm_trace_id first, matching the DB.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-02 20:55:31 +05:30

923 lines
36 KiB
Python

import datetime
import os
import sys
import types
import unittest
from typing import Optional
from unittest.mock import MagicMock, patch
import pytest
import litellm
from litellm.integrations.langfuse import langfuse as langfuse_module
from litellm.integrations.langfuse.langfuse import LangFuseLogger
sys.path.insert(0, os.path.abspath("../.."))
from litellm.integrations.langfuse.langfuse import LangFuseLogger
# Import LangfuseUsageDetails directly from the module where it's defined
from litellm.types.integrations.langfuse import *
class TestLangfuseUsageDetails(unittest.TestCase):
    """Tests for ``LangFuseLogger`` usage details and trace/session id selection.

    Every test runs against a mocked ``langfuse`` package and a mocked Langfuse
    client, so no real credentials or network access are needed.
    """

    def setUp(self):
        """Install the mocked ``langfuse`` module and build a logger bound to it.

        All global state touched here (``os.environ``, ``sys.modules`` and
        ``litellm.initialized_langfuse_clients``) is undone through
        ``addCleanup`` so it is restored even if a later step of ``setUp``
        raises, in which case ``tearDown`` would not run.
        """
        # Constructing a logger below changes the global client counter;
        # register its restoration first.
        self._original_langfuse_clients_count = litellm.initialized_langfuse_clients
        self.addCleanup(
            setattr,
            litellm,
            "initialized_langfuse_clients",
            self._original_langfuse_clients_count,
        )

        # Fake credentials for the duration of the test.
        self.env_patcher = patch.dict(
            "os.environ",
            {
                "LANGFUSE_SECRET_KEY": "test-secret-key",
                "LANGFUSE_PUBLIC_KEY": "test-public-key",
                "LANGFUSE_HOST": "https://test.langfuse.com",
            },
        )
        self.env_patcher.start()
        self.addCleanup(self.env_patcher.stop)

        # Mocked client -> trace -> generation/span chain.
        self.mock_langfuse_client = MagicMock()
        # The logger touches ``.client`` during initialization.
        self.mock_langfuse_client.client = MagicMock()
        self.mock_langfuse_trace = MagicMock()
        self.mock_langfuse_generation = MagicMock()
        self.mock_langfuse_generation.trace_id = "test-trace-id"
        # trace.span() is used when provider-specific / guardrail information
        # is logged as a span.
        self.mock_langfuse_span = MagicMock()
        self.mock_langfuse_span.end = MagicMock()
        self.mock_langfuse_trace.span.return_value = self.mock_langfuse_span
        self.mock_langfuse_trace.generation.return_value = self.mock_langfuse_generation

        # Record the keyword arguments of the most recent client.trace() call
        # so tests can assert on the trace id / session id that was chosen.
        self.last_trace_kwargs = {}

        def _trace_side_effect(*args, **kwargs):
            self.last_trace_kwargs = kwargs
            return self.mock_langfuse_trace

        self.mock_langfuse_client.trace.side_effect = _trace_side_effect

        # Replace the ``langfuse`` package, which the logger imports lazily.
        # patch.dict restores sys.modules when the patcher is stopped.
        self.langfuse_module_patcher = patch.dict(
            "sys.modules", {"langfuse": MagicMock()}
        )
        self.mock_langfuse_module = self.langfuse_module_patcher.start()
        self.addCleanup(self.langfuse_module_patcher.stop)

        # Module mock reporting a version that supports all features.
        self.mock_langfuse = MagicMock()
        self.mock_langfuse.version = MagicMock()
        self.mock_langfuse.version.__version__ = "3.0.0"

        # ``langfuse.Langfuse(...)`` hands back the mocked client.
        self.mock_langfuse_class = MagicMock()
        self.mock_langfuse_class.return_value = self.mock_langfuse_client
        sys.modules["langfuse"] = self.mock_langfuse
        sys.modules["langfuse"].Langfuse = self.mock_langfuse_class

        # Fresh logger per test, explicitly wired to the mocked client.
        self.logger = LangFuseLogger()
        self.logger.Langfuse = self.mock_langfuse_client
        # Needed by the logger's _supports_* checks.
        self.logger.langfuse_sdk_version = "3.0.0"

        def log_event_on_langfuse(
            self,
            kwargs,
            response_obj,
            start_time=None,
            end_time=None,
            user_id=None,
            level="DEFAULT",
            status_message=None,
        ):
            # Simplified entry point that forwards straight to _log_langfuse_v2.
            return self._log_langfuse_v2(
                user_id=user_id,
                metadata=kwargs.get("litellm_params", {}).get("metadata", {}),
                litellm_params=kwargs.get("litellm_params", {}),
                output=None,
                start_time=start_time,
                end_time=end_time,
                kwargs=kwargs,
                optional_params=kwargs.get("optional_params", {}),
                input=None,
                response_obj=response_obj,
                level=level,
                litellm_call_id=kwargs.get("litellm_call_id", None),
            )

        self.logger.log_event_on_langfuse = types.MethodType(
            log_event_on_langfuse, self.logger
        )

        def mock_is_langfuse_v2(self):
            # Force the v2 code path.
            return True

        self.logger._is_langfuse_v2 = types.MethodType(mock_is_langfuse_v2, self.logger)

    def tearDown(self):
        """Drop the logger so no state leaks into the next test.

        Patchers and the global client counter are restored by the cleanups
        registered in ``setUp``.
        """
        if hasattr(self, "logger"):
            # Break the reference to the mocked client before deleting.
            self.logger.Langfuse = None
            del self.logger

    def _build_standard_logging_payload(self, trace_id: Optional[str] = None):
        """Return a dict used as ``standard_logging_object`` in these tests.

        The ``trace_id`` key is only present when ``trace_id`` is not None.
        """
        payload = {
            "id": "payload-id",
            "call_type": "completion",
            "response_cost": 0.0,
            "status": "success",
            "total_tokens": 0,
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "startTime": 0.0,
            "endTime": 0.0,
            "completionStartTime": 0.0,
            "model": "gpt-4",
            "model_id": "model-123",
            "model_group": "openai",
            "api_base": "https://api.openai.com",
            "metadata": {
                "user_api_key_end_user_id": None,
                "prompt_management_metadata": None,
                "session_id": None,
                "trace_name": None,
                "trace_version": None,
                "headers": None,
                "endpoint": None,
                "caching_groups": None,
                "previous_models": None,
            },
            "hidden_params": {},
            "request_tags": [],
            "messages": [],
            "response": {"id": "resp"},
            "model_parameters": {},
            "guardrail_information": None,
            "standard_built_in_tools_params": None,
        }
        if trace_id is not None:
            payload["trace_id"] = trace_id
        return payload

    def _build_langfuse_kwargs(self, standard_logging_payload):
        """Wrap a standard logging payload in the ``kwargs`` dict passed to the logger."""
        return {
            "standard_logging_object": standard_logging_payload,
            "model": standard_logging_payload["model"],
            "call_type": standard_logging_payload["call_type"],
            "cache_hit": False,
            "messages": [],
        }

    def _log_v2_and_capture_trace(
        self,
        kwargs,
        *,
        metadata=None,
        output=None,
        input_payload=None,
        level="INFO",
        litellm_call_id="call-id-xyz",
    ):
        """Call ``_log_langfuse_v2`` and return the kwargs given to ``client.trace()``.

        ``_add_prompt_to_generation_params`` is patched to a pass-through.
        ``metadata`` is passed both directly and (as a separate copy) under
        ``litellm_params["metadata"]``.
        """
        metadata = {} if metadata is None else metadata
        # Naive timestamps, like the other tests in this class.
        start_time = datetime.datetime.now()
        self.last_trace_kwargs = {}
        with patch(
            "litellm.integrations.langfuse.langfuse._add_prompt_to_generation_params",
            side_effect=lambda generation_params, **_: generation_params,
            create=True,
        ):
            self.logger._log_langfuse_v2(
                user_id="user-1",
                metadata=metadata,
                # Separate dict so a mutation of one cannot affect the other.
                litellm_params={"metadata": dict(metadata)},
                output=output,
                start_time=start_time,
                end_time=start_time + datetime.timedelta(seconds=1),
                kwargs=kwargs,
                optional_params={},
                input=input_payload,
                response_obj=None,
                level=level,
                litellm_call_id=litellm_call_id,
            )
        return self.last_trace_kwargs

    def test_langfuse_usage_details_type(self):
        """Test that LangfuseUsageDetails TypedDict is properly defined with the correct fields"""
        usage_details: LangfuseUsageDetails = {
            "input": 10,
            "output": 20,
            "total": 30,
            "cache_creation_input_tokens": 5,
            "cache_read_input_tokens": 3,
        }
        self.assertEqual(usage_details["input"], 10)
        self.assertEqual(usage_details["output"], 20)
        self.assertEqual(usage_details["total"], 30)
        self.assertEqual(usage_details["cache_creation_input_tokens"], 5)
        self.assertEqual(usage_details["cache_read_input_tokens"], 3)

        # Same keys, with the cache counters zeroed.
        minimal_usage_details: LangfuseUsageDetails = {
            "input": 10,
            "output": 20,
            "total": 30,
            "cache_creation_input_tokens": 0,
            "cache_read_input_tokens": 0,
        }
        self.assertEqual(minimal_usage_details["input"], 10)
        self.assertEqual(minimal_usage_details["output"], 20)
        self.assertEqual(minimal_usage_details["total"], 30)

    def test_log_langfuse_v2_usage_details(self):
        """Test that log_event_on_langfuse forwards response_obj to _log_langfuse_v2"""
        response_obj = MagicMock()
        response_obj.usage = MagicMock()
        response_obj.usage.prompt_tokens = 15
        response_obj.usage.completion_tokens = 25

        def mock_get(key, default=None):
            # Cache token counters are read through usage.get(...).
            if key == "cache_creation_input_tokens":
                return 7
            elif key == "cache_read_input_tokens":
                return 4
            return default

        response_obj.usage.get = mock_get

        kwargs = {
            "model": "gpt-4",
            "messages": [{"role": "user", "content": "Hello"}],
            "litellm_params": {"metadata": {}},
        }
        start_time = datetime.datetime.now()
        end_time = start_time + datetime.timedelta(seconds=1)

        # _log_langfuse_v2 is replaced, so only the forwarding is verified.
        with patch.object(self.logger, "_log_langfuse_v2") as mock_log_langfuse_v2:
            self.logger.log_event_on_langfuse(
                kwargs=kwargs,
                response_obj=response_obj,
                start_time=start_time,
                end_time=end_time,
            )
            mock_log_langfuse_v2.assert_called_once()
            call_args = mock_log_langfuse_v2.call_args[1]
            self.assertEqual(call_args["response_obj"], response_obj)

    def test_langfuse_usage_details_optional_fields(self):
        """Test that LangfuseUsageDetails fields are properly defined as Optional"""
        usage_details: LangfuseUsageDetails = {
            "input": 10,
            "output": 20,
            "total": 30,
            "cache_creation_input_tokens": None,
            "cache_read_input_tokens": None,
        }
        self.assertEqual(usage_details["input"], 10)
        self.assertEqual(usage_details["output"], 20)
        self.assertEqual(usage_details["total"], 30)
        self.assertIsNone(usage_details["cache_creation_input_tokens"])
        self.assertIsNone(usage_details["cache_read_input_tokens"])

    def test_langfuse_usage_details_structure(self):
        """Test the expected key set of a usage-details dict, without the logger"""
        usage_details = {
            "input": 15,
            "output": 25,
            "total": 40,
            "cache_creation_input_tokens": 7,
            "cache_read_input_tokens": 4,
        }
        self.assertIn("input", usage_details)
        self.assertIn("output", usage_details)
        self.assertIn("total", usage_details)
        self.assertIn("cache_creation_input_tokens", usage_details)
        self.assertIn("cache_read_input_tokens", usage_details)
        self.assertEqual(usage_details["input"], 15)
        self.assertEqual(usage_details["output"], 25)
        self.assertEqual(usage_details["total"], 40)
        self.assertEqual(usage_details["cache_creation_input_tokens"], 7)
        self.assertEqual(usage_details["cache_read_input_tokens"], 4)

    def test_log_langfuse_v2_handles_null_usage_values(self):
        """
        Test that _log_langfuse_v2 correctly handles None values in the usage object
        by converting them to 0, preventing validation errors.
        """
        # Start from clean mocks; side_effect=True also clears the trace side
        # effect installed by setUp so that return_value takes effect.
        self.mock_langfuse_client.reset_mock(side_effect=True)
        self.mock_langfuse_trace.reset_mock(side_effect=True)
        self.mock_langfuse_generation.reset_mock(side_effect=True)

        # Rebuild the trace -> generation/span chain.
        self.mock_langfuse_generation.trace_id = "test-trace-id"
        mock_span = MagicMock()
        mock_span.end = MagicMock()
        self.mock_langfuse_trace.span.return_value = mock_span
        self.mock_langfuse_trace.generation.return_value = self.mock_langfuse_generation
        self.mock_langfuse_client.trace.return_value = self.mock_langfuse_trace
        self.logger.Langfuse = self.mock_langfuse_client

        with patch(
            "litellm.integrations.langfuse.langfuse._add_prompt_to_generation_params",
            side_effect=lambda generation_params, **kwargs: generation_params,
            create=True,
        ) as mock_add_prompt_params, patch.object(
            self.logger, "_supports_prompt", return_value=True
        ):
            # Usage object whose token counters are all None.
            response_obj = MagicMock()
            response_obj.usage = MagicMock()
            response_obj.usage.prompt_tokens = None
            response_obj.usage.completion_tokens = None
            response_obj.usage.total_tokens = None

            def mock_get(key, default=None):
                # Cache-related fields are None as well.
                if key in ["cache_creation_input_tokens", "cache_read_input_tokens"]:
                    return None
                return default

            response_obj.usage.get = mock_get

            kwargs = {
                "model": "gpt-4-null-usage",
                "messages": [{"role": "user", "content": "Test"}],
                "litellm_params": {"metadata": {}},
                "optional_params": {},
                "litellm_call_id": "test-call-id-null-usage",
                "standard_logging_object": None,
                "response_cost": 0.0,
            }
            # Fixed timestamps avoid timing-related flakiness.
            fixed_time = datetime.datetime(2024, 1, 1, 12, 0, 0)

            try:
                self.logger._log_langfuse_v2(
                    user_id="test-user",
                    metadata={},
                    litellm_params=kwargs["litellm_params"],
                    output={"role": "assistant", "content": "Response"},
                    start_time=fixed_time,
                    end_time=fixed_time + datetime.timedelta(seconds=1),
                    kwargs=kwargs,
                    optional_params=kwargs["optional_params"],
                    input={"messages": kwargs["messages"]},
                    response_obj=response_obj,
                    level="DEFAULT",
                    litellm_call_id=kwargs["litellm_call_id"],
                )
            except Exception as e:
                self.fail(f"_log_langfuse_v2 raised an exception: {e}")

            self.mock_langfuse_client.trace.assert_called()
            self.mock_langfuse_trace.generation.assert_called_once()
            _, call_kwargs = self.mock_langfuse_trace.generation.call_args

            usage_arg = call_kwargs.get("usage")
            usage_details_arg = call_kwargs.get("usage_details")
            self.assertIsNotNone(usage_arg)
            self.assertIsNotNone(usage_details_arg)

            # None values must have been converted to 0.
            self.assertEqual(usage_arg["prompt_tokens"], 0)
            self.assertEqual(usage_arg["completion_tokens"], 0)
            self.assertEqual(usage_details_arg["input"], 0)
            self.assertEqual(usage_details_arg["output"], 0)
            self.assertEqual(usage_details_arg["total"], 0)
            self.assertEqual(usage_details_arg["cache_creation_input_tokens"], 0)
            self.assertEqual(usage_details_arg["cache_read_input_tokens"], 0)
            mock_add_prompt_params.assert_called_once()

    def test_log_langfuse_v2_uses_standard_trace_id_when_available(self):
        """The payload's trace_id is used as the Langfuse trace id when present."""
        payload = self._build_standard_logging_payload(trace_id="std-trace-id")
        trace_kwargs = self._log_v2_and_capture_trace(
            self._build_langfuse_kwargs(payload)
        )
        assert trace_kwargs.get("id") == "std-trace-id"

    def test_log_langfuse_v2_defaults_to_call_id_without_standard_trace_id(self):
        """Without any trace id, the trace id falls back to litellm_call_id."""
        payload = self._build_standard_logging_payload()
        trace_kwargs = self._log_v2_and_capture_trace(
            self._build_langfuse_kwargs(payload)
        )
        assert trace_kwargs.get("id") == "call-id-xyz"

    def test_log_langfuse_v2_uses_litellm_trace_id_fallback_over_call_id(self):
        """
        When standard_logging_object has no trace_id, but kwargs contains
        litellm_trace_id (the same ID the DB stores as Session ID), Langfuse
        should use litellm_trace_id — NOT litellm_call_id. This ensures the
        trace_id in Langfuse matches the Session ID shown in LiteLLM logs.
        """
        payload = self._build_standard_logging_payload()  # no trace_id
        kwargs = self._build_langfuse_kwargs(payload)
        kwargs["litellm_trace_id"] = "trace-id-from-kwargs"
        trace_kwargs = self._log_v2_and_capture_trace(kwargs, level="ERROR")
        # litellm_trace_id should be preferred over litellm_call_id
        assert trace_kwargs.get("id") == "trace-id-from-kwargs"

    def test_log_langfuse_v2_uses_litellm_trace_id_when_standard_logging_object_none(self):
        """
        When standard_logging_object is None (failure case where
        get_standard_logging_object_payload threw), litellm_trace_id from kwargs
        should be used as the Langfuse trace_id. This matches the DB Session ID.
        """
        kwargs = {
            "standard_logging_object": None,
            "model": "gpt-4",
            "call_type": "completion",
            "cache_hit": False,
            "messages": [],
            "litellm_trace_id": "trace-id-failure",
        }
        trace_kwargs = self._log_v2_and_capture_trace(
            kwargs, level="ERROR", litellm_call_id="call-id-different"
        )
        # Must use litellm_trace_id, not litellm_call_id
        assert trace_kwargs.get("id") == "trace-id-failure"

    def test_log_langfuse_v2_session_id_passed_as_trace_session_id(self):
        """
        Test that metadata.session_id is correctly passed as trace_params["session_id"]
        for Langfuse session grouping, and does NOT override trace_id.
        Each LLM call should get its own unique trace_id while sharing the session_id.
        """
        payload = self._build_standard_logging_payload(trace_id="std-trace-123")
        trace_kwargs = self._log_v2_and_capture_trace(
            self._build_langfuse_kwargs(payload),
            metadata={"session_id": "my-session-abc"},
            litellm_call_id="call-id-456",
        )
        # session_id should be set for Langfuse session grouping
        assert trace_kwargs.get("session_id") == "my-session-abc"
        # trace_id should remain the standard trace_id, NOT the session_id
        assert trace_kwargs.get("id") == "std-trace-123"

    def test_log_langfuse_v2_session_id_preserved_for_error_level(self):
        """
        Test that session_id is correctly passed in trace_params even when
        the log level is ERROR (failure case). This verifies the fix for
        failed requests losing session_id mapping in Langfuse.
        """
        payload = self._build_standard_logging_payload(trace_id="std-trace-err")
        trace_kwargs = self._log_v2_and_capture_trace(
            self._build_langfuse_kwargs(payload),
            metadata={"session_id": "error-session-xyz"},
            output="BadRequestError: model not found",
            input_payload={"messages": [{"role": "user", "content": "test"}]},
            level="ERROR",
            litellm_call_id="call-id-err-789",
        )
        # session_id must be preserved even for ERROR level logs
        assert trace_kwargs.get("session_id") == "error-session-xyz"
        # trace_id should be the standard trace_id, not the session_id
        assert trace_kwargs.get("id") == "std-trace-err"
        # status_message should be set for error traces
        assert trace_kwargs.get("status_message") is not None

    def test_log_langfuse_v2_explicit_trace_id_takes_priority_over_session_id(self):
        """
        Test that when both trace_id and session_id are provided in metadata,
        trace_id takes priority as the trace identifier.
        """
        payload = self._build_standard_logging_payload()
        trace_kwargs = self._log_v2_and_capture_trace(
            self._build_langfuse_kwargs(payload),
            metadata={
                "session_id": "session-999",
                "trace_id": "explicit-trace-id-777",
            },
            level="DEFAULT",
            litellm_call_id="call-id-aaa",
        )
        # Explicit trace_id must take priority
        assert trace_kwargs.get("id") == "explicit-trace-id-777"
        # session_id must still be set for session grouping
        assert trace_kwargs.get("session_id") == "session-999"
def test_failure_handler_langfuse_kwargs_excludes_original_response():
    """
    Test that the actual Logging.failure_handler() passes kwargs without
    'original_response' to the Langfuse logger. Exercises the real code path
    rather than simulating the filtering logic.
    """
    import litellm
    from litellm.litellm_core_utils.litellm_logging import Logging

    # Naive local timestamps; datetime.utcnow() is deprecated since Python 3.12.
    started_at = datetime.datetime.now()
    finished_at = started_at + datetime.timedelta(seconds=1)

    logging_obj = Logging(
        model="gpt-4",
        messages=[{"role": "user", "content": "test"}],
        stream=False,
        call_type="completion",
        start_time=started_at,
        litellm_call_id="test-call-id-failure",
        function_id="test-function-id",
    )

    # Populate model_call_details, including an original_response stand-in
    # (simulates a coroutine) that must not reach Langfuse.
    logging_obj.model_call_details["original_response"] = MagicMock()
    logging_obj.model_call_details["litellm_params"] = {
        "metadata": {"session_id": "test-session-failure"},
        "litellm_session_id": None,
    }
    logging_obj.model_call_details["optional_params"] = {}

    # Record whatever keyword arguments reach log_event_on_langfuse.
    captured_kwargs = {}
    mock_langfuse_logger = MagicMock()

    def capture_log_event(**log_kwargs):
        captured_kwargs.update(log_kwargs)
        return {"trace_id": "mock-trace-id", "generation_id": "mock-gen-id"}

    mock_langfuse_logger.log_event_on_langfuse.side_effect = capture_log_event

    # Register "langfuse" as a failure callback so failure_handler processes it;
    # the previous value is restored in the finally clause.
    original_failure_callback = litellm.failure_callback
    litellm.failure_callback = ["langfuse"]
    try:
        # LangFuseHandler hands out the capturing mock instead of a real logger.
        with patch(
            "litellm.litellm_core_utils.litellm_logging.LangFuseHandler"
        ) as mock_handler_class:
            mock_handler_class.get_langfuse_logger_for_request.return_value = (
                mock_langfuse_logger
            )
            logging_obj.failure_handler(
                exception=Exception("TestError: model not found"),
                traceback_exception="Traceback: test",
                start_time=started_at,
                end_time=finished_at,
            )

        assert mock_langfuse_logger.log_event_on_langfuse.called, (
            "log_event_on_langfuse was not called"
        )

        # original_response must have been filtered out.
        langfuse_kwargs = captured_kwargs.get("kwargs", {})
        assert "original_response" not in langfuse_kwargs, (
            "original_response should be excluded from kwargs passed to Langfuse"
        )

        # session_id metadata must survive the filtering.
        langfuse_metadata = langfuse_kwargs.get("litellm_params", {}).get(
            "metadata", {}
        )
        assert langfuse_metadata.get("session_id") == "test-session-failure", (
            "session_id should be preserved in kwargs passed to Langfuse"
        )

        assert captured_kwargs.get("level") == "ERROR"
    finally:
        litellm.failure_callback = original_failure_callback
@pytest.mark.asyncio
async def test_async_log_failure_event_logs_to_langfuse():
    """
    Test that LangfusePromptManagement.async_log_failure_event() calls
    log_event_on_langfuse with level=ERROR even when standard_logging_object
    is present. This is the code path the proxy uses for failed LLM calls.
    """
    from litellm.integrations.langfuse.langfuse_prompt_management import (
        LangfusePromptManagement,
    )

    # Stand-in for the langfuse package so no real import happens.
    mock_langfuse_module = MagicMock()
    mock_langfuse_module.version.__version__ = "3.0.0"
    fake_env = {
        "LANGFUSE_SECRET_KEY": "test-secret",
        "LANGFUSE_PUBLIC_KEY": "test-public",
        "LANGFUSE_HOST": "https://test.langfuse.com",
    }
    # Naive local timestamps; datetime.utcnow() is deprecated since Python 3.12.
    started_at = datetime.datetime.now()
    finished_at = started_at + datetime.timedelta(seconds=1)

    with patch.dict("os.environ", fake_env), patch.dict(
        "sys.modules", {"langfuse": mock_langfuse_module}
    ):
        prompt_mgmt = LangfusePromptManagement()

        # Logger handed out by get_langfuse_logger_for_request.
        mock_logger = MagicMock()
        mock_logger.log_event_on_langfuse.return_value = {
            "trace_id": "mock-trace",
            "generation_id": "mock-gen",
        }
        with patch(
            "litellm.integrations.langfuse.langfuse_prompt_management.LangFuseHandler"
        ) as mock_handler:
            mock_handler.get_langfuse_logger_for_request.return_value = mock_logger
            kwargs = {
                "litellm_params": {
                    "metadata": {"session_id": "test-session-fail"},
                },
                "litellm_call_id": "call-fail-123",
                "user": "test-user",
                "exception": Exception("API error: model not found"),
                "standard_logging_object": {
                    "error_str": "API error: model not found",
                    "trace_id": "std-trace-fail",
                    "metadata": {},
                },
            }
            await prompt_mgmt.async_log_failure_event(
                kwargs=kwargs,
                response_obj=None,
                start_time=started_at,
                end_time=finished_at,
            )

            assert mock_logger.log_event_on_langfuse.called, (
                "log_event_on_langfuse was not called for failure event"
            )
            call_kwargs = mock_logger.log_event_on_langfuse.call_args[1]
            assert call_kwargs["level"] == "ERROR"
            assert call_kwargs["status_message"] == "API error: model not found"
            assert call_kwargs["response_obj"] is None
@pytest.mark.asyncio
async def test_async_log_failure_event_works_without_standard_logging_object():
    """
    Test that async_log_failure_event() still logs to Langfuse even when
    standard_logging_object is None (e.g. when get_standard_logging_object_payload
    threw an exception). This is the critical fix — before, it silently returned.
    """
    from litellm.integrations.langfuse.langfuse_prompt_management import (
        LangfusePromptManagement,
    )

    # Stand-in for the langfuse package so no real import happens.
    mock_langfuse_module = MagicMock()
    mock_langfuse_module.version.__version__ = "3.0.0"
    fake_env = {
        "LANGFUSE_SECRET_KEY": "test-secret",
        "LANGFUSE_PUBLIC_KEY": "test-public",
        "LANGFUSE_HOST": "https://test.langfuse.com",
    }
    # Naive local timestamps; datetime.utcnow() is deprecated since Python 3.12.
    started_at = datetime.datetime.now()
    finished_at = started_at + datetime.timedelta(seconds=1)

    with patch.dict("os.environ", fake_env), patch.dict(
        "sys.modules", {"langfuse": mock_langfuse_module}
    ):
        prompt_mgmt = LangfusePromptManagement()

        mock_logger = MagicMock()
        mock_logger.log_event_on_langfuse.return_value = {
            "trace_id": "mock-trace",
            "generation_id": "mock-gen",
        }
        with patch(
            "litellm.integrations.langfuse.langfuse_prompt_management.LangFuseHandler"
        ) as mock_handler:
            mock_handler.get_langfuse_logger_for_request.return_value = mock_logger
            kwargs = {
                "litellm_params": {
                    "metadata": {"session_id": "test-session-no-slo"},
                },
                "litellm_call_id": "call-no-slo-456",
                "user": "test-user",
                "exception": Exception("InternalServerError: something broke"),
                "standard_logging_object": None,  # This is the key — it's None
            }
            await prompt_mgmt.async_log_failure_event(
                kwargs=kwargs,
                response_obj=None,
                start_time=started_at,
                end_time=finished_at,
            )

            # CRITICAL: log_event_on_langfuse MUST still be called
            assert mock_logger.log_event_on_langfuse.called, (
                "log_event_on_langfuse was NOT called when standard_logging_object "
                "is None — failure trace would be silently dropped"
            )
            call_kwargs = mock_logger.log_event_on_langfuse.call_args[1]
            assert call_kwargs["level"] == "ERROR"
            # Falls back to exception from kwargs
            assert "InternalServerError" in call_kwargs["status_message"]
def test_max_langfuse_clients_limit():
    """
    Test that the max langfuse clients limit is respected when initializing multiple clients
    """
    # Mock langfuse package to avoid triggering real import.
    # The real langfuse import fails on Python 3.14 due to pydantic v1 incompatibility,
    # and sys.modules["langfuse"] may be absent after other tests in the suite clean up.
    mock_langfuse = MagicMock()
    mock_langfuse.version.__version__ = "3.0.0"

    original_initialized_langfuse_clients = litellm.initialized_langfuse_clients
    try:
        # Cap the number of clients at 2 for the duration of the test.
        with patch.dict("sys.modules", {"langfuse": mock_langfuse}), patch.object(
            langfuse_module, "MAX_LANGFUSE_INITIALIZED_CLIENTS", 2
        ):
            litellm.initialized_langfuse_clients = 0

            # Hold on to the successfully created loggers for the whole test.
            loggers = []
            for client_number in (1, 2):
                loggers.append(
                    LangFuseLogger(
                        langfuse_public_key=f"test_key_{client_number}",
                        langfuse_secret=f"test_secret_{client_number}",
                        langfuse_host=f"https://test{client_number}.langfuse.com",
                    )
                )
                assert litellm.initialized_langfuse_clients == client_number

            # A third client exceeds the cap and must raise.
            with pytest.raises(Exception) as exc_info:
                LangFuseLogger(
                    langfuse_public_key="test_key_3",
                    langfuse_secret="test_secret_3",
                    langfuse_host="https://test3.langfuse.com",
                )
            assert "Max langfuse clients reached" in str(exc_info.value)

            # Counter should still be 2 (third client failed to initialize)
            assert litellm.initialized_langfuse_clients == 2
    finally:
        # Restore the global counter even when an assertion above fails, so a
        # failure here cannot pollute other tests.
        litellm.initialized_langfuse_clients = original_initialized_langfuse_clients