# litellm/tests/test_litellm/integrations/test_langfuse.py

import datetime
import os
import sys
import types
import unittest
from typing import Optional
from unittest.mock import MagicMock, patch

import pytest

import litellm
from litellm.integrations.langfuse import langfuse as langfuse_module
from litellm.integrations.langfuse.langfuse import LangFuseLogger

sys.path.insert(0, os.path.abspath("../.."))
from litellm.integrations.langfuse.langfuse import LangFuseLogger

# Import LangfuseUsageDetails directly from the module where it's defined
from litellm.types.integrations.langfuse import *


class TestLangfuseUsageDetails(unittest.TestCase):
    def setUp(self):
        """Create a ``LangFuseLogger`` whose Langfuse SDK, client and environment are mocked.

        State made available to the tests:

        * ``self.logger`` - logger under test, with ``Langfuse`` replaced by
          ``self.mock_langfuse_client`` and ``langfuse_sdk_version`` set to "3.0.0".
        * ``self.mock_langfuse_trace`` / ``self.mock_langfuse_generation`` /
          ``self.mock_langfuse_span`` - objects returned by the mocked
          ``client.trace()``, ``trace.generation()`` and ``trace.span()`` calls.
        * ``self.last_trace_kwargs`` - keyword arguments of the most recent
          ``client.trace(...)`` call.

        Every patch is also registered with ``addCleanup`` so it is undone even
        when ``setUp`` itself raises (unittest skips ``tearDown`` in that case).
        """
        # Save global Langfuse client counter to restore after test
        self._original_langfuse_clients_count = litellm.initialized_langfuse_clients
        self.addCleanup(
            setattr,
            litellm,
            "initialized_langfuse_clients",
            self._original_langfuse_clients_count,
        )

        # Set up environment variables for testing
        self.env_patcher = patch.dict(
            "os.environ",
            {
                "LANGFUSE_SECRET_KEY": "test-secret-key",
                "LANGFUSE_PUBLIC_KEY": "test-public-key",
                "LANGFUSE_HOST": "https://test.langfuse.com",
            },
        )
        self.env_patcher.start()
        # Stopping an already-stopped patcher is harmless, so tearDown may stop it too
        self.addCleanup(self.env_patcher.stop)

        # Create mock objects
        self.mock_langfuse_client = MagicMock()
        # Mock the client attribute to prevent errors during logger initialization
        self.mock_langfuse_client.client = MagicMock()
        self.mock_langfuse_trace = MagicMock()
        self.mock_langfuse_generation = MagicMock()
        self.mock_langfuse_generation.trace_id = "test-trace-id"

        # Mock span method for trace (used by log_provider_specific_information_as_span and _log_guardrail_information_as_span)
        self.mock_langfuse_span = MagicMock()
        self.mock_langfuse_span.end = MagicMock()
        self.mock_langfuse_trace.span.return_value = self.mock_langfuse_span

        # Setup the trace and generation chain
        self.mock_langfuse_trace.generation.return_value = self.mock_langfuse_generation
        self.last_trace_kwargs = {}

        def _trace_side_effect(*args, **kwargs):
            # Record the keyword arguments so tests can inspect the trace parameters
            self.last_trace_kwargs = kwargs
            return self.mock_langfuse_trace

        self.mock_langfuse_client.trace.side_effect = _trace_side_effect

        # Fake ``langfuse`` package with a version that supports all features
        self.mock_langfuse = MagicMock()
        self.mock_langfuse.version = MagicMock()
        self.mock_langfuse.version.__version__ = "3.0.0"

        # Mock the Langfuse class so instantiating it yields our mock client
        self.mock_langfuse_class = MagicMock()
        self.mock_langfuse_class.return_value = self.mock_langfuse_client
        self.mock_langfuse.Langfuse = self.mock_langfuse_class

        # Install the fake package for the langfuse module that's imported locally in methods
        self.langfuse_module_patcher = patch.dict(
            "sys.modules", {"langfuse": self.mock_langfuse}
        )
        # NOTE: on Python 3.8+ patch.dict's start() returns the patched mapping
        # (sys.modules itself), not a module mock.
        self.mock_langfuse_module = self.langfuse_module_patcher.start()
        self.addCleanup(self.langfuse_module_patcher.stop)

        # Create a fresh logger instance for each test
        self.logger = LangFuseLogger()

        # Explicitly set the Langfuse client to our mock
        self.logger.Langfuse = self.mock_langfuse_client
        # Ensure langfuse_sdk_version is set correctly for _supports_* methods
        self.logger.langfuse_sdk_version = "3.0.0"

        # Add the log_event_on_langfuse method to the instance
        def log_event_on_langfuse(
            self,
            kwargs,
            response_obj,
            start_time=None,
            end_time=None,
            user_id=None,
            level="DEFAULT",
            status_message=None,
        ):
            # This implementation calls _log_langfuse_v2 directly
            return self._log_langfuse_v2(
                user_id=user_id,
                metadata=kwargs.get("litellm_params", {}).get("metadata", {}),
                litellm_params=kwargs.get("litellm_params", {}),
                output=None,
                start_time=start_time,
                end_time=end_time,
                kwargs=kwargs,
                optional_params=kwargs.get("optional_params", {}),
                input=None,
                response_obj=response_obj,
                level=level,
                litellm_call_id=kwargs.get("litellm_call_id", None),
            )

        # Bind the method to the instance
        self.logger.log_event_on_langfuse = types.MethodType(
            log_event_on_langfuse, self.logger
        )

        # Make sure _is_langfuse_v2 returns True
        def mock_is_langfuse_v2(self):
            return True

        self.logger._is_langfuse_v2 = types.MethodType(mock_is_langfuse_v2, self.logger)

    def tearDown(self):
        """Release the logger, restore the global client counter and stop both patchers."""
        # Drop the logger and detach its mocked client so no state leaks between tests
        if hasattr(self, "logger"):
            stale_logger = self.logger
            del self.logger
            stale_logger.Langfuse = None

        # Put the global Langfuse client counter back to its pre-test value
        litellm.initialized_langfuse_clients = self._original_langfuse_clients_count

        # Stopping patch.dict restores os.environ and sys.modules respectively
        for patcher in (self.env_patcher, self.langfuse_module_patcher):
            patcher.stop()

    def test_langfuse_usage_details_type(self):
        """Test that LangfuseUsageDetails TypedDict is properly defined with the correct fields"""
        # Create an instance of LangfuseUsageDetails
        usage_details: LangfuseUsageDetails = {
            "input": 10,
            "output": 20,
            "total": 30,
            "cache_creation_input_tokens": 5,
            "cache_read_input_tokens": 3,
        }

        # Verify all fields are present
        self.assertEqual(usage_details["input"], 10)
        self.assertEqual(usage_details["output"], 20)
        self.assertEqual(usage_details["total"], 30)
        self.assertEqual(usage_details["cache_creation_input_tokens"], 5)
        self.assertEqual(usage_details["cache_read_input_tokens"], 3)

        # Test with all fields (all fields are required in TypedDict by default)
        minimal_usage_details: LangfuseUsageDetails = {
            "input": 10,
            "output": 20,
            "total": 30,
            "cache_creation_input_tokens": 0,
            "cache_read_input_tokens": 0,
        }

        self.assertEqual(minimal_usage_details["input"], 10)
        self.assertEqual(minimal_usage_details["output"], 20)
        self.assertEqual(minimal_usage_details["total"], 30)

    def test_log_langfuse_v2_usage_details(self):
        """Check that log_event_on_langfuse hands the response object through to _log_langfuse_v2.

        ``_log_langfuse_v2`` is patched out here, so only the forwarding of
        ``response_obj`` is verified.
        """
        cache_token_counts = {
            "cache_creation_input_tokens": 7,
            "cache_read_input_tokens": 4,
        }

        # Usage exposes token counts as attributes and cache counts through .get()
        usage = MagicMock()
        usage.prompt_tokens = 15
        usage.completion_tokens = 25
        usage.get = lambda key, default=None: cache_token_counts.get(key, default)

        response_obj = MagicMock()
        response_obj.usage = usage

        request_kwargs = {
            "model": "gpt-4",
            "messages": [{"role": "user", "content": "Hello"}],
            "litellm_params": {"metadata": {}},
        }

        began = datetime.datetime.now()
        finished = began + datetime.timedelta(seconds=1)

        with patch.object(self.logger, "_log_langfuse_v2") as patched_v2:
            self.logger.log_event_on_langfuse(
                kwargs=request_kwargs,
                response_obj=response_obj,
                start_time=began,
                end_time=finished,
            )

            # Exactly one delegation to _log_langfuse_v2 is expected
            patched_v2.assert_called_once()

            # Keyword arguments of that single call
            forwarded = patched_v2.call_args[1]
            self.assertEqual(forwarded["response_obj"], response_obj)

    def test_langfuse_usage_details_optional_fields(self):
        """Test that LangfuseUsageDetails fields are properly defined as Optional"""
        # Create an instance with None values for optional fields
        usage_details: LangfuseUsageDetails = {
            "input": 10,
            "output": 20,
            "total": 30,
            "cache_creation_input_tokens": None,
            "cache_read_input_tokens": None,
        }

        # Verify fields can be None
        self.assertEqual(usage_details["input"], 10)
        self.assertEqual(usage_details["output"], 20)
        self.assertEqual(usage_details["total"], 30)
        self.assertIsNone(usage_details["cache_creation_input_tokens"])
        self.assertIsNone(usage_details["cache_read_input_tokens"])

    def test_langfuse_usage_details_structure(self):
        """Test that LangfuseUsageDetails has the correct structure as defined in the commit"""
        # This test directly verifies the structure of the TypedDict
        # without relying on the LangFuseLogger class

        # Create a dictionary that matches the LangfuseUsageDetails structure
        usage_details = {
            "input": 15,
            "output": 25,
            "total": 40,
            "cache_creation_input_tokens": 7,
            "cache_read_input_tokens": 4,
        }

        # Verify the structure matches what we expect
        self.assertIn("input", usage_details)
        self.assertIn("output", usage_details)
        self.assertIn("total", usage_details)
        self.assertIn("cache_creation_input_tokens", usage_details)
        self.assertIn("cache_read_input_tokens", usage_details)

        # Verify the values
        self.assertEqual(usage_details["input"], 15)
        self.assertEqual(usage_details["output"], 25)
        self.assertEqual(usage_details["total"], 40)
        self.assertEqual(usage_details["cache_creation_input_tokens"], 7)
        self.assertEqual(usage_details["cache_read_input_tokens"], 4)

    def test_log_langfuse_v2_handles_null_usage_values(self):
        """
        _log_langfuse_v2 must cope with a usage object whose token counts are all None.

        The mocked generation call is expected to receive ``usage`` and
        ``usage_details`` in which every None has been replaced by 0.
        """
        # Start from pristine mocks; side_effect=True clears side effects so the
        # return_value configured below takes effect.
        for stale_mock in (
            self.mock_langfuse_client,
            self.mock_langfuse_trace,
            self.mock_langfuse_generation,
        ):
            stale_mock.reset_mock(side_effect=True)

        # Rebuild the client -> trace -> generation/span chain
        span_mock = MagicMock()
        span_mock.end = MagicMock()
        self.mock_langfuse_generation.trace_id = "test-trace-id"
        self.mock_langfuse_trace.span.return_value = span_mock
        self.mock_langfuse_trace.generation.return_value = self.mock_langfuse_generation
        self.mock_langfuse_client.trace.return_value = self.mock_langfuse_trace
        self.logger.Langfuse = self.mock_langfuse_client

        prompt_params_patch = patch(
            "litellm.integrations.langfuse.langfuse._add_prompt_to_generation_params",
            side_effect=lambda generation_params, **kwargs: generation_params,
            create=True,
        )
        supports_prompt_patch = patch.object(
            self.logger, "_supports_prompt", return_value=True
        )

        with prompt_params_patch as mock_add_prompt_params, supports_prompt_patch:
            # Response whose usage reports None for every token count
            null_usage = MagicMock()
            null_usage.prompt_tokens = None
            null_usage.completion_tokens = None
            null_usage.total_tokens = None

            cache_fields = ("cache_creation_input_tokens", "cache_read_input_tokens")

            def mock_get(key, default=None):
                # Cache-related lookups yield None; anything else falls back to the default
                return None if key in cache_fields else default

            null_usage.get = mock_get

            response_obj = MagicMock()
            response_obj.usage = null_usage

            # Standard kwargs for the call
            kwargs = {
                "model": "gpt-4-null-usage",
                "messages": [{"role": "user", "content": "Test"}],
                "litellm_params": {"metadata": {}},
                "optional_params": {},
                "litellm_call_id": "test-call-id-null-usage",
                "standard_logging_object": None,
                "response_cost": 0.0,
            }

            # Fixed timestamps keep the test deterministic
            began = datetime.datetime(2024, 1, 1, 12, 0, 0)
            finished = began + datetime.timedelta(seconds=1)

            # Any exception from the method under test is reported as a test failure
            try:
                self.logger._log_langfuse_v2(
                    user_id="test-user",
                    metadata={},
                    litellm_params=kwargs["litellm_params"],
                    output={"role": "assistant", "content": "Response"},
                    start_time=began,
                    end_time=finished,
                    kwargs=kwargs,
                    optional_params=kwargs["optional_params"],
                    input={"messages": kwargs["messages"]},
                    response_obj=response_obj,
                    level="DEFAULT",
                    litellm_call_id=kwargs["litellm_call_id"],
                )
            except Exception as e:
                self.fail(f"_log_langfuse_v2 raised an exception: {e}")

            # A trace must have been created, followed by exactly one generation
            self.mock_langfuse_client.trace.assert_called()
            self.mock_langfuse_trace.generation.assert_called_once()

            # Keyword arguments handed to the mocked generation call
            generation_kwargs = self.mock_langfuse_trace.generation.call_args[1]
            usage_arg = generation_kwargs.get("usage")
            usage_details_arg = generation_kwargs.get("usage_details")

            self.assertIsNotNone(usage_arg)
            self.assertIsNotNone(usage_details_arg)

            # None values must have been converted to 0
            for token_field in ("prompt_tokens", "completion_tokens"):
                self.assertEqual(usage_arg[token_field], 0)

            for detail_field in (
                "input",
                "output",
                "total",
                "cache_creation_input_tokens",
                "cache_read_input_tokens",
            ):
                self.assertEqual(usage_details_arg[detail_field], 0)

            mock_add_prompt_params.assert_called_once()

    def _build_standard_logging_payload(self, trace_id: Optional[str] = None):
        payload = {
            "id": "payload-id",
            "call_type": "completion",
            "response_cost": 0.0,
            "status": "success",
            "total_tokens": 0,
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "startTime": 0.0,
            "endTime": 0.0,
            "completionStartTime": 0.0,
            "model": "gpt-4",
            "model_id": "model-123",
            "model_group": "openai",
            "api_base": "https://api.openai.com",
            "metadata": {
                "user_api_key_end_user_id": None,
                "prompt_management_metadata": None,
                "session_id": None,
                "trace_name": None,
                "trace_version": None,
                "headers": None,
                "endpoint": None,
                "caching_groups": None,
                "previous_models": None,
            },
            "hidden_params": {},
            "request_tags": [],
            "messages": [],
            "response": {"id": "resp"},
            "model_parameters": {},
            "guardrail_information": None,
            "standard_built_in_tools_params": None,
        }
        if trace_id is not None:
            payload["trace_id"] = trace_id
        return payload

    def _build_langfuse_kwargs(self, standard_logging_payload):
        return {
            "standard_logging_object": standard_logging_payload,
            "model": standard_logging_payload["model"],
            "call_type": standard_logging_payload["call_type"],
            "cache_hit": False,
            "messages": [],
        }

    def test_log_langfuse_v2_uses_standard_trace_id_when_available(self):
        """
        When the standard logging payload carries a trace_id, that value must be
        used as the id of the Langfuse trace.
        """
        payload = self._build_standard_logging_payload(trace_id="std-trace-id")
        kwargs = self._build_langfuse_kwargs(payload)
        self.last_trace_kwargs = {}

        # Fixed naive timestamps: deterministic, and avoids the deprecated utcnow()
        start_time = datetime.datetime(2024, 1, 1, 12, 0, 0)
        end_time = start_time + datetime.timedelta(seconds=1)

        with patch(
            "litellm.integrations.langfuse.langfuse._add_prompt_to_generation_params",
            side_effect=lambda generation_params, **kwargs: generation_params,
            create=True,
        ):
            self.logger._log_langfuse_v2(
                user_id="user-1",
                metadata={},
                litellm_params={"metadata": {}},
                output=None,
                start_time=start_time,
                end_time=end_time,
                kwargs=kwargs,
                optional_params={},
                input=None,
                response_obj=None,
                level="INFO",
                litellm_call_id="call-id-xyz",
            )

        self.assertEqual(self.last_trace_kwargs.get("id"), "std-trace-id")

    def test_log_langfuse_v2_defaults_to_call_id_without_standard_trace_id(self):
        """
        When the standard logging payload has no trace_id (and no other trace id
        is supplied), the Langfuse trace id must fall back to litellm_call_id.
        """
        payload = self._build_standard_logging_payload()
        kwargs = self._build_langfuse_kwargs(payload)
        self.last_trace_kwargs = {}

        # Fixed naive timestamps: deterministic, and avoids the deprecated utcnow()
        start_time = datetime.datetime(2024, 1, 1, 12, 0, 0)
        end_time = start_time + datetime.timedelta(seconds=1)

        with patch(
            "litellm.integrations.langfuse.langfuse._add_prompt_to_generation_params",
            side_effect=lambda generation_params, **kwargs: generation_params,
            create=True,
        ):
            self.logger._log_langfuse_v2(
                user_id="user-1",
                metadata={},
                litellm_params={"metadata": {}},
                output=None,
                start_time=start_time,
                end_time=end_time,
                kwargs=kwargs,
                optional_params={},
                input=None,
                response_obj=None,
                level="INFO",
                litellm_call_id="call-id-xyz",
            )

        self.assertEqual(self.last_trace_kwargs.get("id"), "call-id-xyz")

    def test_log_langfuse_v2_uses_litellm_trace_id_fallback_over_call_id(self):
        """
        When standard_logging_object has no trace_id, but kwargs contains
        litellm_trace_id (the same ID the DB stores as Session ID), Langfuse
        should use litellm_trace_id — NOT litellm_call_id. This ensures the
        trace_id in Langfuse matches the Session ID shown in LiteLLM logs.
        """
        payload = self._build_standard_logging_payload()  # no trace_id
        kwargs = self._build_langfuse_kwargs(payload)
        kwargs["litellm_trace_id"] = "trace-id-from-kwargs"
        self.last_trace_kwargs = {}

        # Fixed naive timestamps: deterministic, and avoids the deprecated utcnow()
        start_time = datetime.datetime(2024, 1, 1, 12, 0, 0)
        end_time = start_time + datetime.timedelta(seconds=1)

        with patch(
            "litellm.integrations.langfuse.langfuse._add_prompt_to_generation_params",
            side_effect=lambda generation_params, **kwargs: generation_params,
            create=True,
        ):
            self.logger._log_langfuse_v2(
                user_id="user-1",
                metadata={},
                litellm_params={"metadata": {}},
                output=None,
                start_time=start_time,
                end_time=end_time,
                kwargs=kwargs,
                optional_params={},
                input=None,
                response_obj=None,
                level="ERROR",
                litellm_call_id="call-id-xyz",
            )

        # litellm_trace_id should be preferred over litellm_call_id
        self.assertEqual(self.last_trace_kwargs.get("id"), "trace-id-from-kwargs")

    def test_log_langfuse_v2_uses_litellm_trace_id_when_standard_logging_object_none(self):
        """
        When standard_logging_object is None (failure case where
        get_standard_logging_object_payload threw), litellm_trace_id from kwargs
        should be used as the Langfuse trace_id. This matches the DB Session ID.
        """
        kwargs = {
            "standard_logging_object": None,
            "model": "gpt-4",
            "call_type": "completion",
            "cache_hit": False,
            "messages": [],
            "litellm_trace_id": "trace-id-failure",
        }
        self.last_trace_kwargs = {}

        # Fixed naive timestamps: deterministic, and avoids the deprecated utcnow()
        start_time = datetime.datetime(2024, 1, 1, 12, 0, 0)
        end_time = start_time + datetime.timedelta(seconds=1)

        with patch(
            "litellm.integrations.langfuse.langfuse._add_prompt_to_generation_params",
            side_effect=lambda generation_params, **kwargs: generation_params,
            create=True,
        ):
            self.logger._log_langfuse_v2(
                user_id="user-1",
                metadata={},
                litellm_params={"metadata": {}},
                output=None,
                start_time=start_time,
                end_time=end_time,
                kwargs=kwargs,
                optional_params={},
                input=None,
                response_obj=None,
                level="ERROR",
                litellm_call_id="call-id-different",
            )

        # Must use litellm_trace_id, not litellm_call_id
        self.assertEqual(self.last_trace_kwargs.get("id"), "trace-id-failure")

    def test_log_langfuse_v2_session_id_passed_as_trace_session_id(self):
        """
        Test that metadata.session_id is correctly passed as trace_params["session_id"]
        for Langfuse session grouping, and does NOT override trace_id.
        Each LLM call should get its own unique trace_id while sharing the session_id.
        """
        payload = self._build_standard_logging_payload(trace_id="std-trace-123")
        kwargs = self._build_langfuse_kwargs(payload)
        self.last_trace_kwargs = {}

        # Fixed naive timestamps: deterministic, and avoids the deprecated utcnow()
        start_time = datetime.datetime(2024, 1, 1, 12, 0, 0)
        end_time = start_time + datetime.timedelta(seconds=1)

        with patch(
            "litellm.integrations.langfuse.langfuse._add_prompt_to_generation_params",
            side_effect=lambda generation_params, **kwargs: generation_params,
            create=True,
        ):
            self.logger._log_langfuse_v2(
                user_id="user-1",
                metadata={"session_id": "my-session-abc"},
                litellm_params={"metadata": {"session_id": "my-session-abc"}},
                output=None,
                start_time=start_time,
                end_time=end_time,
                kwargs=kwargs,
                optional_params={},
                input=None,
                response_obj=None,
                level="INFO",
                litellm_call_id="call-id-456",
            )

        # session_id should be set for Langfuse session grouping
        self.assertEqual(self.last_trace_kwargs.get("session_id"), "my-session-abc")
        # trace_id should remain the standard trace_id, NOT the session_id
        self.assertEqual(self.last_trace_kwargs.get("id"), "std-trace-123")

    def test_log_langfuse_v2_session_id_preserved_for_error_level(self):
        """
        Test that session_id is correctly passed in trace_params even when
        the log level is ERROR (failure case). This verifies the fix for
        failed requests losing session_id mapping in Langfuse.
        """
        payload = self._build_standard_logging_payload(trace_id="std-trace-err")
        kwargs = self._build_langfuse_kwargs(payload)
        self.last_trace_kwargs = {}

        # Fixed naive timestamps: deterministic, and avoids the deprecated utcnow()
        start_time = datetime.datetime(2024, 1, 1, 12, 0, 0)
        end_time = start_time + datetime.timedelta(seconds=1)

        with patch(
            "litellm.integrations.langfuse.langfuse._add_prompt_to_generation_params",
            side_effect=lambda generation_params, **kwargs: generation_params,
            create=True,
        ):
            self.logger._log_langfuse_v2(
                user_id="user-1",
                metadata={"session_id": "error-session-xyz"},
                litellm_params={"metadata": {"session_id": "error-session-xyz"}},
                output="BadRequestError: model not found",
                start_time=start_time,
                end_time=end_time,
                kwargs=kwargs,
                optional_params={},
                input={"messages": [{"role": "user", "content": "test"}]},
                response_obj=None,
                level="ERROR",
                litellm_call_id="call-id-err-789",
            )

        # session_id must be preserved even for ERROR level logs
        self.assertEqual(self.last_trace_kwargs.get("session_id"), "error-session-xyz")
        # trace_id should be the standard trace_id, not the session_id
        self.assertEqual(self.last_trace_kwargs.get("id"), "std-trace-err")
        # status_message should be set for error traces
        self.assertIsNotNone(self.last_trace_kwargs.get("status_message"))

    def test_log_langfuse_v2_explicit_trace_id_takes_priority_over_session_id(self):
        """
        Test that when both trace_id and session_id are provided in metadata,
        trace_id takes priority as the trace identifier.
        """
        payload = self._build_standard_logging_payload()
        kwargs = self._build_langfuse_kwargs(payload)
        self.last_trace_kwargs = {}

        # Fixed naive timestamps: deterministic, and avoids the deprecated utcnow()
        start_time = datetime.datetime(2024, 1, 1, 12, 0, 0)
        end_time = start_time + datetime.timedelta(seconds=1)

        with patch(
            "litellm.integrations.langfuse.langfuse._add_prompt_to_generation_params",
            side_effect=lambda generation_params, **kwargs: generation_params,
            create=True,
        ):
            self.logger._log_langfuse_v2(
                user_id="user-1",
                metadata={
                    "session_id": "session-999",
                    "trace_id": "explicit-trace-id-777",
                },
                litellm_params={
                    "metadata": {
                        "session_id": "session-999",
                        "trace_id": "explicit-trace-id-777",
                    }
                },
                output=None,
                start_time=start_time,
                end_time=end_time,
                kwargs=kwargs,
                optional_params={},
                input=None,
                response_obj=None,
                level="DEFAULT",
                litellm_call_id="call-id-aaa",
            )

        # Explicit trace_id must take priority
        self.assertEqual(self.last_trace_kwargs.get("id"), "explicit-trace-id-777")
        # session_id must still be set for session grouping
        self.assertEqual(self.last_trace_kwargs.get("session_id"), "session-999")


def test_failure_handler_langfuse_kwargs_excludes_original_response():
    """
    Test that the actual Logging.failure_handler() passes kwargs without
    'original_response' to the Langfuse logger. Exercises the real code path
    rather than simulating the filtering logic.

    Also checks that the session_id metadata survives and that the event is
    logged with level "ERROR".
    """
    import litellm
    from litellm.litellm_core_utils.litellm_logging import Logging

    # Naive "now" timestamps; datetime.utcnow() is deprecated since Python 3.12
    start_time = datetime.datetime.now()
    end_time = start_time + datetime.timedelta(seconds=1)

    # Create a Logging instance
    logging_obj = Logging(
        model="gpt-4",
        messages=[{"role": "user", "content": "test"}],
        stream=False,
        call_type="completion",
        start_time=start_time,
        litellm_call_id="test-call-id-failure",
        function_id="test-function-id",
    )

    # Set up model_call_details with original_response (simulates a coroutine)
    mock_coroutine = MagicMock()
    logging_obj.model_call_details["original_response"] = mock_coroutine
    logging_obj.model_call_details["litellm_params"] = {
        "metadata": {"session_id": "test-session-failure"},
        "litellm_session_id": None,
    }
    logging_obj.model_call_details["optional_params"] = {}

    # Capture what gets passed to log_event_on_langfuse
    captured_kwargs = {}
    mock_langfuse_logger = MagicMock()

    def capture_log_event(**log_kwargs):
        # Record the call's keyword arguments and mimic a logging result
        captured_kwargs.update(log_kwargs)
        return {"trace_id": "mock-trace-id", "generation_id": "mock-gen-id"}

    mock_langfuse_logger.log_event_on_langfuse.side_effect = capture_log_event

    # Set "langfuse" as a failure callback so the failure_handler processes it
    original_failure_callback = litellm.failure_callback
    litellm.failure_callback = ["langfuse"]

    try:
        # Mock LangFuseHandler to return our capturing mock logger
        with patch(
            "litellm.litellm_core_utils.litellm_logging.LangFuseHandler"
        ) as mock_handler_class:
            mock_handler_class.get_langfuse_logger_for_request.return_value = (
                mock_langfuse_logger
            )

            # Call the actual failure_handler
            test_exception = Exception("TestError: model not found")
            logging_obj.failure_handler(
                exception=test_exception,
                traceback_exception="Traceback: test",
                start_time=start_time,
                end_time=end_time,
            )

        # Verify log_event_on_langfuse was actually called
        assert mock_langfuse_logger.log_event_on_langfuse.called, (
            "log_event_on_langfuse was not called"
        )

        # Verify original_response is NOT in the kwargs passed to Langfuse
        langfuse_kwargs = captured_kwargs.get("kwargs", {})
        assert "original_response" not in langfuse_kwargs, (
            "original_response should be excluded from kwargs passed to Langfuse"
        )

        # Verify session_id metadata is preserved in the kwargs
        langfuse_metadata = langfuse_kwargs.get("litellm_params", {}).get(
            "metadata", {}
        )
        assert langfuse_metadata.get("session_id") == "test-session-failure", (
            "session_id should be preserved in kwargs passed to Langfuse"
        )

        # Verify level is ERROR
        assert captured_kwargs.get("level") == "ERROR"
    finally:
        # Always restore the global callback list so other tests are unaffected
        litellm.failure_callback = original_failure_callback


@pytest.mark.asyncio
async def test_async_log_failure_event_logs_to_langfuse():
    """
    Test that LangfusePromptManagement.async_log_failure_event() calls
    log_event_on_langfuse with level=ERROR even when standard_logging_object
    is present. This is the code path the proxy uses for failed LLM calls.

    The ``langfuse`` package, the Langfuse environment variables and
    ``LangFuseHandler`` are all mocked, so the real SDK is not used.
    """
    from litellm.integrations.langfuse.langfuse_prompt_management import (
        LangfusePromptManagement,
    )

    # Stand-in for the langfuse package, reporting SDK version 3.0.0
    mock_langfuse_module = MagicMock()
    mock_langfuse_module.version.__version__ = "3.0.0"

    # Naive "now" timestamps; datetime.utcnow() is deprecated since Python 3.12
    start_time = datetime.datetime.now()
    end_time = start_time + datetime.timedelta(seconds=1)

    with patch.dict(
        "os.environ",
        {
            "LANGFUSE_SECRET_KEY": "test-secret",
            "LANGFUSE_PUBLIC_KEY": "test-public",
            "LANGFUSE_HOST": "https://test.langfuse.com",
        },
    ), patch.dict("sys.modules", {"langfuse": mock_langfuse_module}):
        prompt_mgmt = LangfusePromptManagement()

        # Mock the langfuse logger returned by get_langfuse_logger_for_request
        mock_logger = MagicMock()
        mock_logger.log_event_on_langfuse.return_value = {
            "trace_id": "mock-trace",
            "generation_id": "mock-gen",
        }

        with patch(
            "litellm.integrations.langfuse.langfuse_prompt_management.LangFuseHandler"
        ) as mock_handler:
            mock_handler.get_langfuse_logger_for_request.return_value = mock_logger

            kwargs = {
                "litellm_params": {
                    "metadata": {"session_id": "test-session-fail"},
                },
                "litellm_call_id": "call-fail-123",
                "user": "test-user",
                "exception": Exception("API error: model not found"),
                "standard_logging_object": {
                    "error_str": "API error: model not found",
                    "trace_id": "std-trace-fail",
                    "metadata": {},
                },
            }

            await prompt_mgmt.async_log_failure_event(
                kwargs=kwargs,
                response_obj=None,
                start_time=start_time,
                end_time=end_time,
            )

            # Verify log_event_on_langfuse was called
            assert mock_logger.log_event_on_langfuse.called, (
                "log_event_on_langfuse was not called for failure event"
            )
            # Keyword arguments of the most recent log_event_on_langfuse call
            call_kwargs = mock_logger.log_event_on_langfuse.call_args[1]
            assert call_kwargs["level"] == "ERROR"
            assert call_kwargs["status_message"] == "API error: model not found"
            assert call_kwargs["response_obj"] is None


@pytest.mark.asyncio
async def test_async_log_failure_event_works_without_standard_logging_object():
    """
    Failure events must still be logged when standard_logging_object is None.

    The request kwargs carry ``standard_logging_object=None`` (e.g. because
    get_standard_logging_object_payload threw an exception). The test checks
    that ``log_event_on_langfuse`` is nevertheless invoked with
    ``level="ERROR"`` and a ``status_message`` that contains the text of the
    exception stored in kwargs, instead of the event being silently dropped.
    """
    from litellm.integrations.langfuse.langfuse_prompt_management import (
        LangfusePromptManagement,
    )

    # Stand-in for the `langfuse` package, reporting version 3.0.0.
    fake_langfuse = MagicMock()
    fake_langfuse.version.__version__ = "3.0.0"

    langfuse_env = {
        "LANGFUSE_SECRET_KEY": "test-secret",
        "LANGFUSE_PUBLIC_KEY": "test-public",
        "LANGFUSE_HOST": "https://test.langfuse.com",
    }

    # Logger stub handed back by the patched LangFuseHandler below.
    logger_stub = MagicMock()
    logger_stub.log_event_on_langfuse.return_value = {
        "trace_id": "mock-trace",
        "generation_id": "mock-gen",
    }

    failure_kwargs = {
        "litellm_params": {
            "metadata": {"session_id": "test-session-no-slo"},
        },
        "litellm_call_id": "call-no-slo-456",
        "user": "test-user",
        "exception": Exception("InternalServerError: something broke"),
        # The scenario under test: no standard logging payload is available.
        "standard_logging_object": None,
    }

    with patch.dict("os.environ", langfuse_env), patch.dict(
        "sys.modules", {"langfuse": fake_langfuse}
    ):
        prompt_mgmt = LangfusePromptManagement()

        with patch(
            "litellm.integrations.langfuse.langfuse_prompt_management.LangFuseHandler"
        ) as handler_cls:
            handler_cls.get_langfuse_logger_for_request.return_value = logger_stub

            await prompt_mgmt.async_log_failure_event(
                kwargs=failure_kwargs,
                response_obj=None,
                start_time=datetime.datetime.utcnow(),
                end_time=datetime.datetime.utcnow(),
            )

    # The stub keeps its call record after the patches are undone.
    # CRITICAL: log_event_on_langfuse MUST still have been called.
    assert logger_stub.log_event_on_langfuse.called, (
        "log_event_on_langfuse was NOT called when standard_logging_object "
        "is None — failure trace would be silently dropped"
    )

    logged = logger_stub.log_event_on_langfuse.call_args.kwargs
    assert logged["level"] == "ERROR"
    # The status message falls back to the exception carried in kwargs.
    assert "InternalServerError" in logged["status_message"]


def test_max_langfuse_clients_limit():
    """
    Test that the max langfuse clients limit is respected when initializing
    multiple clients.

    With ``MAX_LANGFUSE_INITIALIZED_CLIENTS`` patched to 2 and the global
    counter starting at 0, the first two ``LangFuseLogger`` constructions must
    succeed and bump ``litellm.initialized_langfuse_clients`` to 1 and then 2.
    The third construction must raise an exception mentioning
    "Max langfuse clients reached" and leave the counter at 2.

    The global counter is replaced through ``patch.object`` so its previous
    value is restored even when an assertion or a constructor call fails;
    otherwise a failing run would leak the modified counter into later tests.
    """
    # Mock langfuse package to avoid triggering real import.
    # The real langfuse import fails on Python 3.14 due to pydantic v1 incompatibility,
    # and sys.modules["langfuse"] may be absent after other tests in the suite clean up.
    mock_langfuse = MagicMock()
    mock_langfuse.version.__version__ = "3.0.0"

    # Cap the limit at 2 and reset the counter to 0 for the duration of the
    # test; every patch below is undone automatically on exit.
    with patch.dict("sys.modules", {"langfuse": mock_langfuse}), patch.object(
        langfuse_module, "MAX_LANGFUSE_INITIALIZED_CLIENTS", 2
    ), patch.object(litellm, "initialized_langfuse_clients", 0):
        # First client should succeed. The instance stays bound to a local so
        # it remains alive for the rest of the test.
        logger1 = LangFuseLogger(
            langfuse_public_key="test_key_1",
            langfuse_secret="test_secret_1",
            langfuse_host="https://test1.langfuse.com",
        )
        assert litellm.initialized_langfuse_clients == 1

        # Second client should succeed (also kept alive via its local name).
        logger2 = LangFuseLogger(
            langfuse_public_key="test_key_2",
            langfuse_secret="test_secret_2",
            langfuse_host="https://test2.langfuse.com",
        )
        assert litellm.initialized_langfuse_clients == 2

        # Third client should fail with exception
        with pytest.raises(Exception) as exc_info:
            LangFuseLogger(
                langfuse_public_key="test_key_3",
                langfuse_secret="test_secret_3",
                langfuse_host="https://test3.langfuse.com",
            )

        # Verify the error message contains the expected text
        assert "Max langfuse clients reached" in str(exc_info.value)

        # Counter should still be 2 (third client failed to initialize)
        assert litellm.initialized_langfuse_clients == 2