Files
litellm/tests/test_litellm/integrations/test_deepeval.py
T
Krish Dholakia ef42461c1e Litellm fix GitHub action testing (#11163)
* test: add __init__.py files

* refactor: rename test folder to avoid naming conflict

* test: update workflows

* test: update tests

* test: update imports

* test: update tests

* test: remove unused import

* ci(test-litellm.yml): add pytest retry to github workflow

* test: fix test
2025-05-26 14:41:42 -07:00

121 lines
4.3 KiB
Python

import os
import unittest
import uuid
from datetime import datetime, timezone
from unittest.mock import MagicMock, patch
from litellm.integrations.deepeval.api import Endpoints, HttpMethods
from litellm.integrations.deepeval.deepeval import DeepEvalLogger
from litellm.integrations.deepeval.types import SpanApiType, TraceSpanApiStatus
class TestDeepEvalLogger(unittest.TestCase):
    """Unit tests for ``DeepEvalLogger`` success and failure callbacks.

    The ``Api`` class used by the logger module is replaced with a mock, so
    the tests only inspect the arguments passed to ``send_request``.
    """

    @patch.dict(os.environ, {"CONFIDENT_API_KEY": "test-api-key"})
    def setUp(self):
        """Build a ``DeepEvalLogger`` backed by a mocked ``Api`` and shared fixtures."""
        # Mock the Api class before initializing DeepEvalLogger
        self.api_patcher = patch("litellm.integrations.deepeval.deepeval.Api")
        self.mock_api_class = self.api_patcher.start()
        # Register the stop immediately: unittest does not call tearDown when
        # setUp raises, but cleanups registered so far are still run, so the
        # patch cannot leak into other tests if a later line here fails.
        self.addCleanup(self.api_patcher.stop)

        self.mock_api_instance = MagicMock()
        self.mock_api_class.return_value = self.mock_api_instance
        self.logger = DeepEvalLogger()

        self.start_time = datetime(2023, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
        self.end_time = datetime(2023, 1, 1, 12, 0, 1, tzinfo=timezone.utc)
        self.mock_response_obj = {"id": "resp_123"}
        self.trace_id = str(uuid.uuid4())
        self.span_id = str(uuid.uuid4())
        self.model = "gpt-3.5-turbo"
        self.input_str = "Hello, world!"

    def _common_assertions(
        self, expected_status: TraceSpanApiStatus, expected_output: str
    ) -> dict:
        """Check the single ``send_request`` call and return its LLM span.

        Args:
            expected_status: Status the span is expected to carry; it also
                selects the expected span name (success vs. failure callback).
            expected_output: Expected value of the span's ``output`` field.

        Returns:
            The only entry of ``body["llmSpans"]`` so callers can make
            additional, test-specific assertions on it.
        """
        self.mock_api_instance.send_request.assert_called_once()
        call_args = self.mock_api_instance.send_request.call_args
        self.assertEqual(call_args.kwargs["method"], HttpMethods.POST)
        self.assertEqual(call_args.kwargs["endpoint"], Endpoints.TRACING_ENDPOINT)

        # Trace-level fields.
        body = call_args.kwargs["body"]
        self.assertIsInstance(body, dict)
        self.assertEqual(body["uuid"], self.trace_id)
        self.assertIn("startTime", body)
        self.assertIn("endTime", body)
        self.assertIsInstance(body["llmSpans"], list)
        self.assertEqual(len(body["llmSpans"]), 1)

        # Span-level fields.
        llm_span = body["llmSpans"][0]
        self.assertEqual(llm_span["uuid"], self.span_id)
        expected_name = (
            "litellm_success_callback"
            if expected_status == TraceSpanApiStatus.SUCCESS
            else "litellm_failure_callback"
        )
        self.assertEqual(llm_span["name"], expected_name)
        self.assertEqual(llm_span["status"], expected_status.value)
        self.assertEqual(llm_span["type"], SpanApiType.LLM.value)
        self.assertEqual(llm_span["traceUuid"], self.trace_id)
        self.assertIn("startTime", llm_span)
        self.assertIn("endTime", llm_span)
        self.assertEqual(llm_span["input"], self.input_str)
        self.assertEqual(llm_span["output"], expected_output)
        self.assertEqual(llm_span["model"], self.model)
        return llm_span

    def test_log_success_event(self):
        """A success event is expected to yield a SUCCESS span with token counts."""
        kwargs = {
            "input": self.input_str,
            "standard_logging_object": {
                "id": self.span_id,
                "trace_id": self.trace_id,
                "model": self.model,
                "response": {
                    "usage": {"prompt_tokens": 10, "completion_tokens": 20},
                    "choices": [{"message": {"content": "This is a success."}}],
                },
            },
        }
        self.logger.log_success_event(
            kwargs, self.mock_response_obj, self.start_time, self.end_time
        )
        llm_span = self._common_assertions(
            TraceSpanApiStatus.SUCCESS, "This is a success."
        )
        self.assertEqual(llm_span["inputTokenCount"], 10)
        self.assertEqual(llm_span["outputTokenCount"], 20)

    def test_log_failure_event(self):
        """A failure event is expected to yield an ERRORED span without token counts."""
        error_message = "This is an error."
        kwargs = {
            "input": self.input_str,
            "standard_logging_object": {
                "id": self.span_id,
                "trace_id": self.trace_id,
                "model": self.model,
                "error_string": error_message,
                "response": {},
            },
        }
        self.logger.log_failure_event(
            kwargs, self.mock_response_obj, self.start_time, self.end_time
        )
        llm_span = self._common_assertions(TraceSpanApiStatus.ERRORED, error_message)
        self.assertIsNone(llm_span.get("inputTokenCount"))
        self.assertIsNone(llm_span.get("outputTokenCount"))
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()