mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-18 03:31:23 +00:00
386f334fee
* refactor: new agentic loop event hook simplifies how to create logic for tool based multi llm calls * fix: compress - make it work on anthropic input as well * fix(compress.py): working prompt compression for claude code ensures claude code messages can run through proxy easily * docs: add agentic loop hook guide * docs: add agentic_loop_hook to sidebar * fix: fix multiple arguments error * fix: fix tool call loop for compression on streaming /v1/messages * fix: fix linting errors * fix: fix ci/cd errors * feat(litellm_pre_call_utils.py): use claude code session for litellm session id allows claude code logs to be stitched together, making it easy to know they were all part of the same conversation * fix: suppress incorrect mypy warning rE: module * revert: drop PR's changes to litellm/proxy/_experimental/out/ Restores the 34 HTML files under _experimental/out/ to their pre-PR paths (X/index.html -> X.html). All renames are R100 (content unchanged); no other files are touched. * fix: address greptile review comments on PR #25729 - Skip ``kwargs["tools"] = []`` injection when compression is a no-op — Anthropic Messages rejects empty tool arrays on requests that did not originally declare tools. - Move agentic-loop safety guards (fingerprint cycle / max depth) out of the per-callback try/except so they propagate instead of being swallowed by the generic exception handler. Extracted _check_agentic_loop_safety. - Gate generic ``x-<vendor>-session-id`` capture behind the LITELLM_CAPTURE_VENDOR_SESSION_HEADERS env var (off by default) to preserve backwards compatibility; explicit x-litellm-* headers are unaffected. - Fix monkeypatch target in pre-call-hook test to patch the actual module-level binding (litellm.integrations.compression_interception.handler.compress). - Add regression tests for empty-tools skip and opt-in session capture. 
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * revert: drop LITELLM_CAPTURE_VENDOR_SESSION_HEADERS flag Generic x-<vendor>-session-id header capture is a new feature and only runs *after* the explicit x-litellm-trace-id / x-litellm-session-id checks, so it does not change behavior for any existing caller that was already using the LiteLLM headers — no backwards-incompatibility to gate. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * refactor(compress): replace input_type with CallTypes call_type Drop the bespoke ``CompressionInputType`` literal and use the existing ``litellm.types.utils.CallTypes`` enum instead. ``litellm.compress()`` now takes ``call_type: Union[CallTypes, str]`` (default ``CallTypes.completion``) — no new concept to learn, and the enum is already the way the rest of the codebase talks about request shapes. Supported values: ``completion`` / ``acompletion`` (OpenAI chat-completions shape) and ``anthropic_messages`` (Anthropic structured content blocks). Updated: compress(), the compression_interception handler, tests, docs, and the two eval scripts. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
122 lines
3.6 KiB
Python
122 lines
3.6 KiB
Python
import sys
|
|
import os
|
|
from types import SimpleNamespace
|
|
|
|
sys.path.insert(
|
|
0, os.path.abspath("../../..")
|
|
)  # Adds the directory three levels up (relative to the cwd) to the system path
|
|
|
|
from litellm.proxy.common_utils.callback_utils import (
|
|
initialize_callbacks_on_proxy,
|
|
get_remaining_tokens_and_requests_from_request_data,
|
|
normalize_callback_names,
|
|
)
|
|
import litellm
|
|
|
|
from unittest.mock import patch
|
|
from litellm.proxy.common_utils.callback_utils import process_callback
|
|
|
|
|
|
def test_get_remaining_tokens_and_requests_from_request_data():
    """Remaining-limit metadata is returned as ``x-litellm-key-remaining-*`` headers.

    The model group used here contains ``/`` separators; the expected header
    names carry ``-`` in their place.
    """
    model_group = "openrouter/google/gemini-2.0-flash-001"

    # Build the request metadata step by step: the model group itself plus the
    # per-model-group remaining request/token counters.
    metadata = {"model_group": model_group}
    metadata[f"litellm-key-remaining-requests-{model_group}"] = 100
    metadata[f"litellm-key-remaining-tokens-{model_group}"] = 200

    headers = get_remaining_tokens_and_requests_from_request_data(
        {"metadata": metadata}
    )

    header_suffix = "openrouter-google-gemini-2.0-flash-001"
    expected_headers = {
        f"x-litellm-key-remaining-requests-{header_suffix}": 100,
        f"x-litellm-key-remaining-tokens-{header_suffix}": 200,
    }
    assert headers == expected_headers
|
|
|
|
|
|
@patch(
    "litellm.proxy.common_utils.callback_utils.CustomLogger.get_callback_env_vars",
    return_value=["API_KEY", "MISSING_VAR"],
)
def test_process_callback_with_env_vars(mock_get_env_vars):
    """Only the env vars reported for the callback appear in the result.

    ``get_callback_env_vars`` is patched to report ``API_KEY`` and
    ``MISSING_VAR``. The test expects the configured value for the former,
    ``None`` for the latter (it is not configured), and no entry for the
    unrelated ``UNUSED`` key.
    """
    configured_env = dict(API_KEY="PLAIN_VALUE", UNUSED="SHOULD_BE_IGNORED")

    callback_info = process_callback(
        _callback="my_callback",
        callback_type="input",
        environment_variables=configured_env,
    )

    expected_variables = {"API_KEY": "PLAIN_VALUE", "MISSING_VAR": None}
    assert callback_info["name"] == "my_callback"
    assert callback_info["type"] == "input"
    assert callback_info["variables"] == expected_variables
|
|
|
|
|
|
@patch(
    "litellm.proxy.common_utils.callback_utils.CustomLogger.get_callback_env_vars",
    return_value=[],
)
def test_process_callback_with_no_required_env_vars(mock_get_env_vars):
    """A callback reporting no env vars yields an empty ``variables`` mapping.

    ``get_callback_env_vars`` is patched to return an empty list, so the
    supplied environment entry is expected to be left out of the result.
    """
    supplied_env = {"SHOULD_NOT_BE_USED": "VALUE"}

    callback_info = process_callback(
        _callback="another_callback",
        callback_type="output",
        environment_variables=supplied_env,
    )

    assert callback_info["name"] == "another_callback"
    assert callback_info["type"] == "output"
    assert callback_info["variables"] == {}
|
|
|
|
|
|
def test_normalize_callback_names_none_returns_empty_list():
    """Both ``None`` and an empty list normalize to an empty list."""
    for empty_input in (None, []):
        assert normalize_callback_names(empty_input) == []
|
|
|
|
|
|
def test_normalize_callback_names_lowercases_strings():
    """Upper-case callback names come back lower-cased, in the same order."""
    upper_case_names = ["SQS", "S3", "CUSTOM_CALLBACK"]

    normalized = normalize_callback_names(upper_case_names)

    assert normalized == ["sqs", "s3", "custom_callback"]
|
|
|
|
|
|
def test_initialize_callbacks_on_proxy_instantiates_compression_interception(
    monkeypatch,
):
    """``compression_interception`` is registered as an instance, not a string.

    ``CompressionInterceptionLogger.initialize_from_proxy_config`` is replaced
    with a stub returning a sentinel object. After
    ``initialize_callbacks_on_proxy`` runs, the sentinel must be present in
    ``litellm.callbacks`` and the raw ``"compression_interception"`` name must
    not be.

    All global state touched here (``sys.modules``, the logger factory and
    ``litellm.callbacks``) is changed through ``monkeypatch``, so the exact
    original values are put back at teardown even when an assertion fails.
    """
    dummy_callback = object()

    # Substitute the proxy server module with a minimal object exposing only
    # ``prisma_client`` so the real module is not used by this test.
    monkeypatch.setitem(
        sys.modules,
        "litellm.proxy.proxy_server",
        SimpleNamespace(prisma_client=None),
    )
    monkeypatch.setattr(
        "litellm.integrations.compression_interception.handler.CompressionInterceptionLogger.initialize_from_proxy_config",
        lambda litellm_settings, callback_specific_params: dummy_callback,
    )

    # Start from an empty callback list. Using monkeypatch (rather than a manual
    # try/finally with a copied list) restores the original object itself,
    # whatever its type, instead of a copy or a substituted ``[]``.
    monkeypatch.setattr(litellm, "callbacks", [])

    initialize_callbacks_on_proxy(
        value=["compression_interception"],
        premium_user=False,
        config_file_path=".",
        litellm_settings={"compression_interception_params": {"enabled": True}},
        callback_specific_params={},
    )

    assert dummy_callback in litellm.callbacks
    assert "compression_interception" not in litellm.callbacks
|