mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-18 00:48:01 +00:00
* Normalize Realtime usage dict keys before ResponseAPIUsage transform
* Test usage transform for Realtime versus tokens_details keys
* Avoid mutating usage_input dict in place
* Fix audio cost calculation
* fix(responses): forward output audio_tokens into completion usage details

  Pass audio_tokens from output_tokens_details into CompletionTokensDetailsWrapper so cost can use output_cost_per_audio_token. Support dict output details like prompt path. Extend tests for Realtime and mixed completion audio.

  Co-authored-by: Cursor <cursoragent@cursor.com>
* Fix audio token usage formatting
* style: Black-format Realtime usage and completion usage merge

  Resolve combine_usage_objects and responses/utils wrapping for CI black --check. Restore model_fields comments above completion_tokens_details merge loop.

  Co-authored-by: Cursor <cursoragent@cursor.com>
* Add test to cover combined usage objects
* Fix merge conflict with test cases

  Removed unnecessary import statement and cleaned up assertions in test.
* fix(cost_calculator): remove dead None guard in completion_tokens_details combiner

---------

Co-authored-by: Liam McDonald <lmcdonald@godaddy.com>
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -2425,12 +2425,11 @@ class BaseTokenUsageProcessor:
|
||||
if not attr.startswith("_") and not callable(
|
||||
getattr(usage.completion_tokens_details, attr)
|
||||
):
|
||||
current_val = getattr(
|
||||
combined.completion_tokens_details, attr, 0
|
||||
current_val = (
|
||||
getattr(combined.completion_tokens_details, attr, 0) or 0
|
||||
)
|
||||
new_val = getattr(usage.completion_tokens_details, attr, 0)
|
||||
|
||||
if new_val is not None and current_val is not None:
|
||||
new_val = getattr(usage.completion_tokens_details, attr, 0) or 0
|
||||
if isinstance(new_val, (int, float)):
|
||||
setattr(
|
||||
combined.completion_tokens_details,
|
||||
attr,
|
||||
|
||||
@@ -1006,6 +1006,20 @@ class ResponseAPILoggingUtils:
|
||||
)
|
||||
response_api_usage: ResponseAPIUsage
|
||||
if isinstance(usage_input, dict):
|
||||
usage_input = dict(usage_input) # shallow copy; avoid mutating caller
|
||||
# Realtime *_token_details → *_tokens_details when unset.
|
||||
if (
|
||||
usage_input.get("input_tokens_details") is None
|
||||
and "input_token_details" in usage_input
|
||||
):
|
||||
usage_input["input_tokens_details"] = usage_input["input_token_details"]
|
||||
if (
|
||||
usage_input.get("output_tokens_details") is None
|
||||
and "output_token_details" in usage_input
|
||||
):
|
||||
usage_input["output_tokens_details"] = usage_input[
|
||||
"output_token_details"
|
||||
]
|
||||
total_tokens = usage_input.get("total_tokens")
|
||||
if total_tokens is None:
|
||||
input_tokens = usage_input.get("input_tokens")
|
||||
@@ -1050,6 +1064,7 @@ class ResponseAPILoggingUtils:
|
||||
),
|
||||
image_tokens=getattr(output_tokens_details, "image_tokens", None),
|
||||
text_tokens=getattr(output_tokens_details, "text_tokens", None),
|
||||
audio_tokens=getattr(output_tokens_details, "audio_tokens", None),
|
||||
)
|
||||
|
||||
chat_usage = Usage(
|
||||
|
||||
@@ -327,7 +327,8 @@ class TestResponseAPILoggingUtils:
|
||||
"output_tokens_details": {
|
||||
"reasoning_tokens": 30,
|
||||
"image_tokens": 100,
|
||||
"text_tokens": 70,
|
||||
"text_tokens": 50,
|
||||
"audio_tokens": 20,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -346,7 +347,61 @@ class TestResponseAPILoggingUtils:
|
||||
assert result.completion_tokens_details is not None
|
||||
assert result.completion_tokens_details.reasoning_tokens == 30
|
||||
assert result.completion_tokens_details.image_tokens == 100
|
||||
assert result.completion_tokens_details.text_tokens == 70
|
||||
assert result.completion_tokens_details.text_tokens == 50
|
||||
assert result.completion_tokens_details.audio_tokens == 20
|
||||
|
||||
def test_transform_response_api_usage_with_realtime_keys(self):
    """Singular Realtime detail keys are carried into the chat usage details.

    The payload only provides ``input_token_details`` and
    ``output_token_details`` (no plural ``*_tokens_details`` keys); the
    transformed result must still expose the text and audio counts.
    """
    input_details = {
        "text_tokens": 8,
        "audio_tokens": 2,
        "cached_tokens": 0,
    }
    output_details = {
        "text_tokens": 12,
        "audio_tokens": 8,
    }
    realtime_usage = {
        "input_tokens": 10,
        "output_tokens": 20,
        "total_tokens": 30,
        "input_token_details": input_details,
        "output_token_details": output_details,
    }

    transform = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage
    chat_usage = transform(realtime_usage)

    # Prompt side: values come from the singular input details.
    assert chat_usage.prompt_tokens_details is not None
    assert chat_usage.prompt_tokens_details.text_tokens == 8
    assert chat_usage.prompt_tokens_details.audio_tokens == 2

    # Completion side: values come from the singular output details.
    assert chat_usage.completion_tokens_details is not None
    assert chat_usage.completion_tokens_details.text_tokens == 12
    assert chat_usage.completion_tokens_details.audio_tokens == 8
|
||||
|
||||
def test_transform_response_api_usage_tokens_details_keep_values(self):
    """Plural ``*_tokens_details`` win when the singular keys are present too.

    Both spellings are supplied with conflicting numbers; the assertions
    expect the plural values and no audio count leaking in from the
    singular dicts.
    """
    conflicting_usage = {
        "input_tokens": 10,
        "output_tokens": 20,
        "total_tokens": 30,
        # Plural keys: the values the result is expected to keep.
        "input_tokens_details": {"text_tokens": 10},
        "output_tokens_details": {"text_tokens": 20},
        # Singular keys: decoy values that must not show up in the result.
        "input_token_details": {"text_tokens": 1, "audio_tokens": 99},
        "output_token_details": {"text_tokens": 2, "audio_tokens": 98},
    }

    transform = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage
    chat_usage = transform(conflicting_usage)

    assert chat_usage.prompt_tokens_details is not None
    assert chat_usage.prompt_tokens_details.text_tokens == 10
    assert chat_usage.prompt_tokens_details.audio_tokens is None

    assert chat_usage.completion_tokens_details is not None
    assert chat_usage.completion_tokens_details.text_tokens == 20
    assert chat_usage.completion_tokens_details.audio_tokens is None
|
||||
|
||||
|
||||
class TestResponsesAPIProviderSpecificParams:
|
||||
|
||||
@@ -385,6 +385,51 @@ def test_handle_realtime_stream_cost_calculation():
|
||||
)
|
||||
assert cost == 0.0 # No usage, no cost
|
||||
|
||||
|
||||
def test_realtime_stream_combines_text_and_audio_token_details():
    """Two ``response.done`` usages with singular detail keys are summed per field.

    Each event reports ``input_token_details`` / ``output_token_details``;
    the combined usage is expected to hold the per-field totals for both
    text and audio tokens.
    """
    from litellm.cost_calculator import RealtimeAPITokenUsageProcessor

    session_created = {
        "type": "session.created",
        "session": {"model": "gpt-4o-realtime-preview"},
    }
    first_done = {
        "type": "response.done",
        "response": {
            "usage": {
                "input_tokens": 10,
                "output_tokens": 20,
                "total_tokens": 30,
                "input_token_details": {"text_tokens": 8, "audio_tokens": 2},
                "output_token_details": {"text_tokens": 12, "audio_tokens": 8},
            }
        },
    }
    second_done = {
        "type": "response.done",
        "response": {
            "usage": {
                "input_tokens": 5,
                "output_tokens": 15,
                "total_tokens": 20,
                "input_token_details": {"text_tokens": 3, "audio_tokens": 2},
                "output_token_details": {"text_tokens": 5, "audio_tokens": 10},
            }
        },
    }
    stream_events: OpenAIRealtimeStreamList = [
        session_created,
        first_done,
        second_done,
    ]

    combine = (
        RealtimeAPITokenUsageProcessor.collect_and_combine_usage_from_realtime_stream_results
    )
    merged = combine(results=stream_events)

    # Prompt totals: text 8 + 3, audio 2 + 2.
    assert merged.prompt_tokens_details is not None
    assert merged.prompt_tokens_details.text_tokens == 11
    assert merged.prompt_tokens_details.audio_tokens == 4

    # Completion totals: text 12 + 5, audio 8 + 10.
    assert merged.completion_tokens_details is not None
    assert merged.completion_tokens_details.text_tokens == 17
    assert merged.completion_tokens_details.audio_tokens == 18
|
||||
|
||||
|
||||
def test_realtime_logging_object_allows_null_transcript_in_conversation_item_added():
|
||||
results: OpenAIRealtimeStreamList = [
|
||||
|
||||
Reference in New Issue
Block a user