[internal copy of #27491] fix(realtime): Fix Realtime Audio Token Cost Tracking (#29722)

* Normalize Realtime usage dict keys before ResponseAPIUsage transform

* Test usage transform for Realtime versus tokens_details keys

* Avoid mutating the usage_input dict in place

* Fix audio cost calculation

* fix(responses): forward output audio_tokens into completion usage details

Pass audio_tokens from output_tokens_details into CompletionTokensDetailsWrapper
so that cost calculation can use output_cost_per_audio_token. Support dict-typed
output details, as the prompt path already does. Extend the tests to cover
Realtime usage and mixed text/audio completions.

Co-authored-by: Cursor <cursoragent@cursor.com>

* Fix audio token usage formatting

* style: Black-format Realtime usage and completion usage merge

Resolve combine_usage_objects and responses/utils wrapping for CI black --check.
Restore model_fields comments above completion_tokens_details merge loop.

Co-authored-by: Cursor <cursoragent@cursor.com>

* Add test to cover combined usage objects

* Fix merge conflict with test cases

Removed unnecessary import statement and cleaned up assertions in test.

* fix(cost_calculator): remove dead None guard in completion_tokens_details combiner

---------

Co-authored-by: Liam McDonald <lmcdonald@godaddy.com>
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Mateo Wang
2026-06-05 06:23:17 -07:00
committed by GitHub
parent 3f79222350
commit ffd0e9fa7f
4 changed files with 121 additions and 7 deletions
+4 -5
View File
@@ -2425,12 +2425,11 @@ class BaseTokenUsageProcessor:
if not attr.startswith("_") and not callable(
getattr(usage.completion_tokens_details, attr)
):
current_val = getattr(
combined.completion_tokens_details, attr, 0
current_val = (
getattr(combined.completion_tokens_details, attr, 0) or 0
)
new_val = getattr(usage.completion_tokens_details, attr, 0)
if new_val is not None and current_val is not None:
new_val = getattr(usage.completion_tokens_details, attr, 0) or 0
if isinstance(new_val, (int, float)):
setattr(
combined.completion_tokens_details,
attr,
+15
View File
@@ -1006,6 +1006,20 @@ class ResponseAPILoggingUtils:
)
response_api_usage: ResponseAPIUsage
if isinstance(usage_input, dict):
usage_input = dict(usage_input) # shallow copy; avoid mutating caller
# Realtime *_token_details → *_tokens_details when unset.
if (
usage_input.get("input_tokens_details") is None
and "input_token_details" in usage_input
):
usage_input["input_tokens_details"] = usage_input["input_token_details"]
if (
usage_input.get("output_tokens_details") is None
and "output_token_details" in usage_input
):
usage_input["output_tokens_details"] = usage_input[
"output_token_details"
]
total_tokens = usage_input.get("total_tokens")
if total_tokens is None:
input_tokens = usage_input.get("input_tokens")
@@ -1050,6 +1064,7 @@ class ResponseAPILoggingUtils:
),
image_tokens=getattr(output_tokens_details, "image_tokens", None),
text_tokens=getattr(output_tokens_details, "text_tokens", None),
audio_tokens=getattr(output_tokens_details, "audio_tokens", None),
)
chat_usage = Usage(
@@ -327,7 +327,8 @@ class TestResponseAPILoggingUtils:
"output_tokens_details": {
"reasoning_tokens": 30,
"image_tokens": 100,
"text_tokens": 70,
"text_tokens": 50,
"audio_tokens": 20,
},
}
@@ -346,7 +347,61 @@ class TestResponseAPILoggingUtils:
assert result.completion_tokens_details is not None
assert result.completion_tokens_details.reasoning_tokens == 30
assert result.completion_tokens_details.image_tokens == 100
assert result.completion_tokens_details.text_tokens == 70
assert result.completion_tokens_details.text_tokens == 50
assert result.completion_tokens_details.audio_tokens == 20
def test_transform_response_api_usage_with_realtime_keys(self):
    """Singular Realtime ``*_token_details`` keys must surface in the chat Usage details."""
    # Realtime payloads use the singular "token_details" spelling for both
    # directions; no plural "*_tokens_details" keys are supplied here.
    realtime_input_details = {
        "text_tokens": 8,
        "audio_tokens": 2,
        "cached_tokens": 0,
    }
    realtime_output_details = {
        "text_tokens": 12,
        "audio_tokens": 8,
    }
    realtime_usage = {
        "input_tokens": 10,
        "output_tokens": 20,
        "total_tokens": 30,
        "input_token_details": realtime_input_details,
        "output_token_details": realtime_output_details,
    }

    chat_usage = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
        realtime_usage
    )

    # Input side: text/audio split is carried over to the prompt details.
    prompt_details = chat_usage.prompt_tokens_details
    assert prompt_details is not None
    assert prompt_details.text_tokens == 8
    assert prompt_details.audio_tokens == 2

    # Output side: text/audio split is carried over to the completion details.
    completion_details = chat_usage.completion_tokens_details
    assert completion_details is not None
    assert completion_details.text_tokens == 12
    assert completion_details.audio_tokens == 8
def test_transform_response_api_usage_tokens_details_keep_values(self):
    """Plural ``*_tokens_details`` values are kept when the singular keys are present too."""
    # Values the transform is expected to honour.
    plural_details = {
        "input_tokens_details": {"text_tokens": 10},
        "output_tokens_details": {"text_tokens": 20},
    }
    # Conflicting singular-key values; none of these should show up in the result.
    singular_details = {
        "input_token_details": {"text_tokens": 1, "audio_tokens": 99},
        "output_token_details": {"text_tokens": 2, "audio_tokens": 98},
    }
    usage_payload = {
        "input_tokens": 10,
        "output_tokens": 20,
        "total_tokens": 30,
        **plural_details,
        **singular_details,
    }

    chat_usage = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
        usage_payload
    )

    prompt_details = chat_usage.prompt_tokens_details
    assert prompt_details is not None
    assert prompt_details.text_tokens == 10
    # The plural input details carried no audio count, so none is reported.
    assert prompt_details.audio_tokens is None

    completion_details = chat_usage.completion_tokens_details
    assert completion_details is not None
    assert completion_details.text_tokens == 20
    # Likewise for the output side.
    assert completion_details.audio_tokens is None
class TestResponsesAPIProviderSpecificParams:
@@ -385,6 +385,51 @@ def test_handle_realtime_stream_cost_calculation():
)
assert cost == 0.0 # No usage, no cost
def test_realtime_stream_combines_text_and_audio_token_details():
    """Token details from several Realtime ``response.done`` events are summed per modality."""
    from litellm.cost_calculator import RealtimeAPITokenUsageProcessor

    # Usage blocks as reported on two separate response.done events, using the
    # singular input_token_details / output_token_details keys.
    first_usage = {
        "input_tokens": 10,
        "output_tokens": 20,
        "total_tokens": 30,
        "input_token_details": {"text_tokens": 8, "audio_tokens": 2},
        "output_token_details": {"text_tokens": 12, "audio_tokens": 8},
    }
    second_usage = {
        "input_tokens": 5,
        "output_tokens": 15,
        "total_tokens": 20,
        "input_token_details": {"text_tokens": 3, "audio_tokens": 2},
        "output_token_details": {"text_tokens": 5, "audio_tokens": 10},
    }
    results: OpenAIRealtimeStreamList = [
        {"type": "session.created", "session": {"model": "gpt-4o-realtime-preview"}},
        {"type": "response.done", "response": {"usage": first_usage}},
        {"type": "response.done", "response": {"usage": second_usage}},
    ]

    combined = RealtimeAPITokenUsageProcessor.collect_and_combine_usage_from_realtime_stream_results(
        results=results,
    )

    # Input side: 8 + 3 text tokens, 2 + 2 audio tokens.
    prompt_details = combined.prompt_tokens_details
    assert prompt_details is not None
    assert prompt_details.text_tokens == 11
    assert prompt_details.audio_tokens == 4

    # Output side: 12 + 5 text tokens, 8 + 10 audio tokens.
    completion_details = combined.completion_tokens_details
    assert completion_details is not None
    assert completion_details.text_tokens == 17
    assert completion_details.audio_tokens == 18
def test_realtime_logging_object_allows_null_transcript_in_conversation_item_added():
results: OpenAIRealtimeStreamList = [