Files
litellm/tests/test_litellm/test_stream_chunk_builder_annotations.py
T
bbarwik 7ed9be55b1 fix: merge annotations from all streaming chunks in stream_chunk_builder
Previously, stream_chunk_builder only took annotations from the first
chunk that contained them, losing any annotations from later chunks.

This is a problem because providers like Gemini/Vertex AI send grounding
metadata (converted to annotations) in the final streaming chunk, while
other providers may spread annotations across multiple chunks.

Changes:
- Collect and merge annotations from ALL annotation-bearing chunks
  instead of only using the first one
2026-03-15 14:20:45 +05:30

192 lines
5.6 KiB
Python

"""
Tests for stream_chunk_builder annotation merging.
Previously, stream_chunk_builder only took annotations from the FIRST
annotation chunk, losing any annotations that arrived in later chunks.
This fix merges annotations from ALL chunks.
"""
from litellm import stream_chunk_builder
from litellm.types.utils import Delta, ModelResponseStream, StreamingChoices
def test_stream_chunk_builder_merges_annotations_from_multiple_chunks():
"""
stream_chunk_builder must merge annotations from ALL streaming chunks,
not just take them from the first annotation chunk.
Providers may spread annotations across multiple chunks (e.g. Gemini
sends grounding metadata in the final chunk, while intermediate chunks
may carry different annotations).
"""
annotation_a = {
"type": "url_citation",
"url_citation": {
"url": "https://example.com/a",
"title": "Source A",
"start_index": 0,
"end_index": 10,
},
}
annotation_b = {
"type": "url_citation",
"url_citation": {
"url": "https://example.com/b",
"title": "Source B",
"start_index": 20,
"end_index": 30,
},
}
chunks = [
ModelResponseStream(
id="chatcmpl-test",
created=1700000000,
model="test-model",
object="chat.completion.chunk",
choices=[
StreamingChoices(
finish_reason=None,
index=0,
delta=Delta(
content="Part one. ",
role="assistant",
annotations=[annotation_a],
),
)
],
),
ModelResponseStream(
id="chatcmpl-test",
created=1700000000,
model="test-model",
object="chat.completion.chunk",
choices=[
StreamingChoices(
finish_reason=None,
index=0,
delta=Delta(content="Part two."),
)
],
),
ModelResponseStream(
id="chatcmpl-test",
created=1700000000,
model="test-model",
object="chat.completion.chunk",
choices=[
StreamingChoices(
finish_reason="stop",
index=0,
delta=Delta(
content=None,
annotations=[annotation_b],
),
)
],
),
]
response = stream_chunk_builder(chunks=chunks)
assert response is not None
message = response["choices"][0]["message"]
assert message.annotations is not None
assert len(message.annotations) == 2
assert message.annotations[0] == annotation_a
assert message.annotations[1] == annotation_b
def test_stream_chunk_builder_single_annotation_chunk_still_works():
"""
When annotations come from a single chunk (most common case),
stream_chunk_builder must still work correctly (no regression).
"""
annotation = {
"type": "url_citation",
"url_citation": {
"url": "https://example.com/only",
"title": "Only Source",
"start_index": 0,
"end_index": 5,
},
}
chunks = [
ModelResponseStream(
id="chatcmpl-test",
created=1700000000,
model="test-model",
object="chat.completion.chunk",
choices=[
StreamingChoices(
finish_reason=None,
index=0,
delta=Delta(content="Hello", role="assistant"),
)
],
),
ModelResponseStream(
id="chatcmpl-test",
created=1700000000,
model="test-model",
object="chat.completion.chunk",
choices=[
StreamingChoices(
finish_reason="stop",
index=0,
delta=Delta(content=None, annotations=[annotation]),
)
],
),
]
response = stream_chunk_builder(chunks=chunks)
assert response is not None
message = response["choices"][0]["message"]
assert message.annotations is not None
assert len(message.annotations) == 1
assert message.annotations[0] == annotation
def test_stream_chunk_builder_no_annotations():
"""
When no chunks contain annotations, the message should not have
an annotations key (no regression).
"""
chunks = [
ModelResponseStream(
id="chatcmpl-test",
created=1700000000,
model="test-model",
object="chat.completion.chunk",
choices=[
StreamingChoices(
finish_reason=None,
index=0,
delta=Delta(content="Hello", role="assistant"),
)
],
),
ModelResponseStream(
id="chatcmpl-test",
created=1700000000,
model="test-model",
object="chat.completion.chunk",
choices=[
StreamingChoices(
finish_reason="stop",
index=0,
delta=Delta(content=None),
)
],
),
]
response = stream_chunk_builder(chunks=chunks)
assert response is not None
message = response["choices"][0]["message"]
assert not hasattr(message, "annotations") or message.annotations is None