mirror of
https://github.com/tiennm99/litellm.git
synced 2026-06-17 22:48:35 +00:00
7ed9be55b1
Previously, stream_chunk_builder only took annotations from the first chunk that contained them, losing any annotations from later chunks. This is a problem because providers like Gemini/Vertex AI send grounding metadata (converted to annotations) in the final streaming chunk, while other providers may spread annotations across multiple chunks. Changes: - Collect and merge annotations from ALL annotation-bearing chunks instead of only using the first one
192 lines
5.6 KiB
Python
192 lines
5.6 KiB
Python
"""
|
|
Tests for stream_chunk_builder annotation merging.
|
|
|
|
Previously, stream_chunk_builder only took annotations from the FIRST
|
|
annotation chunk, losing any annotations that arrived in later chunks.
|
|
This fix merges annotations from ALL chunks.
|
|
"""
|
|
|
|
from litellm import stream_chunk_builder
|
|
from litellm.types.utils import Delta, ModelResponseStream, StreamingChoices
|
|
|
|
|
|
def test_stream_chunk_builder_merges_annotations_from_multiple_chunks():
    """
    Annotations from every streaming chunk must survive the rebuild.

    The stream below carries one citation in its first chunk and another
    in its final chunk, with a plain content chunk in between. The
    rebuilt message is expected to expose both citations, in the order
    the chunks were supplied, rather than only the first one seen.

    Providers may spread annotations across multiple chunks (e.g. Gemini
    sends grounding metadata in the final chunk, while intermediate
    chunks may carry different annotations).
    """

    def _citation(url, title, start_index, end_index):
        # Build a url_citation annotation dict for the fixtures below.
        return {
            "type": "url_citation",
            "url_citation": {
                "url": url,
                "title": title,
                "start_index": start_index,
                "end_index": end_index,
            },
        }

    def _chunk(delta, finish_reason=None):
        # Wrap a delta in a single-choice streaming chunk; all chunks
        # share the same id/created/model so they form one stream.
        return ModelResponseStream(
            id="chatcmpl-test",
            created=1700000000,
            model="test-model",
            object="chat.completion.chunk",
            choices=[
                StreamingChoices(
                    finish_reason=finish_reason,
                    index=0,
                    delta=delta,
                )
            ],
        )

    annotation_a = _citation("https://example.com/a", "Source A", 0, 10)
    annotation_b = _citation("https://example.com/b", "Source B", 20, 30)

    # First chunk: content + role + first annotation.
    opening = _chunk(
        Delta(
            content="Part one. ",
            role="assistant",
            annotations=[annotation_a],
        )
    )
    # Middle chunk: content only, no annotations.
    middle = _chunk(Delta(content="Part two."))
    # Final chunk: no content, carries the second annotation.
    closing = _chunk(
        Delta(
            content=None,
            annotations=[annotation_b],
        ),
        finish_reason="stop",
    )

    response = stream_chunk_builder(chunks=[opening, middle, closing])
    assert response is not None

    message = response["choices"][0]["message"]
    assert message.annotations is not None
    assert len(message.annotations) == 2

    # Order must follow chunk arrival order: A first, then B.
    for position, expected in enumerate((annotation_a, annotation_b)):
        assert message.annotations[position] == expected
|
|
|
|
|
|
def test_stream_chunk_builder_single_annotation_chunk_still_works():
    """
    Regression guard for the most common case: one annotation chunk.

    Only the final chunk of the stream carries an annotation; the
    rebuilt message must expose exactly that one annotation.
    """

    def _chunk(delta, finish_reason=None):
        # Wrap a delta in a single-choice streaming chunk.
        return ModelResponseStream(
            id="chatcmpl-test",
            created=1700000000,
            model="test-model",
            object="chat.completion.chunk",
            choices=[
                StreamingChoices(
                    finish_reason=finish_reason,
                    index=0,
                    delta=delta,
                )
            ],
        )

    citation_body = {
        "url": "https://example.com/only",
        "title": "Only Source",
        "start_index": 0,
        "end_index": 5,
    }
    annotation = {"type": "url_citation", "url_citation": citation_body}

    stream = [
        # Content-bearing chunk without annotations.
        _chunk(Delta(content="Hello", role="assistant")),
        # Terminal chunk carrying the sole annotation.
        _chunk(
            Delta(content=None, annotations=[annotation]),
            finish_reason="stop",
        ),
    ]

    response = stream_chunk_builder(chunks=stream)
    assert response is not None

    message = response["choices"][0]["message"]
    assert message.annotations is not None
    assert len(message.annotations) == 1
    assert message.annotations[0] == annotation
|
|
|
|
|
|
def test_stream_chunk_builder_no_annotations():
    """
    Regression guard: a stream with no annotations yields none.

    Neither chunk carries annotations, so the rebuilt message must
    either lack an ``annotations`` attribute or have it set to None.
    """

    def _chunk(delta, finish_reason=None):
        # Wrap a delta in a single-choice streaming chunk.
        return ModelResponseStream(
            id="chatcmpl-test",
            created=1700000000,
            model="test-model",
            object="chat.completion.chunk",
            choices=[
                StreamingChoices(
                    finish_reason=finish_reason,
                    index=0,
                    delta=delta,
                )
            ],
        )

    stream = [
        _chunk(Delta(content="Hello", role="assistant")),
        _chunk(Delta(content=None), finish_reason="stop"),
    ]

    response = stream_chunk_builder(chunks=stream)
    assert response is not None

    message = response["choices"][0]["message"]
    # A missing attribute and an explicit None are both acceptable.
    assert getattr(message, "annotations", None) is None
|