From 7333060fb0469f12f8d51000f88232675c110b52 Mon Sep 17 00:00:00 2001 From: TomuHirata Date: Thu, 28 Aug 2025 18:43:43 +0900 Subject: [PATCH 1/4] feat(databricks): add anthropic citation support --- docs/my-website/docs/providers/databricks.md | 5 ++ .../llms/databricks/chat/transformation.py | 28 ++++++++++ litellm/types/llms/databricks.py | 5 +- .../test_databricks_chat_transformation.py | 53 ++++++++++++++++++- 4 files changed, 88 insertions(+), 3 deletions(-) diff --git a/docs/my-website/docs/providers/databricks.md b/docs/my-website/docs/providers/databricks.md index 8631cbfdad..921b06a17b 100644 --- a/docs/my-website/docs/providers/databricks.md +++ b/docs/my-website/docs/providers/databricks.md @@ -282,6 +282,11 @@ ModelResponse( ) ``` +### Citations + +Anthropic models served through Databricks can return citation metadata. LiteLLM +exposes these via `response.choices[0].message.provider_specific_fields["citations"]`. + ### Pass `thinking` to Anthropic models You can also pass the `thinking` parameter to Anthropic models. diff --git a/litellm/llms/databricks/chat/transformation.py b/litellm/llms/databricks/chat/transformation.py index 908419f719..5600d5c642 100644 --- a/litellm/llms/databricks/chat/transformation.py +++ b/litellm/llms/databricks/chat/transformation.py @@ -379,6 +379,21 @@ class DatabricksConfig(DatabricksBase, OpenAILikeChatConfig, AnthropicConfig): thinking_blocks.append(thinking_block) return reasoning_content, thinking_blocks + @staticmethod + def extract_citations( + content: Optional[AllDatabricksContentValues], + ) -> Optional[List[Any]]: + if content is None: + return None + citations: Optional[List[Any]] = None + if isinstance(content, list): + for item in content: + if item.get("citations") is not None: + if citations is None: + citations = [] + citations.append(item["citations"]) + return citations + def _transform_dbrx_choices( self, choices: List[DatabricksChoice], json_mode: Optional[bool] = None ) -> List[Choices]: @@ -427,12 +442,19 @@ class DatabricksConfig(DatabricksBase, OpenAILikeChatConfig, AnthropicConfig): choice["message"].get("content") ) + citations = DatabricksConfig.extract_citations( + choice["message"].get("content") + ) + translated_message = Message( role="assistant", content=content_str, reasoning_content=reasoning_content, thinking_blocks=thinking_blocks, tool_calls=choice["message"].get("tool_calls"), + provider_specific_fields={"citations": citations} + if citations is not None + else None, ) if finish_reason is None: @@ -561,6 +583,12 @@ class DatabricksChatResponseIterator(BaseModelResponseIterator): for _tc in tool_calls: if _tc.get("function", {}).get("arguments") == "{}": _tc["function"]["arguments"] = "" # avoid invalid json + citation = choice["delta"].get("citation") + if citation is not None: + choice["delta"].setdefault("provider_specific_fields", {})[ + "citation" + ] = citation + choice["delta"].pop("citation", None) # extract the content str content_str = DatabricksConfig.extract_content_str( choice["delta"].get("content") diff --git a/litellm/types/llms/databricks.py b/litellm/types/llms/databricks.py index bb59b692ef..3715140816 100644 --- a/litellm/types/llms/databricks.py +++ b/litellm/types/llms/databricks.py @@ -1,5 +1,5 @@ import json -from typing import Any, List, Literal, Optional, TypedDict, Union +from typing import Any, Dict, List, Literal, Optional, TypedDict, Union from pydantic import BaseModel from typing_extensions import ( @@ -24,9 +24,10 @@ class GenericStreamingChunk(TypedDict, total=False): usage: Optional[BaseModel] -class DatabricksTextContent(TypedDict): +class DatabricksTextContent(TypedDict, total=False): type: Literal["text"] text: Required[str] + citations: Optional[List[Dict[str, Any]]] class DatabricksReasoningSummary(TypedDict): diff --git a/tests/test_litellm/llms/databricks/chat/test_databricks_chat_transformation.py b/tests/test_litellm/llms/databricks/chat/test_databricks_chat_transformation.py index fc44d44aba..d61f826e89 100644 --- a/tests/test_litellm/llms/databricks/chat/test_databricks_chat_transformation.py +++ b/tests/test_litellm/llms/databricks/chat/test_databricks_chat_transformation.py @@ -10,7 +10,10 @@ sys.path.insert( ) # Adds the parent directory to the system path from unittest.mock import MagicMock, patch -from litellm.llms.databricks.chat.transformation import DatabricksConfig +from litellm.llms.databricks.chat.transformation import ( + DatabricksChatResponseIterator, + DatabricksConfig, +) def test_transform_choices(): @@ -90,3 +93,51 @@ def test_transform_choices_without_signature(): thinking_block = choices[0].message.thinking_blocks[0] assert thinking_block["type"] == "thinking" assert thinking_block["thinking"] == "i'm thinking without signature." + + +def test_transform_choices_with_citations(): + config = DatabricksConfig() + databricks_choices = [ + { + "message": { + "role": "assistant", + "content": [ + { + "type": "text", + "text": "Paris", + "citations": [{"source": "wiki"}], + } + ], + }, + "index": 0, + "finish_reason": "stop", + } + ] + + choices = config._transform_dbrx_choices(choices=databricks_choices) + + assert choices[0].message.provider_specific_fields == { + "citations": [[{"source": "wiki"}]] + } + + +def test_chunk_parser_with_citation(): + iterator = DatabricksChatResponseIterator(None, sync_stream=True) + chunk = { + "id": "1", + "object": "chat.completion.chunk", + "created": 0, + "model": "test", + "choices": [ + { + "delta": {"citation": {"source": "wiki"}}, + "index": 0, + "finish_reason": None, + } + ], + } + + parsed = iterator.chunk_parser(chunk) + assert parsed.choices[0].delta.provider_specific_fields == { + "citation": {"source": "wiki"} + } From 38a1dbd13a549967f4fe4f8895934810b2a4ebab Mon Sep 17 00:00:00 2001 From: TomuHirata Date: Thu, 28 Aug 2025 22:30:27 +0900 Subject: [PATCH 2/4] fix(databricks): include citations in reasoning content type --- litellm/types/llms/databricks.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/litellm/types/llms/databricks.py b/litellm/types/llms/databricks.py index 3715140816..112427c6b5 100644 --- a/litellm/types/llms/databricks.py +++ b/litellm/types/llms/databricks.py @@ -36,9 +36,10 @@ class DatabricksReasoningSummary(TypedDict): signature: str -class DatabricksReasoningContent(TypedDict): +class DatabricksReasoningContent(TypedDict, total=False): type: Literal["reasoning"] - summary: List[DatabricksReasoningSummary] + summary: Required[List[DatabricksReasoningSummary]] + citations: Optional[List[Dict[str, Any]]] AllDatabricksContentListValues = Union[ From ca6d77b479b771b53ea0cf0107719db06b06165c Mon Sep 17 00:00:00 2001 From: TomeHirata Date: Mon, 1 Sep 2025 16:41:35 +0900 Subject: [PATCH 3/4] fix citation field name --- .../llms/databricks/chat/transformation.py | 28 +++++---- .../test_databricks_chat_transformation.py | 57 +++++++++++++++++-- 2 files changed, 69 insertions(+), 16 deletions(-) diff --git a/litellm/llms/databricks/chat/transformation.py b/litellm/llms/databricks/chat/transformation.py index 5600d5c642..9330b01923 100644 --- a/litellm/llms/databricks/chat/transformation.py +++ b/litellm/llms/databricks/chat/transformation.py @@ -26,7 +26,6 @@ from litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response impo _should_convert_tool_call_to_json_mode, ) from litellm.litellm_core_utils.prompt_templates.common_utils import ( - handle_messages_with_content_list_to_str_conversion, strip_name_from_messages, ) from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator @@ -301,7 +300,6 @@ class DatabricksConfig(DatabricksBase, OpenAILikeChatConfig, AnthropicConfig): ) -> Union[List[AllMessageValues], Coroutine[Any, Any, List[AllMessageValues]]]: """ Databricks does not support: - - content in list format. - 'name' in user message. """ new_messages = [] @@ -311,7 +309,6 @@ class DatabricksConfig(DatabricksBase, OpenAILikeChatConfig, AnthropicConfig): else: _message = message new_messages.append(_message) - new_messages = handle_messages_with_content_list_to_str_conversion(new_messages) new_messages = strip_name_from_messages(new_messages) if is_async: @@ -388,10 +385,16 @@ class DatabricksConfig(DatabricksBase, OpenAILikeChatConfig, AnthropicConfig): citations: Optional[List[Any]] = None if isinstance(content, list): for item in content: + text = item.get("text", None) if item.get("citations") is not None: if citations is None: citations = [] - citations.append(item["citations"]) + citations.append( + [ + {**citation, "supported_text": text} + for citation in item["citations"] + ] + ) return citations def _transform_dbrx_choices( @@ -583,12 +586,17 @@ class DatabricksChatResponseIterator(BaseModelResponseIterator): for _tc in tool_calls: if _tc.get("function", {}).get("arguments") == "{}": _tc["function"]["arguments"] = "" # avoid invalid json - citation = choice["delta"].get("citation") - if citation is not None: - choice["delta"].setdefault("provider_specific_fields", {})[ - "citation" - ] = citation - choice["delta"].pop("citation", None) + if isinstance(choice["delta"]["content"], list) and ( + content := choice["delta"]["content"] + ): + if citations := content[0].get("citations"): + # TODO: Databricks delta does not include supported text or chunk type. + # Add either here once Databricks supports it to enable citation linkage. + choice["delta"].setdefault("provider_specific_fields", {})[ + "citation" + ] = citations[ + 0 + ] # Databricks Content item always has citation as a list of list # extract the content str content_str = DatabricksConfig.extract_content_str( choice["delta"].get("content") diff --git a/tests/test_litellm/llms/databricks/chat/test_databricks_chat_transformation.py b/tests/test_litellm/llms/databricks/chat/test_databricks_chat_transformation.py index d61f826e89..51a2e971c0 100644 --- a/tests/test_litellm/llms/databricks/chat/test_databricks_chat_transformation.py +++ b/tests/test_litellm/llms/databricks/chat/test_databricks_chat_transformation.py @@ -88,7 +88,7 @@ def test_transform_choices_without_signature(): assert choices[0].message.reasoning_content == "i'm thinking without signature." assert choices[0].message.thinking_blocks is not None assert len(choices[0].message.thinking_blocks) == 1 - + # Verify the thinking block was created successfully without signature thinking_block = choices[0].message.thinking_blocks[0] assert thinking_block["type"] == "thinking" @@ -104,8 +104,17 @@ def test_transform_choices_with_citations(): "content": [ { "type": "text", - "text": "Paris", - "citations": [{"source": "wiki"}], + "text": "Blue", + "citations": [ + { + "type": "char_location", + "cited_text": "The sky is blue.", + "document_index": 0, + "document_title": "My Document", + "start_char_index": 0, + "end_char_index": 50, + } + ], } ], }, @@ -117,7 +126,19 @@ def test_transform_choices_with_citations(): choices = config._transform_dbrx_choices(choices=databricks_choices) assert choices[0].message.provider_specific_fields == { - "citations": [[{"source": "wiki"}]] + "citations": [ + [ + { + "type": "char_location", + "cited_text": "The sky is blue.", + "document_index": 0, + "document_title": "My Document", + "start_char_index": 0, + "end_char_index": 50, + "supported_text": "Blue", + } + ] + ] } @@ -130,7 +151,24 @@ def test_chunk_parser_with_citation(): "model": "test", "choices": [ { - "delta": {"citation": {"source": "wiki"}}, + "delta": { + "content": [ + { + "type": "text", + "text": "", + "citations": [ + { + "type": "char_location", + "cited_text": "The sky is blue.", + "document_index": 0, + "document_title": "My Document", + "start_char_index": 0, + "end_char_index": 50, + } + ], + } + ], + }, "index": 0, "finish_reason": None, } @@ -139,5 +177,12 @@ def test_chunk_parser_with_citation(): parsed = iterator.chunk_parser(chunk) assert parsed.choices[0].delta.provider_specific_fields == { - "citation": {"source": "wiki"} + "citation": { + "type": "char_location", + "cited_text": "The sky is blue.", + "document_index": 0, + "document_title": "My Document", + "start_char_index": 0, + "end_char_index": 50, + } } From cf676e7aeff459330c5df604a636955d494aff45 Mon Sep 17 00:00:00 2001 From: TomeHirata Date: Mon, 1 Sep 2025 17:51:24 +0900 Subject: [PATCH 4/4] fix mypy --- litellm/llms/databricks/chat/transformation.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/litellm/llms/databricks/chat/transformation.py b/litellm/llms/databricks/chat/transformation.py index 9330b01923..d3df5bbf36 100644 --- a/litellm/llms/databricks/chat/transformation.py +++ b/litellm/llms/databricks/chat/transformation.py @@ -382,20 +382,18 @@ class DatabricksConfig(DatabricksBase, OpenAILikeChatConfig, AnthropicConfig): ) -> Optional[List[Any]]: if content is None: return None - citations: Optional[List[Any]] = None + citations = [] if isinstance(content, list): for item in content: text = item.get("text", None) - if item.get("citations") is not None: - if citations is None: - citations = [] + if citations_item := item.get("citations"): citations.append( [ {**citation, "supported_text": text} - for citation in item["citations"] + for citation in citations_item ] ) - return citations + return citations or None def _transform_dbrx_choices( self, choices: List[DatabricksChoice], json_mode: Optional[bool] = None