diff --git a/.circleci/config.yml b/.circleci/config.yml index 04f2a30c51..ecae22f872 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1935,12 +1935,12 @@ jobs: pip install prisma pip install fastapi pip install jsonschema - pip install "httpx==0.24.1" + pip install "httpx==0.27.0" pip install "anyio==3.7.1" pip install "asyncio==3.4.3" pip install "PyGithub==1.59.1" pip install "google-cloud-aiplatform==1.59.0" - pip install "anthropic==0.21.3" + pip install "anthropic==0.49.0" # Run pytest and generate JUnit XML report - run: name: Build Docker image @@ -1982,11 +1982,44 @@ jobs: - run: name: Wait for app to be ready command: dockerize -wait http://localhost:4000 -timeout 5m + # Add Ruby installation and testing before the existing Node.js and Python tests + - run: + name: Install Ruby and Bundler + command: | + # Import GPG keys first + gpg --keyserver hkp://keyserver.ubuntu.com --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3 7D2BAF1CF37B13E2069D6956105BD0E739499BDB || { + curl -sSL https://rvm.io/mpapis.asc | gpg --import - + curl -sSL https://rvm.io/pkuczynski.asc | gpg --import - + } + + # Install Ruby version manager (RVM) + curl -sSL https://get.rvm.io | bash -s stable + + # Source RVM from the correct location + source $HOME/.rvm/scripts/rvm + + # Install Ruby 3.2.2 + rvm install 3.2.2 + rvm use 3.2.2 --default + + # Install latest Bundler + gem install bundler + + - run: + name: Run Ruby tests + command: | + source $HOME/.rvm/scripts/rvm + cd tests/pass_through_tests/ruby_passthrough_tests + bundle install + bundle exec rspec + no_output_timeout: 30m # New steps to run Node.js test - run: name: Install Node.js command: | + export DEBIAN_FRONTEND=noninteractive curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash - + sudo apt-get update sudo apt-get install -y nodejs node --version npm --version diff --git a/.github/workflows/locustfile.py b/.github/workflows/locustfile.py index 96dd8e1990..36dbeee9c4 100644 --- 
a/.github/workflows/locustfile.py +++ b/.github/workflows/locustfile.py @@ -8,7 +8,7 @@ class MyUser(HttpUser): def chat_completion(self): headers = { "Content-Type": "application/json", - "Authorization": "Bearer sk-ZoHqrLIs2-5PzJrqBaviAA", + "Authorization": "Bearer sk-8N1tLOOyH8TIxwOLahhIVg", # Include any additional headers you may need for authentication, etc. } diff --git a/.gitignore b/.gitignore index d760ba17f4..d35923f7c3 100644 --- a/.gitignore +++ b/.gitignore @@ -77,3 +77,5 @@ litellm/proxy/_experimental/out/404.html litellm/proxy/_experimental/out/model_hub.html .mypy_cache/* litellm/proxy/application.log +tests/llm_translation/vertex_test_account.json +tests/llm_translation/test_vertex_key.json diff --git a/README.md b/README.md index c52b12b66a..014df0ccdd 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ LiteLLM manages: [**Jump to LiteLLM Proxy (LLM Gateway) Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs)
[**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-providers-docs) -🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12 hour load tests, before being published. +🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12 hour load tests, before being published. [More information about the release cycle here](https://docs.litellm.ai/docs/proxy/release_cycle) Support for more providers. Missing a provider or LLM Platform, raise a [feature request](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+). diff --git a/deploy/charts/litellm-helm/Chart.yaml b/deploy/charts/litellm-helm/Chart.yaml index 6232a2320d..f1f2fd8d64 100644 --- a/deploy/charts/litellm-helm/Chart.yaml +++ b/deploy/charts/litellm-helm/Chart.yaml @@ -18,7 +18,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.3.0 +version: 0.4.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/deploy/charts/litellm-helm/templates/migrations-job.yaml b/deploy/charts/litellm-helm/templates/migrations-job.yaml index 381e9e5433..e994c45548 100644 --- a/deploy/charts/litellm-helm/templates/migrations-job.yaml +++ b/deploy/charts/litellm-helm/templates/migrations-job.yaml @@ -48,6 +48,23 @@ spec: {{- end }} - name: DISABLE_SCHEMA_UPDATE value: "false" # always run the migration from the Helm PreSync hook, override the value set + {{- with .Values.volumeMounts }} + volumeMounts: + {{- toYaml . 
| nindent 12 }} + {{- end }} + {{- with .Values.volumes }} + volumes: + {{- toYaml . | nindent 8 }} + {{- end }} restartPolicy: OnFailure + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + ttlSecondsAfterFinished: {{ .Values.migrationJob.ttlSecondsAfterFinished }} backoffLimit: {{ .Values.migrationJob.backoffLimit }} {{- end }} diff --git a/deploy/charts/litellm-helm/values.yaml b/deploy/charts/litellm-helm/values.yaml index 19cbf72321..9f21fc40ad 100644 --- a/deploy/charts/litellm-helm/values.yaml +++ b/deploy/charts/litellm-helm/values.yaml @@ -187,6 +187,7 @@ migrationJob: backoffLimit: 4 # Backoff limit for Job restarts disableSchemaUpdate: false # Skip schema migrations for specific environments. When True, the job will exit with code 0. annotations: {} + ttlSecondsAfterFinished: 120 # Additional environment variables to be added to the deployment envVars: { diff --git a/docs/my-website/docs/anthropic_unified.md b/docs/my-website/docs/anthropic_unified.md new file mode 100644 index 0000000000..71b9203399 --- /dev/null +++ b/docs/my-website/docs/anthropic_unified.md @@ -0,0 +1,92 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# [BETA] `/v1/messages` + +LiteLLM provides a BETA endpoint in the spec of Anthropic's `/v1/messages` endpoint. + +This currently just supports the Anthropic API. + +| Feature | Supported | Notes | +|-------|-------|-------| +| Cost Tracking | ✅ | | +| Logging | ✅ | works across all integrations | +| End-user Tracking | ✅ | | +| Streaming | ✅ | | +| Fallbacks | ✅ | between anthropic models | +| Loadbalancing | ✅ | between anthropic models | + +Planned improvement: +- Vertex AI Anthropic support +- Bedrock Anthropic support + +## Usage + + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: anthropic-claude + litellm_params: + model: claude-3-7-sonnet-latest +``` + +2. 
Start proxy + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! + +```bash +curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \ +-H 'content-type: application/json' \ +-H 'x-api-key: $LITELLM_API_KEY' \ +-H 'anthropic-version: 2023-06-01' \ +-d '{ + "model": "anthropic-claude", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "List 5 important events in the XIX century" + } + ] + } + ], + "max_tokens": 4096 +}' +``` + + + +```python +from litellm.llms.anthropic.experimental_pass_through.messages.handler import anthropic_messages +import asyncio +import os + +# set env +os.environ["ANTHROPIC_API_KEY"] = "my-api-key" + +messages = [{"role": "user", "content": "Hello, can you tell me a short joke?"}] + +# Call the handler +async def call(): + response = await anthropic_messages( + messages=messages, + api_key=api_key, + model="claude-3-haiku-20240307", + max_tokens=100, + ) + +asyncio.run(call()) +``` + + + \ No newline at end of file diff --git a/docs/my-website/docs/completion/vision.md b/docs/my-website/docs/completion/vision.md index efb988b76f..1e18109b3b 100644 --- a/docs/my-website/docs/completion/vision.md +++ b/docs/my-website/docs/completion/vision.md @@ -189,4 +189,138 @@ Expected Response ``` - \ No newline at end of file + + + +## Explicitly specify image type + +If you have images without a mime-type, or if litellm is incorrectly inferring the mime type of your image (e.g. calling `gs://` URLs with vertex ai), you can set this explicitly via the `format` param. + +```python +"image_url": { + "url": "gs://my-gs-image", + "format": "image/jpeg" +} +``` + +LiteLLM will use this for any API endpoint, which supports specifying mime-type (e.g. anthropic/bedrock/vertex ai). + +For others (e.g. openai), it will be ignored. 
+ + + + +```python +import os +from litellm import completion + +os.environ["ANTHROPIC_API_KEY"] = "your-api-key" + +# openai call +response = completion( + model = "claude-3-7-sonnet-latest", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What’s in this image?" + }, + { + "type": "image_url", + "image_url": { + "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg", + "format": "image/jpeg" + } + } + ] + } + ], +) + +``` + + + + +1. Define vision models on config.yaml + +```yaml +model_list: + - model_name: gpt-4-vision-preview # OpenAI gpt-4-vision-preview + litellm_params: + model: openai/gpt-4-vision-preview + api_key: os.environ/OPENAI_API_KEY + - model_name: llava-hf # Custom OpenAI compatible model + litellm_params: + model: openai/llava-hf/llava-v1.6-vicuna-7b-hf + api_base: http://localhost:8000 + api_key: fake-key + model_info: + supports_vision: True # set supports_vision to True so /model/info returns this attribute as True + +``` + +2. Run proxy server + +```bash +litellm --config config.yaml +``` + +3. Test it using the OpenAI Python SDK + + +```python +import os +from openai import OpenAI + +client = OpenAI( + api_key="sk-1234", # your litellm proxy api key +) + +response = client.chat.completions.create( + model = "gpt-4-vision-preview", # use model="llava-hf" to test your custom OpenAI endpoint + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What’s in this image?" 
+ }, + { + "type": "image_url", + "image_url": { + "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg", + "format": "image/jpeg" + } + } + ] + } + ], +) + +``` + + + + + + + + + +## Spec + +``` +"image_url": str + +OR + +"image_url": { + "url": "url OR base64 encoded str", + "detail": "openai-only param", + "format": "specify mime-type of image" +} +``` \ No newline at end of file diff --git a/docs/my-website/docs/data_security.md b/docs/my-website/docs/data_security.md index 13cde26d5d..30128760f2 100644 --- a/docs/my-website/docs/data_security.md +++ b/docs/my-website/docs/data_security.md @@ -46,7 +46,7 @@ For security inquiries, please contact us at support@berri.ai |-------------------|-------------------------------------------------------------------------------------------------| | SOC 2 Type I | Certified. Report available upon request on Enterprise plan. | | SOC 2 Type II | In progress. Certificate available by April 15th, 2025 | -| ISO27001 | In progress. Certificate available by February 7th, 2025 | +| ISO 27001 | Certified. Report available upon request on Enterprise plan. | ## Supported Data Regions for LiteLLM Cloud @@ -137,7 +137,7 @@ Point of contact email address for general security-related questions: krrish@be Has the Vendor been audited / certified? - SOC 2 Type I. Certified. Report available upon request on Enterprise plan. - SOC 2 Type II. In progress. Certificate available by April 15th, 2025. -- ISO27001. In progress. Certificate available by February 7th, 2025. +- ISO 27001. Certified. Report available upon request on Enterprise plan. Has an information security management system been implemented? - Yes - [CodeQL](https://codeql.github.com/) and a comprehensive ISMS covering multiple security domains. 
diff --git a/docs/my-website/docs/projects/PDL.md b/docs/my-website/docs/projects/PDL.md new file mode 100644 index 0000000000..5d6fd77555 --- /dev/null +++ b/docs/my-website/docs/projects/PDL.md @@ -0,0 +1,5 @@ +PDL - A YAML-based approach to prompt programming + +GitHub: https://github.com/IBM/prompt-declaration-language + +PDL is a declarative approach to prompt programming, helping users to accumulate messages implicitly, with support for model chaining and tool use. \ No newline at end of file diff --git a/docs/my-website/docs/projects/pgai.md b/docs/my-website/docs/projects/pgai.md new file mode 100644 index 0000000000..bece5baf6a --- /dev/null +++ b/docs/my-website/docs/projects/pgai.md @@ -0,0 +1,9 @@ +# pgai + +[pgai](https://github.com/timescale/pgai) is a suite of tools to develop RAG, semantic search, and other AI applications more easily with PostgreSQL. + +If you don't know what pgai is yet, check out the [README](https://github.com/timescale/pgai)! + +If you're already familiar with pgai, you can find LiteLLM-specific docs here: +- LiteLLM for [model calling](https://github.com/timescale/pgai/blob/main/docs/model_calling/litellm.md) in pgai +- Use the [litellm provider](https://github.com/timescale/pgai/blob/main/docs/vectorizer/api-reference.md#aiembedding_litellm) to automatically create embeddings for your data via the pgai vectorizer. diff --git a/docs/my-website/docs/providers/bedrock.md b/docs/my-website/docs/providers/bedrock.md index 00fe45e99e..bd2d4be1a4 100644 --- a/docs/my-website/docs/providers/bedrock.md +++ b/docs/my-website/docs/providers/bedrock.md @@ -286,9 +286,12 @@ print(response) -## Usage - Function Calling +## Usage - Function Calling / Tool calling -LiteLLM uses Bedrock's Converse API for making tool calls +LiteLLM supports tool calling via Bedrock's Converse and Invoke APIs. 
+ + + ```python from litellm import completion @@ -333,6 +336,69 @@ assert isinstance( response.choices[0].message.tool_calls[0].function.arguments, str ) ``` + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: bedrock-claude-3-7 + litellm_params: + model: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0 # for bedrock invoke, specify `bedrock/invoke/` +``` + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! + +```bash +curl http://0.0.0.0:4000/v1/chat/completions \ +-H "Content-Type: application/json" \ +-H "Authorization: Bearer $LITELLM_API_KEY" \ +-d '{ + "model": "bedrock-claude-3-7", + "messages": [ + { + "role": "user", + "content": "What'\''s the weather like in Boston today?" + } + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"] + } + }, + "required": ["location"] + } + } + } + ], + "tool_choice": "auto" +}' + +``` + + + + ## Usage - Vision @@ -377,6 +443,226 @@ print(f"\nResponse: {resp}") ``` +## Usage - 'thinking' / 'reasoning content' + +This is currently only supported for Anthropic's Claude 3.7 Sonnet + Deepseek R1. + +Works on v1.61.20+. + +Returns 2 new fields in `message` and `delta` object: +- `reasoning_content` - string - The reasoning content of the response +- `thinking_blocks` - list of objects (Anthropic only) - The thinking blocks of the response + +Each object has the following fields: +- `type` - Literal["thinking"] - The type of thinking block +- `thinking` - string - The thinking of the response. Also returned in `reasoning_content` +- `signature` - string - A base64 encoded string, returned by Anthropic. 
+ +The `signature` is required by Anthropic on subsequent calls, if 'thinking' content is passed in (only required to use `thinking` with tool calling). [Learn more](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#understanding-thinking-blocks) + + + + +```python +from litellm import completion + +# set env +os.environ["AWS_ACCESS_KEY_ID"] = "" +os.environ["AWS_SECRET_ACCESS_KEY"] = "" +os.environ["AWS_REGION_NAME"] = "" + + +resp = completion( + model="bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0", + messages=[{"role": "user", "content": "What is the capital of France?"}], + thinking={"type": "enabled", "budget_tokens": 1024}, +) + +print(resp) +``` + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: bedrock-claude-3-7 + litellm_params: + model: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0 + thinking: {"type": "enabled", "budget_tokens": 1024} # 👈 EITHER HERE OR ON REQUEST +``` + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! + +```bash +curl http://0.0.0.0:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer " \ + -d '{ + "model": "bedrock-claude-3-7", + "messages": [{"role": "user", "content": "What is the capital of France?"}], + "thinking": {"type": "enabled", "budget_tokens": 1024} # 👈 EITHER HERE OR ON CONFIG.YAML + }' +``` + + + + + +**Expected Response** + +Same as [Anthropic API response](../providers/anthropic#usage---thinking--reasoning_content). + +```python +{ + "id": "chatcmpl-c661dfd7-7530-49c9-b0cc-d5018ba4727d", + "created": 1740640366, + "model": "us.anthropic.claude-3-7-sonnet-20250219-v1:0", + "object": "chat.completion", + "system_fingerprint": null, + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "The capital of France is Paris. 
It's not only the capital city but also the largest city in France, serving as the country's major cultural, economic, and political center.", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "reasoning_content": "The capital of France is Paris. This is a straightforward factual question.", + "thinking_blocks": [ + { + "type": "thinking", + "thinking": "The capital of France is Paris. This is a straightforward factual question.", + "signature": "EqoBCkgIARABGAIiQL2UoU0b1OHYi+yCHpBY7U6FQW8/FcoLewocJQPa2HnmLM+NECy50y44F/kD4SULFXi57buI9fAvyBwtyjlOiO0SDE3+r3spdg6PLOo9PBoMma2ku5OTAoR46j9VIjDRlvNmBvff7YW4WI9oU8XagaOBSxLPxElrhyuxppEn7m6bfT40dqBSTDrfiw4FYB4qEPETTI6TA6wtjGAAqmFqKTo=" + } + ] + } + } + ], + "usage": { + "completion_tokens": 64, + "prompt_tokens": 42, + "total_tokens": 106, + "completion_tokens_details": null, + "prompt_tokens_details": null + } +} +``` + + +## Usage - Structured Output / JSON mode + + + + +```python +from litellm import completion +import os +from pydantic import BaseModel + +# set env +os.environ["AWS_ACCESS_KEY_ID"] = "" +os.environ["AWS_SECRET_ACCESS_KEY"] = "" +os.environ["AWS_REGION_NAME"] = "" + +class CalendarEvent(BaseModel): + name: str + date: str + participants: list[str] + +class EventsList(BaseModel): + events: list[CalendarEvent] + +response = completion( + model="bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0", # specify invoke via `bedrock/invoke/anthropic.claude-3-7-sonnet-20250219-v1:0` + response_format=EventsList, + messages=[ + {"role": "system", "content": "You are a helpful assistant designed to output JSON."}, + {"role": "user", "content": "Who won the world series in 2020?"} + ], +) +print(response.choices[0].message.content) +``` + + + +1. 
Setup config.yaml + +```yaml +model_list: + - model_name: bedrock-claude-3-7 + litellm_params: + model: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0 # specify invoke via `bedrock/invoke/` + aws_access_key_id: os.environ/CUSTOM_AWS_ACCESS_KEY_ID + aws_secret_access_key: os.environ/CUSTOM_AWS_SECRET_ACCESS_KEY + aws_region_name: os.environ/CUSTOM_AWS_REGION_NAME +``` + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! + +```bash +curl http://0.0.0.0:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $LITELLM_KEY" \ + -d '{ + "model": "bedrock-claude-3-7", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant designed to output JSON." + }, + { + "role": "user", + "content": "Who won the world series in 2020?" + } + ], + "response_format": { + "type": "json_schema", + "json_schema": { + "name": "math_reasoning", + "description": "reason about maths", + "schema": { + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": { + "type": "object", + "properties": { + "explanation": { "type": "string" }, + "output": { "type": "string" } + }, + "required": ["explanation", "output"], + "additionalProperties": false + } + }, + "final_answer": { "type": "string" } + }, + "required": ["steps", "final_answer"], + "additionalProperties": false + }, + "strict": true + } + } + }' +``` + + + ## Usage - Bedrock Guardrails Example of using [Bedrock Guardrails with LiteLLM](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-use-converse-api.html) diff --git a/docs/my-website/docs/providers/cerebras.md b/docs/my-website/docs/providers/cerebras.md index 4fabeb31cb..33bef5e107 100644 --- a/docs/my-website/docs/providers/cerebras.md +++ b/docs/my-website/docs/providers/cerebras.md @@ -23,14 +23,16 @@ import os os.environ['CEREBRAS_API_KEY'] = "" response = completion( - model="cerebras/meta/llama3-70b-instruct", + 
model="cerebras/llama3-70b-instruct", messages=[ { "role": "user", - "content": "What's the weather like in Boston today in Fahrenheit?", + "content": "What's the weather like in Boston today in Fahrenheit? (Write in JSON)", } ], max_tokens=10, + + # The prompt should include JSON if 'json_object' is selected; otherwise, you will get error code 400. response_format={ "type": "json_object" }, seed=123, stop=["\n\n"], @@ -50,16 +52,18 @@ import os os.environ['CEREBRAS_API_KEY'] = "" response = completion( - model="cerebras/meta/llama3-70b-instruct", + model="cerebras/llama3-70b-instruct", messages=[ { "role": "user", - "content": "What's the weather like in Boston today in Fahrenheit?", + "content": "What's the weather like in Boston today in Fahrenheit? (Write in JSON)", } ], stream=True, max_tokens=10, - response_format={ "type": "json_object" }, + + # The prompt should include JSON if 'json_object' is selected; otherwise, you will get error code 400. + response_format={ "type": "json_object" }, seed=123, stop=["\n\n"], temperature=0.2, diff --git a/docs/my-website/docs/providers/infinity.md b/docs/my-website/docs/providers/infinity.md index dd6986dfef..091503bf18 100644 --- a/docs/my-website/docs/providers/infinity.md +++ b/docs/my-website/docs/providers/infinity.md @@ -1,3 +1,6 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # Infinity | Property | Details | @@ -12,6 +15,9 @@ ```python from litellm import rerank +import os + +os.environ["INFINITY_API_BASE"] = "http://localhost:8080" response = rerank( model="infinity/rerank", @@ -65,3 +71,114 @@ curl http://0.0.0.0:4000/rerank \ ``` +## Supported Cohere Rerank API Params + +| Param | Type | Description | +|-------|-------|-------| +| `query` | `str` | The query to rerank the documents against | +| `documents` | `list[str]` | The documents to rerank | +| `top_n` | `int` | The number of documents to return | +| `return_documents` | `bool` | Whether to return the documents in the 
response | + +### Usage - Return Documents + + + + +```python +response = rerank( + model="infinity/rerank", + query="What is the capital of France?", + documents=["Paris", "London", "Berlin", "Madrid"], + return_documents=True, +) +``` + + + + + +```bash +curl http://0.0.0.0:4000/rerank \ + -H "Authorization: Bearer sk-1234" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "custom-infinity-rerank", + "query": "What is the capital of France?", + "documents": [ + "Paris", + "London", + "Berlin", + "Madrid" + ], + "return_documents": true + }' +``` + + + + +## Pass Provider-specific Params + +Any unmapped params will be passed to the provider as-is. + + + + +```python +from litellm import rerank +import os + +os.environ["INFINITY_API_BASE"] = "http://localhost:8080" + +response = rerank( + model="infinity/rerank", + query="What is the capital of France?", + documents=["Paris", "London", "Berlin", "Madrid"], + raw_scores=True, # 👈 PROVIDER-SPECIFIC PARAM +) +``` + + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: custom-infinity-rerank + litellm_params: + model: infinity/rerank + api_base: https://localhost:8080 + raw_scores: True # 👈 EITHER SET PROVIDER-SPECIFIC PARAMS HERE OR IN REQUEST BODY +``` + +2. Start litellm + +```bash +litellm --config /path/to/config.yaml + +# RUNNING on http://0.0.0.0:4000 +``` + +3. Test it! + +```bash +curl http://0.0.0.0:4000/rerank \ + -H "Authorization: Bearer sk-1234" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "custom-infinity-rerank", + "query": "What is the capital of the United States?", + "documents": [ + "Carson City is the capital city of the American state of Nevada.", + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", + "Washington, D.C. is the capital of the United States.", + "Capital punishment has existed in the United States since before it was a country." 
+ ], + "raw_scores": True # 👈 PROVIDER-SPECIFIC PARAM + }' +``` + + + diff --git a/docs/my-website/docs/providers/sambanova.md b/docs/my-website/docs/providers/sambanova.md index 9fa6ce8b60..7dd837e1b0 100644 --- a/docs/my-website/docs/providers/sambanova.md +++ b/docs/my-website/docs/providers/sambanova.md @@ -2,11 +2,11 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; # Sambanova -https://community.sambanova.ai/t/create-chat-completion-api/ +https://cloud.sambanova.ai/ :::tip -**We support ALL Sambanova models, just set `model=sambanova/` as a prefix when sending litellm requests. For the complete supported model list, visit https://sambanova.ai/technology/models ** +**We support ALL Sambanova models, just set `model=sambanova/` as a prefix when sending litellm requests. For the complete supported model list, visit https://docs.sambanova.ai/cloud/docs/get-started/supported-models ** ::: @@ -27,12 +27,11 @@ response = completion( messages=[ { "role": "user", - "content": "What do you know about sambanova.ai", + "content": "What do you know about sambanova.ai. Give your response in json format", } ], max_tokens=10, response_format={ "type": "json_object" }, - seed=123, stop=["\n\n"], temperature=0.2, top_p=0.9, @@ -54,13 +53,12 @@ response = completion( messages=[ { "role": "user", - "content": "What do you know about sambanova.ai", + "content": "What do you know about sambanova.ai. 
Give your response in json format", } ], stream=True, max_tokens=10, response_format={ "type": "json_object" }, - seed=123, stop=["\n\n"], temperature=0.2, top_p=0.9, diff --git a/docs/my-website/docs/providers/vertex.md b/docs/my-website/docs/providers/vertex.md index cb8c031c06..b90709ce5b 100644 --- a/docs/my-website/docs/providers/vertex.md +++ b/docs/my-website/docs/providers/vertex.md @@ -852,6 +852,7 @@ litellm.vertex_location = "us-central1 # Your Location | claude-3-5-sonnet@20240620 | `completion('vertex_ai/claude-3-5-sonnet@20240620', messages)` | | claude-3-sonnet@20240229 | `completion('vertex_ai/claude-3-sonnet@20240229', messages)` | | claude-3-haiku@20240307 | `completion('vertex_ai/claude-3-haiku@20240307', messages)` | +| claude-3-7-sonnet@20250219 | `completion('vertex_ai/claude-3-7-sonnet@20250219', messages)` | ### Usage @@ -926,6 +927,119 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \ + +### Usage - `thinking` / `reasoning_content` + + + + + +```python +from litellm import completion + +resp = completion( + model="vertex_ai/claude-3-7-sonnet-20250219", + messages=[{"role": "user", "content": "What is the capital of France?"}], + thinking={"type": "enabled", "budget_tokens": 1024}, +) + +``` + + + + + +1. Setup config.yaml + +```yaml +- model_name: claude-3-7-sonnet-20250219 + litellm_params: + model: vertex_ai/claude-3-7-sonnet-20250219 + vertex_ai_project: "my-test-project" + vertex_ai_location: "us-west-1" +``` + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! 
+ +```bash +curl http://0.0.0.0:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer " \ + -d '{ + "model": "claude-3-7-sonnet-20250219", + "messages": [{"role": "user", "content": "What is the capital of France?"}], + "thinking": {"type": "enabled", "budget_tokens": 1024} + }' +``` + + + + + +**Expected Response** + +```python +ModelResponse( + id='chatcmpl-c542d76d-f675-4e87-8e5f-05855f5d0f5e', + created=1740470510, + model='claude-3-7-sonnet-20250219', + object='chat.completion', + system_fingerprint=None, + choices=[ + Choices( + finish_reason='stop', + index=0, + message=Message( + content="The capital of France is Paris.", + role='assistant', + tool_calls=None, + function_call=None, + provider_specific_fields={ + 'citations': None, + 'thinking_blocks': [ + { + 'type': 'thinking', + 'thinking': 'The capital of France is Paris. This is a very straightforward factual question.', + 'signature': 'EuYBCkQYAiJAy6...' + } + ] + } + ), + thinking_blocks=[ + { + 'type': 'thinking', + 'thinking': 'The capital of France is Paris. This is a very straightforward factual question.', + 'signature': 'EuYBCkQYAiJAy6AGB...' + } + ], + reasoning_content='The capital of France is Paris. This is a very straightforward factual question.' + ) + ], + usage=Usage( + completion_tokens=68, + prompt_tokens=42, + total_tokens=110, + completion_tokens_details=None, + prompt_tokens_details=PromptTokensDetailsWrapper( + audio_tokens=None, + cached_tokens=0, + text_tokens=None, + image_tokens=None + ), + cache_creation_input_tokens=0, + cache_read_input_tokens=0 + ) +) +``` + + + ## Llama 3 API | Model Name | Function Call | @@ -1572,6 +1686,14 @@ assert isinstance( Pass any file supported by Vertex AI, through LiteLLM. 
+LiteLLM Supports the following image types passed in url + +``` +Images with Cloud Storage URIs - gs://cloud-samples-data/generative-ai/image/boats.jpeg +Images with direct links - https://storage.googleapis.com/github-repo/img/gemini/intro/landmark3.jpg +Videos with Cloud Storage URIs - https://storage.googleapis.com/github-repo/img/gemini/multimodality_usecases_overview/pixel8.mp4 +Base64 Encoded Local Images +``` diff --git a/docs/my-website/docs/providers/vllm.md b/docs/my-website/docs/providers/vllm.md index 9cc0ad487e..b5987167ec 100644 --- a/docs/my-website/docs/providers/vllm.md +++ b/docs/my-website/docs/providers/vllm.md @@ -157,6 +157,98 @@ curl -L -X POST 'http://0.0.0.0:4000/embeddings' \ +## Send Video URL to VLLM + +Example Implementation from VLLM [here](https://github.com/vllm-project/vllm/pull/10020) + +There are two ways to send a video url to VLLM: + +1. Pass the video url directly + +``` +{"type": "video_url", "video_url": {"url": video_url}}, +``` + +2. Pass the video data as base64 + +``` +{"type": "video_url", "video_url": {"url": f"data:video/mp4;base64,{video_data_base64}"}} +``` + + + + +```python +from litellm import completion + +response = completion( + model="hosted_vllm/qwen", # pass the vllm model name + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Summarize the following video" + }, + { + "type": "video_url", + "video_url": { + "url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ" + } + } + ] + } + ], + api_base="https://hosted-vllm-api.co") + +print(response) +``` + + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: my-model + litellm_params: + model: hosted_vllm/qwen # add hosted_vllm/ prefix to route as OpenAI provider + api_base: https://hosted-vllm-api.co # add api base for OpenAI compatible provider +``` + +2. Start the proxy + +```bash +$ litellm --config /path/to/config.yaml + +# RUNNING on http://0.0.0.0:4000 +``` + +3. Test it! 
+ +```bash +curl -X POST http://0.0.0.0:4000/chat/completions \ +-H "Authorization: Bearer sk-1234" \ +-H "Content-Type: application/json" \ +-d '{ + "model": "my-model", + "messages": [ + {"role": "user", "content": + [ + {"type": "text", "text": "Summarize the following video"}, + {"type": "video_url", "video_url": {"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ"}} + ] + } + ] +}' +``` + + + + + ## (Deprecated) for `vllm pip package` ### Using - `litellm.completion` diff --git a/docs/my-website/docs/proxy/architecture.md b/docs/my-website/docs/proxy/architecture.md index 832fd266b6..2b83583ed9 100644 --- a/docs/my-website/docs/proxy/architecture.md +++ b/docs/my-website/docs/proxy/architecture.md @@ -36,7 +36,7 @@ import TabItem from '@theme/TabItem'; - Virtual Key Rate Limit - User Rate Limit - Team Limit - - The `_PROXY_track_cost_callback` updates spend / usage in the LiteLLM database. [Here is everything tracked in the DB per request](https://github.com/BerriAI/litellm/blob/ba41a72f92a9abf1d659a87ec880e8e319f87481/schema.prisma#L172) + - The `_ProxyDBLogger` updates spend / usage in the LiteLLM database. [Here is everything tracked in the DB per request](https://github.com/BerriAI/litellm/blob/ba41a72f92a9abf1d659a87ec880e8e319f87481/schema.prisma#L172) ## Frequently Asked Questions diff --git a/docs/my-website/docs/proxy/db_info.md b/docs/my-website/docs/proxy/db_info.md index 1b87aa1e54..946089bf14 100644 --- a/docs/my-website/docs/proxy/db_info.md +++ b/docs/my-website/docs/proxy/db_info.md @@ -46,18 +46,17 @@ You can see the full DB Schema [here](https://github.com/BerriAI/litellm/blob/ma | Table Name | Description | Row Insert Frequency | |------------|-------------|---------------------| -| LiteLLM_SpendLogs | Detailed logs of all API requests. Records token usage, spend, and timing information. Tracks which models and keys were used. | **High - every LLM API request** | -| LiteLLM_ErrorLogs | Captures failed requests and errors. 
Stores exception details and request information. Helps with debugging and monitoring. | **Medium - on errors only** | +| LiteLLM_SpendLogs | Detailed logs of all API requests. Records token usage, spend, and timing information. Tracks which models and keys were used. | **High - every LLM API request - Success or Failure** | | LiteLLM_AuditLog | Tracks changes to system configuration. Records who made changes and what was modified. Maintains history of updates to teams, users, and models. | **Off by default**, **High - when enabled** | -## Disable `LiteLLM_SpendLogs` & `LiteLLM_ErrorLogs` +## Disable `LiteLLM_SpendLogs` You can disable spend_logs and error_logs by setting `disable_spend_logs` and `disable_error_logs` to `True` on the `general_settings` section of your proxy_config.yaml file. ```yaml general_settings: disable_spend_logs: True # Disable writing spend logs to DB - disable_error_logs: True # Disable writing error logs to DB + disable_error_logs: True # Only disable writing error logs to DB, regular spend logs will still be written unless `disable_spend_logs: True` ``` ### What is the impact of disabling these logs? diff --git a/docs/my-website/docs/proxy/logging_spec.md b/docs/my-website/docs/proxy/logging_spec.md index 86ba907373..7da937e565 100644 --- a/docs/my-website/docs/proxy/logging_spec.md +++ b/docs/my-website/docs/proxy/logging_spec.md @@ -78,6 +78,7 @@ Inherits from `StandardLoggingUserAPIKeyMetadata` and adds: | `api_base` | `Optional[str]` | Optional API base URL | | `response_cost` | `Optional[str]` | Optional response cost | | `additional_headers` | `Optional[StandardLoggingAdditionalHeaders]` | Additional headers | +| `batch_models` | `Optional[List[str]]` | Only set for Batches API. 
Lists the models used for cost calculation | ## StandardLoggingModelInformation diff --git a/docs/my-website/docs/proxy/master_key_rotations.md b/docs/my-website/docs/proxy/master_key_rotations.md new file mode 100644 index 0000000000..1713679863 --- /dev/null +++ b/docs/my-website/docs/proxy/master_key_rotations.md @@ -0,0 +1,53 @@ +# Rotating Master Key + +Here are our recommended steps for rotating your master key. + + +**1. Back up your DB** +In case of any errors during the encryption/decryption process, this will allow you to revert to the current state without issues. + +**2. Call `/key/regenerate` with the new master key** + +```bash +curl -L -X POST 'http://localhost:4000/key/regenerate' \ +-H 'Authorization: Bearer sk-1234' \ +-H 'Content-Type: application/json' \ +-d '{ + "key": "sk-1234", + "new_master_key": "sk-PIp1h0RekR" +}' +``` + +This will re-encrypt any models in your Proxy_ModelTable with the new master key. + +Expect to start seeing decryption errors in logs, as your old master key is no longer able to decrypt the new values. + +```bash + raise Exception("Unable to decrypt value={}".format(v)) +Exception: Unable to decrypt value= +``` + +**3. Update LITELLM_MASTER_KEY** + +In your environment variables, update the value of LITELLM_MASTER_KEY to the new_master_key from Step 2. + +This ensures the key used for decryption from db is the new key. + +**4.
Test it** + +Make a test request to a model stored on the proxy with a LiteLLM key (new master key or virtual key) and check that it works. Replace `gpt-4o-mini` with the 'public model name' of any db-model. + +```bash + curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \ +-H 'Content-Type: application/json' \ +-H 'Authorization: Bearer sk-1234' \ +-d '{ + "model": "gpt-4o-mini", + "messages": [ + { + "content": "Hey, how is it going", + "role": "user" + } + ] +}' +``` \ No newline at end of file diff --git a/docs/my-website/docs/proxy/prod.md b/docs/my-website/docs/proxy/prod.md index d0b8c48174..d3ba2d6224 100644 --- a/docs/my-website/docs/proxy/prod.md +++ b/docs/my-website/docs/proxy/prod.md @@ -107,9 +107,9 @@ general_settings: By default, LiteLLM writes several types of logs to the database: - Every LLM API request to the `LiteLLM_SpendLogs` table -- LLM Exceptions to the `LiteLLM_LogsErrors` table +- LLM Exceptions to the `LiteLLM_SpendLogs` table -If you're not viewing these logs on the LiteLLM UI (most users use Prometheus for monitoring), you can disable them by setting the following flags to `True`: +If you're not viewing these logs on the LiteLLM UI, you can disable them by setting the following flags to `True`: ```yaml general_settings: diff --git a/docs/my-website/docs/proxy/release_cycle.md b/docs/my-website/docs/proxy/release_cycle.md new file mode 100644 index 0000000000..947a4ae6b3 --- /dev/null +++ b/docs/my-website/docs/proxy/release_cycle.md @@ -0,0 +1,12 @@ +# Release Cycle + +LiteLLM Proxy has the following release cycle: + +- `v1.x.x-nightly`: These are releases which pass CI/CD. +- `v1.x.x.rc`: These are releases which pass CI/CD + [manual review](https://github.com/BerriAI/litellm/discussions/8495#discussioncomment-12180711). +- `v1.x.x` OR `v1.x.x-stable`: These are releases which pass CI/CD + manual review + 3 days of production testing. + +In production, we recommend using the latest `v1.x.x` release.
+ +Follow our release notes [here](https://github.com/BerriAI/litellm/releases). \ No newline at end of file diff --git a/docs/my-website/docs/reasoning_content.md b/docs/my-website/docs/reasoning_content.md new file mode 100644 index 0000000000..5cf287e737 --- /dev/null +++ b/docs/my-website/docs/reasoning_content.md @@ -0,0 +1,357 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# 'Thinking' / 'Reasoning Content' + +Supported Providers: +- Deepseek (`deepseek/`) +- Anthropic API (`anthropic/`) +- Bedrock (Anthropic + Deepseek) (`bedrock/`) +- Vertex AI (Anthropic) (`vertex_ai/`) + +```python +"message": { + ... + "reasoning_content": "The capital of France is Paris.", + "thinking_blocks": [ + { + "type": "thinking", + "thinking": "The capital of France is Paris.", + "signature": "EqoBCkgIARABGAIiQL2UoU0b1OHYi+..." + } + ] +} +``` + +## Quick Start + + + + +```python +from litellm import completion +import os + +os.environ["ANTHROPIC_API_KEY"] = "" + +response = completion( + model="anthropic/claude-3-7-sonnet-20250219", + messages=[ + {"role": "user", "content": "What is the capital of France?"}, + ], + thinking={"type": "enabled", "budget_tokens": 1024} # 👈 REQUIRED FOR ANTHROPIC models (on `anthropic/`, `bedrock/`, `vertex_ai/`) +) +print(response.choices[0].message.content) +``` + + + + +```bash +curl http://0.0.0.0:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $LITELLM_KEY" \ + -d '{ + "model": "anthropic/claude-3-7-sonnet-20250219", + "messages": [ + { + "role": "user", + "content": "What is the capital of France?"
+ } + ], + "thinking": {"type": "enabled", "budget_tokens": 1024} +}' +``` + + + +**Expected Response** + +```bash +{ + "id": "3b66124d79a708e10c603496b363574c", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": " won the FIFA World Cup in 2022.", + "role": "assistant", + "tool_calls": null, + "function_call": null + } + } + ], + "created": 1723323084, + "model": "deepseek/deepseek-chat", + "object": "chat.completion", + "system_fingerprint": "fp_7e0991cad4", + "usage": { + "completion_tokens": 12, + "prompt_tokens": 16, + "total_tokens": 28, + }, + "service_tier": null +} +``` + +## Tool Calling with `thinking` + +Here's how to use `thinking` blocks by Anthropic with tool calling. + + + + +```python +litellm._turn_on_debug() +litellm.modify_params = True +model = "anthropic/claude-3-7-sonnet-20250219" # works across Anthropic, Bedrock, Vertex AI +# Step 1: send the conversation and available functions to the model +messages = [ + { + "role": "user", + "content": "What's the weather like in San Francisco, Tokyo, and Paris? 
- give me 3 responses", + } +] +tools = [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state", + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + }, + }, + "required": ["location"], + }, + }, + } +] +response = litellm.completion( + model=model, + messages=messages, + tools=tools, + tool_choice="auto", # auto is default, but we'll be explicit + thinking={"type": "enabled", "budget_tokens": 1024}, +) +print("Response\n", response) +response_message = response.choices[0].message +tool_calls = response_message.tool_calls + +print("Expecting there to be 3 tool calls") +assert ( + len(tool_calls) > 0 +) # this has to call the function for SF, Tokyo and paris + +# Step 2: check if the model wanted to call a function +print(f"tool_calls: {tool_calls}") +if tool_calls: + # Step 3: call the function + # Note: the JSON response may not always be valid; be sure to handle errors + available_functions = { + "get_current_weather": get_current_weather, + } # only one function in this example, but you can have multiple + messages.append( + response_message + ) # extend conversation with assistant's reply + print("Response message\n", response_message) + # Step 4: send the info for each function call and function response to the model + for tool_call in tool_calls: + function_name = tool_call.function.name + if function_name not in available_functions: + # the model called a function that does not exist in available_functions - don't try calling anything + return + function_to_call = available_functions[function_name] + function_args = json.loads(tool_call.function.arguments) + function_response = function_to_call( + location=function_args.get("location"), + unit=function_args.get("unit"), + ) + messages.append( + { + "tool_call_id": 
tool_call.id, + "role": "tool", + "name": function_name, + "content": function_response, + } + ) # extend conversation with function response + print(f"messages: {messages}") + second_response = litellm.completion( + model=model, + messages=messages, + seed=22, + # tools=tools, + drop_params=True, + thinking={"type": "enabled", "budget_tokens": 1024}, + ) # get a new response from the model where it can see the function response + print("second response\n", second_response) +``` + + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: claude-3-7-sonnet-thinking + litellm_params: + model: anthropic/claude-3-7-sonnet-20250219 + api_key: os.environ/ANTHROPIC_API_KEY + thinking: { + "type": "enabled", + "budget_tokens": 1024 + } +``` + +2. Run proxy + +```bash +litellm --config config.yaml + +# RUNNING on http://0.0.0.0:4000 +``` + +3. Make 1st call + +```bash +curl http://0.0.0.0:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $LITELLM_KEY" \ + -d '{ + "model": "claude-3-7-sonnet-thinking", + "messages": [ + {"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"}, + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state", + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + }, + }, + "required": ["location"], + }, + }, + } + ], + "tool_choice": "auto" + }' +``` + +4. Make 2nd call with tool call results + +```bash +curl http://0.0.0.0:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $LITELLM_KEY" \ + -d '{ + "model": "claude-3-7-sonnet-thinking", + "messages": [ + { + "role": "user", + "content": "What\'s the weather like in San Francisco, Tokyo, and Paris? 
- give me 3 responses" + }, + { + "role": "assistant", + "content": "I\'ll check the current weather for these three cities for you:", + "tool_calls": [ + { + "index": 2, + "function": { + "arguments": "{\"location\": \"San Francisco\"}", + "name": "get_current_weather" + }, + "id": "tooluse_mnqzmtWYRjCxUInuAdK7-w", + "type": "function" + } + ], + "function_call": null, + "reasoning_content": "The user is asking for the current weather in three different locations: San Francisco, Tokyo, and Paris. I have access to the `get_current_weather` function that can provide this information.\n\nThe function requires a `location` parameter, and has an optional `unit` parameter. The user hasn't specified which unit they prefer (celsius or fahrenheit), so I'll use the default provided by the function.\n\nI need to make three separate function calls, one for each location:\n1. San Francisco\n2. Tokyo\n3. Paris\n\nThen I'll compile the results into a response with three distinct weather reports as requested by the user.", + "thinking_blocks": [ + { + "type": "thinking", + "thinking": "The user is asking for the current weather in three different locations: San Francisco, Tokyo, and Paris. I have access to the `get_current_weather` function that can provide this information.\n\nThe function requires a `location` parameter, and has an optional `unit` parameter. The user hasn't specified which unit they prefer (celsius or fahrenheit), so I'll use the default provided by the function.\n\nI need to make three separate function calls, one for each location:\n1. San Francisco\n2. Tokyo\n3. 
Paris\n\nThen I'll compile the results into a response with three distinct weather reports as requested by the user.", + "signature": "EqoBCkgIARABGAIiQCkBXENoyB+HstUOs/iGjG+bvDbIQRrxPsPpOSt5yDxX6iulZ/4K/w9Rt4J5Nb2+3XUYsyOH+CpZMfADYvItFR4SDPb7CmzoGKoolCMAJRoM62p1ZRASZhrD3swqIjAVY7vOAFWKZyPEJglfX/60+bJphN9W1wXR6rWrqn3MwUbQ5Mb/pnpeb10HMploRgUqEGKOd6fRKTkUoNDuAnPb55c=" + } + ], + "provider_specific_fields": { + "reasoningContentBlocks": [ + { + "reasoningText": { + "signature": "EqoBCkgIARABGAIiQCkBXENoyB+HstUOs/iGjG+bvDbIQRrxPsPpOSt5yDxX6iulZ/4K/w9Rt4J5Nb2+3XUYsyOH+CpZMfADYvItFR4SDPb7CmzoGKoolCMAJRoM62p1ZRASZhrD3swqIjAVY7vOAFWKZyPEJglfX/60+bJphN9W1wXR6rWrqn3MwUbQ5Mb/pnpeb10HMploRgUqEGKOd6fRKTkUoNDuAnPb55c=", + "text": "The user is asking for the current weather in three different locations: San Francisco, Tokyo, and Paris. I have access to the `get_current_weather` function that can provide this information.\n\nThe function requires a `location` parameter, and has an optional `unit` parameter. The user hasn't specified which unit they prefer (celsius or fahrenheit), so I'll use the default provided by the function.\n\nI need to make three separate function calls, one for each location:\n1. San Francisco\n2. Tokyo\n3. Paris\n\nThen I'll compile the results into a response with three distinct weather reports as requested by the user." + } + } + ] + } + }, + { + "tool_call_id": "tooluse_mnqzmtWYRjCxUInuAdK7-w", + "role": "tool", + "name": "get_current_weather", + "content": "{\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": \"fahrenheit\"}" + } + ] + }' +``` + + + + +## Switching between Anthropic + Deepseek models + +Set `drop_params=True` to drop the 'thinking' blocks when swapping from Anthropic to Deepseek models. Suggest improvements to this approach [here](https://github.com/BerriAI/litellm/discussions/8927). 
+ +```python +litellm.drop_params = True # 👈 EITHER GLOBALLY or per request + +# or per request +## Anthropic +response = litellm.completion( + model="anthropic/claude-3-7-sonnet-20250219", + messages=[{"role": "user", "content": "What is the capital of France?"}], + thinking={"type": "enabled", "budget_tokens": 1024}, + drop_params=True, +) + +## Deepseek +response = litellm.completion( + model="deepseek/deepseek-chat", + messages=[{"role": "user", "content": "What is the capital of France?"}], + thinking={"type": "enabled", "budget_tokens": 1024}, + drop_params=True, +) +``` + +## Spec + + +These fields can be accessed via `response.choices[0].message.reasoning_content` and `response.choices[0].message.thinking_blocks`. + +- `reasoning_content` - str: The reasoning content from the model. Returned across all providers. +- `thinking_blocks` - Optional[List[Dict[str, str]]]: A list of thinking blocks from the model. Only returned for Anthropic models. + - `type` - str: The type of thinking block. + - `thinking` - str: The thinking from the model. + - `signature` - str: The signature delta from the model. 
+ diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md index 79fefcf754..0ad28b24f4 100644 --- a/docs/my-website/docs/routing.md +++ b/docs/my-website/docs/routing.md @@ -952,8 +952,8 @@ router_settings: ``` Defaults: -- allowed_fails: 0 -- cooldown_time: 60s +- allowed_fails: 3 +- cooldown_time: 5s (`DEFAULT_COOLDOWN_TIME_SECONDS` in constants.py) **Set Per Model** diff --git a/docs/my-website/docs/secret.md b/docs/my-website/docs/secret.md index a65c696f36..7676164259 100644 --- a/docs/my-website/docs/secret.md +++ b/docs/my-website/docs/secret.md @@ -96,6 +96,33 @@ litellm --config /path/to/config.yaml ``` +### Using K/V pairs in 1 AWS Secret + +You can read multiple keys from a single AWS Secret using the `primary_secret_name` parameter: + +```yaml +general_settings: + key_management_system: "aws_secret_manager" + key_management_settings: + hosted_keys: [ + "OPENAI_API_KEY_MODEL_1", + "OPENAI_API_KEY_MODEL_2", + ] + primary_secret_name: "litellm_secrets" # 👈 Read multiple keys from one JSON secret +``` + +The `primary_secret_name` allows you to read multiple keys from a single AWS Secret as a JSON object. For example, the "litellm_secrets" would contain: + +```json +{ + "OPENAI_API_KEY_MODEL_1": "sk-key1...", + "OPENAI_API_KEY_MODEL_2": "sk-key2..." +} +``` + +This reduces the number of AWS Secrets you need to manage. + + ## Hashicorp Vault @@ -353,4 +380,7 @@ general_settings: # Hosted Keys Settings hosted_keys: ["litellm_master_key"] # OPTIONAL. Specify which env keys you stored on AWS + + # K/V pairs in 1 AWS Secret Settings + primary_secret_name: "litellm_secrets" # OPTIONAL. 
Read multiple keys from one JSON secret on AWS Secret Manager ``` \ No newline at end of file diff --git a/docs/my-website/docs/tutorials/litellm_proxy_aporia.md b/docs/my-website/docs/tutorials/litellm_proxy_aporia.md index 3b5bada2bc..143512f99c 100644 --- a/docs/my-website/docs/tutorials/litellm_proxy_aporia.md +++ b/docs/my-website/docs/tutorials/litellm_proxy_aporia.md @@ -2,9 +2,9 @@ import Image from '@theme/IdealImage'; import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# Use LiteLLM AI Gateway with Aporia Guardrails +# Aporia Guardrails with LiteLLM Gateway -In this tutorial we will use LiteLLM Proxy with Aporia to detect PII in requests and profanity in responses +In this tutorial we will use LiteLLM AI Gateway with Aporia to detect PII in requests and profanity in responses ## 1. Setup guardrails on Aporia diff --git a/docs/my-website/docs/tutorials/openweb_ui.md b/docs/my-website/docs/tutorials/openweb_ui.md new file mode 100644 index 0000000000..94ab0447bd --- /dev/null +++ b/docs/my-website/docs/tutorials/openweb_ui.md @@ -0,0 +1,109 @@ +import Image from '@theme/IdealImage'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# OpenWeb UI with LiteLLM + +This guide walks you through connecting OpenWeb UI to LiteLLM. Using LiteLLM with OpenWeb UI allows teams to +- Access 100+ LLMs on OpenWeb UI +- Track Spend / Usage, Set Budget Limits +- Send Request/Response Logs to logging destinations like langfuse, s3, gcs buckets, etc. +- Set access controls eg. Control what models OpenWebUI can access. + +## Quickstart + +- Make sure to setup LiteLLM with the [LiteLLM Getting Started Guide](https://docs.litellm.ai/docs/proxy/docker_quick_start) + + +## 1. Start LiteLLM & OpenWebUI + +Deploy this docker compose to deploy both OpenWebUI and LiteLLM. 
+ +```bash +docker compose up -d +``` + +- OpenWebUI starts running on [http://localhost:3000](http://localhost:3000) +- LiteLLM starts running on [http://localhost:4000](http://localhost:4000) + + +## 2. Create a Virtual Key on LiteLLM + +Virtual Keys are API Keys that allow you to authenticate to LiteLLM Proxy. We will create a Virtual Key that will allow OpenWebUI to access LiteLLM. + +### 2.1 LiteLLM User Management Hierarchy + +On LiteLLM, you can create Organizations, Teams, Users and Virtual Keys. For this tutorial, we will create a Team and a Virtual Key. + +- `Organization` - An Organization is a group of Teams. (US Engineering, EU Developer Tools) +- `Team` - A Team is a group of Users. (OpenWeb UI Team, Data Science Team, etc.) +- `User` - A User is an individual user (employee, developer, e.g. `krrish@litellm.ai`) +- `Virtual Key` - A Virtual Key is an API Key that allows you to authenticate to LiteLLM Proxy. A Virtual Key is associated with a User or Team. + +Once the Team is created, you can invite Users to the Team. You can read more about LiteLLM's User Management [here](https://docs.litellm.ai/docs/proxy/user_management_heirarchy). + +### 2.2 Create a Team on LiteLLM + +Navigate to [http://localhost:4000/ui](http://localhost:4000/ui) and create a new team. + + + +### 2.3 Create a Virtual Key on LiteLLM + +Navigate to [http://localhost:4000/ui](http://localhost:4000/ui) and create a new Virtual Key. + +LiteLLM allows you to specify what models are available on OpenWeb UI (by specifying the models the key will have access to). + + + +## 3.
Connect OpenWeb UI to LiteLLM + +On OpenWeb UI, navigate to Settings -> Connections and create a new connection to LiteLLM + +Enter the following details: +- URL: `http://localhost:4000` (your litellm proxy base url) +- Key: `your-virtual-key` (the key you created in the previous step) + + + +### 3.1 Test Request + +In the top left corner, open the model selector; you should only see the models you gave the key access to in Step 2. + +Once you have selected a model, enter your message content and click on `Submit` + + + +### 3.2 Tracking Spend / Usage + +After your request is made, navigate to `Logs` on the LiteLLM UI, where you can see Team, Key, Model, Usage and Cost. + + + + + +## Render `thinking` content on OpenWeb UI + +OpenWebUI requires reasoning/thinking content to be rendered with `<think></think>` tags. In order to render this for specific models, you can use the `merge_reasoning_content_in_choices` litellm parameter. + +Example litellm config.yaml: + +```yaml +model_list: + - model_name: thinking-anthropic-claude-3-7-sonnet + litellm_params: + model: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0 + thinking: {"type": "enabled", "budget_tokens": 1024} + max_tokens: 1080 + merge_reasoning_content_in_choices: true +``` + +### Test it on OpenWeb UI + +On the models dropdown select `thinking-anthropic-claude-3-7-sonnet` + + + + + + diff --git a/docs/my-website/docusaurus.config.js b/docs/my-website/docusaurus.config.js index cf20dfcd70..8d480131ff 100644 --- a/docs/my-website/docusaurus.config.js +++ b/docs/my-website/docusaurus.config.js @@ -44,7 +44,7 @@ const config = { path: './release_notes', routeBasePath: 'release_notes', blogTitle: 'Release Notes', - blogSidebarTitle: 'All Releases', + blogSidebarTitle: 'Releases', blogSidebarCount: 'ALL', postsPerPage: 'ALL', showReadingTime: false, diff --git a/docs/my-website/img/basic_litellm.gif b/docs/my-website/img/basic_litellm.gif new file mode 100644 index 0000000000..d4cf9fd52a Binary files /dev/null and
b/docs/my-website/img/basic_litellm.gif differ diff --git a/docs/my-website/img/create_key_in_team_oweb.gif b/docs/my-website/img/create_key_in_team_oweb.gif new file mode 100644 index 0000000000..d24849b259 Binary files /dev/null and b/docs/my-website/img/create_key_in_team_oweb.gif differ diff --git a/docs/my-website/img/litellm_create_team.gif b/docs/my-website/img/litellm_create_team.gif new file mode 100644 index 0000000000..e2f12613ec Binary files /dev/null and b/docs/my-website/img/litellm_create_team.gif differ diff --git a/docs/my-website/img/litellm_setup_openweb.gif b/docs/my-website/img/litellm_setup_openweb.gif new file mode 100644 index 0000000000..5618660d6c Binary files /dev/null and b/docs/my-website/img/litellm_setup_openweb.gif differ diff --git a/docs/my-website/img/litellm_thinking_openweb.gif b/docs/my-website/img/litellm_thinking_openweb.gif new file mode 100644 index 0000000000..385db583a4 Binary files /dev/null and b/docs/my-website/img/litellm_thinking_openweb.gif differ diff --git a/docs/my-website/img/release_notes/anthropic_thinking.jpg b/docs/my-website/img/release_notes/anthropic_thinking.jpg new file mode 100644 index 0000000000..f10de06dec Binary files /dev/null and b/docs/my-website/img/release_notes/anthropic_thinking.jpg differ diff --git a/docs/my-website/img/release_notes/error_logs.jpg b/docs/my-website/img/release_notes/error_logs.jpg new file mode 100644 index 0000000000..6f2767e1fb Binary files /dev/null and b/docs/my-website/img/release_notes/error_logs.jpg differ diff --git a/docs/my-website/img/release_notes/v1632_release.jpg b/docs/my-website/img/release_notes/v1632_release.jpg new file mode 100644 index 0000000000..1770460b2a Binary files /dev/null and b/docs/my-website/img/release_notes/v1632_release.jpg differ diff --git a/docs/my-website/release_notes/v1.57.8-stable/index.md b/docs/my-website/release_notes/v1.57.8-stable/index.md index 9787444fde..d37a7b9ff8 100644 --- 
a/docs/my-website/release_notes/v1.57.8-stable/index.md +++ b/docs/my-website/release_notes/v1.57.8-stable/index.md @@ -18,13 +18,6 @@ hide_table_of_contents: false `alerting`, `prometheus`, `secret management`, `management endpoints`, `ui`, `prompt management`, `finetuning`, `batch` -:::note - -v1.57.8-stable, is currently being tested. It will be released on 2025-01-12. - -::: - - ## New / Updated Models 1. Mistral large pricing - https://github.com/BerriAI/litellm/pull/7452 diff --git a/docs/my-website/release_notes/v1.61.20-stable/index.md b/docs/my-website/release_notes/v1.61.20-stable/index.md new file mode 100644 index 0000000000..132c1aa318 --- /dev/null +++ b/docs/my-website/release_notes/v1.61.20-stable/index.md @@ -0,0 +1,103 @@ +--- +title: v1.61.20-stable +slug: v1.61.20-stable +date: 2025-03-01T10:00:00 +authors: + - name: Krrish Dholakia + title: CEO, LiteLLM + url: https://www.linkedin.com/in/krish-d/ + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + - name: Ishaan Jaffer + title: CTO, LiteLLM + url: https://www.linkedin.com/in/reffajnaahsi/ + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGiM7ZrUwqu_Q/profile-displayphoto-shrink_800_800/profile-displayphoto-shrink_800_800/0/1675971026692?e=1741824000&v=beta&t=eQnRdXPJo4eiINWTZARoYTfqh064pgZ-E21pQTSy8jc +tags: [llm translation, rerank, ui, thinking, reasoning_content, claude-3-7-sonnet] +hide_table_of_contents: false +--- + +import Image from '@theme/IdealImage'; + +# v1.61.20-stable + + +These are the changes since `v1.61.13-stable`. 
+ +This release is primarily focused on: +- LLM Translation improvements (claude-3-7-sonnet + 'thinking'/'reasoning_content' support) +- UI improvements (add model flow, user management, etc) + +## Demo Instance + +Here's a Demo Instance to test changes: +- Instance: https://demo.litellm.ai/ +- Login Credentials: + - Username: admin + - Password: sk-1234 + +## New Models / Updated Models + +1. Anthropic 3-7 sonnet support + cost tracking (Anthropic API + Bedrock + Vertex AI + OpenRouter) + 1. Anthropic API [Start here](https://docs.litellm.ai/docs/providers/anthropic#usage---thinking--reasoning_content) + 2. Bedrock API [Start here](https://docs.litellm.ai/docs/providers/bedrock#usage---thinking--reasoning-content) + 3. Vertex AI API [See here](../../docs/providers/vertex#usage---thinking--reasoning_content) + 4. OpenRouter [See here](https://github.com/BerriAI/litellm/blob/ba5bdce50a0b9bc822de58c03940354f19a733ed/model_prices_and_context_window.json#L5626) +2. Gpt-4.5-preview support + cost tracking [See here](https://github.com/BerriAI/litellm/blob/ba5bdce50a0b9bc822de58c03940354f19a733ed/model_prices_and_context_window.json#L79) +3. Azure AI - Phi-4 cost tracking [See here](https://github.com/BerriAI/litellm/blob/ba5bdce50a0b9bc822de58c03940354f19a733ed/model_prices_and_context_window.json#L1773) +4. Claude-3.5-sonnet - vision support updated on Anthropic API [See here](https://github.com/BerriAI/litellm/blob/ba5bdce50a0b9bc822de58c03940354f19a733ed/model_prices_and_context_window.json#L2888) +5. Bedrock llama vision support [See here](https://github.com/BerriAI/litellm/blob/ba5bdce50a0b9bc822de58c03940354f19a733ed/model_prices_and_context_window.json#L7714) +6. Cerebras llama3.3-70b pricing [See here](https://github.com/BerriAI/litellm/blob/ba5bdce50a0b9bc822de58c03940354f19a733ed/model_prices_and_context_window.json#L2697) + +## LLM Translation + +1. 
Infinity Rerank - support returning documents when return_documents=True [Start here](../../docs/providers/infinity#usage---returning-documents) +2. Amazon Deepseek - `` param extraction into ‘reasoning_content’ [Start here](https://docs.litellm.ai/docs/providers/bedrock#bedrock-imported-models-deepseek-deepseek-r1) +3. Amazon Titan Embeddings - filter out ‘aws_’ params from request body [Start here](https://docs.litellm.ai/docs/providers/bedrock#bedrock-embedding) +4. Anthropic ‘thinking’ + ‘reasoning_content’ translation support (Anthropic API, Bedrock, Vertex AI) [Start here](https://docs.litellm.ai/docs/reasoning_content) +5. VLLM - support ‘video_url’ [Start here](../../docs/providers/vllm#send-video-url-to-vllm) +6. Call proxy via litellm SDK: Support `litellm_proxy/` for embedding, image_generation, transcription, speech, rerank [Start here](https://docs.litellm.ai/docs/providers/litellm_proxy) +7. OpenAI Pass-through - allow using Assistants GET, DELETE on /openai pass through routes [Start here](https://docs.litellm.ai/docs/pass_through/openai_passthrough) +8. Message Translation - fix openai message for assistant msg if role is missing - openai allows this +9. O1/O3 - support ‘drop_params’ for o3-mini and o1 parallel_tool_calls param (not supported currently) [See here](https://docs.litellm.ai/docs/completion/drop_params) + +## Spend Tracking Improvements + +1. Cost tracking for rerank via Bedrock [See PR](https://github.com/BerriAI/litellm/commit/b682dc4ec8fd07acf2f4c981d2721e36ae2a49c5) +2. Anthropic pass-through - fix race condition causing cost to not be tracked [See PR](https://github.com/BerriAI/litellm/pull/8874) +3. Anthropic pass-through: Ensure accurate token counting [See PR](https://github.com/BerriAI/litellm/pull/8880) + +## Management Endpoints / UI + +1. Models Page - Allow sorting models by ‘created at’ +2. Models Page - Edit Model Flow Improvements +3. Models Page - Fix Adding Azure, Azure AI Studio models on UI +4. 
Internal Users Page - Allow Bulk Adding Internal Users on UI +5. Internal Users Page - Allow sorting users by ‘created at’ +6. Virtual Keys Page - Allow searching for UserIDs on the dropdown when assigning a user to a team [See PR](https://github.com/BerriAI/litellm/pull/8844) +7. Virtual Keys Page - allow creating a user when assigning keys to users [See PR](https://github.com/BerriAI/litellm/pull/8844) +8. Model Hub Page - fix text overflow issue [See PR](https://github.com/BerriAI/litellm/pull/8749) +9. Admin Settings Page - Allow adding MSFT SSO on UI +10. Backend - don't allow creating duplicate internal users in DB + +## Helm + +1. support ttlSecondsAfterFinished on the migration job - [See PR](https://github.com/BerriAI/litellm/pull/8593) +2. enhance migrations job with additional configurable properties - [See PR](https://github.com/BerriAI/litellm/pull/8636) + +## Logging / Guardrail Integrations + +1. Arize Phoenix support +2. ‘No-log’ - fix ‘no-log’ param support on embedding calls + +## Performance / Loadbalancing / Reliability improvements + +1. Single Deployment Cooldown logic - Use allowed_fails or allowed_fail_policy if set [Start here](https://docs.litellm.ai/docs/routing#advanced-custom-retries-cooldowns-based-on-error-type) + +## General Proxy Improvements + +1. Hypercorn - fix reading / parsing request body +2. Windows - fix running proxy in windows +3. DD-Trace - fix dd-trace enablement on proxy + +## Complete Git Diff + +View the complete git diff [here](https://github.com/BerriAI/litellm/compare/v1.61.13-stable...v1.61.20-stable). 
\ No newline at end of file diff --git a/docs/my-website/release_notes/v1.63.0/index.md b/docs/my-website/release_notes/v1.63.0/index.md new file mode 100644 index 0000000000..e74a2f9b86 --- /dev/null +++ b/docs/my-website/release_notes/v1.63.0/index.md @@ -0,0 +1,40 @@ +--- +title: v1.63.0 - Anthropic 'thinking' response update +slug: v1.63.0 +date: 2025-03-05T10:00:00 +authors: + - name: Krrish Dholakia + title: CEO, LiteLLM + url: https://www.linkedin.com/in/krish-d/ + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + - name: Ishaan Jaffer + title: CTO, LiteLLM + url: https://www.linkedin.com/in/reffajnaahsi/ + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGiM7ZrUwqu_Q/profile-displayphoto-shrink_800_800/profile-displayphoto-shrink_800_800/0/1675971026692?e=1741824000&v=beta&t=eQnRdXPJo4eiINWTZARoYTfqh064pgZ-E21pQTSy8jc +tags: [llm translation, thinking, reasoning_content, claude-3-7-sonnet] +hide_table_of_contents: false +--- + +v1.63.0 fixes Anthropic 'thinking' response on streaming to return the `signature` block. [Github Issue](https://github.com/BerriAI/litellm/issues/8964) + + + +It also moves the response structure from `signature_delta` to `signature` to be the same as Anthropic. [Anthropic Docs](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#implementing-extended-thinking) + + +## Diff + +```bash +"message": { + ... + "reasoning_content": "The capital of France is Paris.", + "thinking_blocks": [ + { + "type": "thinking", + "thinking": "The capital of France is Paris.", +- "signature_delta": "EqoBCkgIARABGAIiQL2UoU0b1OHYi+..." # 👈 OLD FORMAT ++ "signature": "EqoBCkgIARABGAIiQL2UoU0b1OHYi+..." 
# 👈 KEY CHANGE + } + ] +} +``` diff --git a/docs/my-website/release_notes/v1.63.2-stable/index.md b/docs/my-website/release_notes/v1.63.2-stable/index.md new file mode 100644 index 0000000000..0c359452dc --- /dev/null +++ b/docs/my-website/release_notes/v1.63.2-stable/index.md @@ -0,0 +1,112 @@ +--- +title: v1.63.2-stable +slug: v1.63.2-stable +date: 2025-03-08T10:00:00 +authors: + - name: Krrish Dholakia + title: CEO, LiteLLM + url: https://www.linkedin.com/in/krish-d/ + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + - name: Ishaan Jaffer + title: CTO, LiteLLM + url: https://www.linkedin.com/in/reffajnaahsi/ + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGiM7ZrUwqu_Q/profile-displayphoto-shrink_800_800/profile-displayphoto-shrink_800_800/0/1675971026692?e=1741824000&v=beta&t=eQnRdXPJo4eiINWTZARoYTfqh064pgZ-E21pQTSy8jc +tags: [llm translation, thinking, reasoning_content, claude-3-7-sonnet] +hide_table_of_contents: false +--- + +import Image from '@theme/IdealImage'; + + +These are the changes since `v1.61.20-stable`. + +This release is primarily focused on: +- LLM Translation improvements (more `thinking` content improvements) +- UI improvements (Error logs now shown on UI) + + +:::info + +This release will be live on 03/09/2025 + +::: + + + + +## Demo Instance + +Here's a Demo Instance to test changes: +- Instance: https://demo.litellm.ai/ +- Login Credentials: + - Username: admin + - Password: sk-1234 + + +## New Models / Updated Models + +1. Add `supports_pdf_input` for specific Bedrock Claude models [PR](https://github.com/BerriAI/litellm/commit/f63cf0030679fe1a43d03fb196e815a0f28dae92) +2. Add pricing for amazon `eu` models [PR](https://github.com/BerriAI/litellm/commits/main/model_prices_and_context_window.json) +3. 
Fix Azure O1 mini pricing [PR](https://github.com/BerriAI/litellm/commit/52de1949ef2f76b8572df751f9c868a016d4832c) + +## LLM Translation + + + +1. Support `/openai/` passthrough for Assistant endpoints. [Get Started](https://docs.litellm.ai/docs/pass_through/openai_passthrough) +2. Bedrock Claude - fix tool calling transformation on invoke route. [Get Started](../../docs/providers/bedrock#usage---function-calling--tool-calling) +3. Bedrock Claude - response_format support for claude on invoke route. [Get Started](../../docs/providers/bedrock#usage---structured-output--json-mode) +4. Bedrock - pass `description` if set in response_format. [Get Started](../../docs/providers/bedrock#usage---structured-output--json-mode) +5. Bedrock - Fix passing response_format: {"type": "text"}. [PR](https://github.com/BerriAI/litellm/commit/c84b489d5897755139aa7d4e9e54727ebe0fa540) +6. OpenAI - Handle sending image_url as str to openai. [Get Started](https://docs.litellm.ai/docs/completion/vision) +7. Deepseek - return 'reasoning_content' missing on streaming. [Get Started](https://docs.litellm.ai/docs/reasoning_content) +8. Caching - Support caching on reasoning content. [Get Started](https://docs.litellm.ai/docs/proxy/caching) +9. Bedrock - handle thinking blocks in assistant message. [Get Started](https://docs.litellm.ai/docs/providers/bedrock#usage---thinking--reasoning-content) +10. Anthropic - Return `signature` on streaming. [Get Started](https://docs.litellm.ai/docs/providers/bedrock#usage---thinking--reasoning-content) +- Note: We've also migrated from `signature_delta` to `signature`. [Read more](https://docs.litellm.ai/release_notes/v1.63.0) +11. Support format param for specifying image type. [Get Started](../../docs/completion/vision.md#explicitly-specify-image-type) +12. Anthropic - `/v1/messages` endpoint - `thinking` param support. 
[Get Started](../../docs/anthropic_unified.md) +- Note: this refactors the [BETA] unified `/v1/messages` endpoint, to just work for the Anthropic API. +13. Vertex AI - handle $id in response schema when calling vertex ai. [Get Started](https://docs.litellm.ai/docs/providers/vertex#json-schema) + +## Spend Tracking Improvements + +1. Batches API - Fix cost calculation to run on retrieve_batch. [Get Started](https://docs.litellm.ai/docs/batches) +2. Batches API - Log batch models in spend logs / standard logging payload. [Get Started](../../docs/proxy/logging_spec.md#standardlogginghiddenparams) + +## Management Endpoints / UI + + + +1. Virtual Keys Page + - Allow team/org filters to be searchable on the Create Key Page + - Add created_by and updated_by fields to Keys table + - Show 'user_email' on key table + - Show 100 Keys Per Page, Use full height, increase width of key alias +2. Logs Page + - Show Error Logs on LiteLLM UI + - Allow Internal Users to View their own logs +3. Internal Users Page + - Allow admin to control default model access for internal users +4. Fix session handling with cookies + +## Logging / Guardrail Integrations + +1. Fix prometheus metrics w/ custom metrics, when keys containing team_id make requests. [PR](https://github.com/BerriAI/litellm/pull/8935) + +## Performance / Loadbalancing / Reliability improvements + +1. Cooldowns - Support cooldowns on models called with client side credentials. [Get Started](https://docs.litellm.ai/docs/proxy/clientside_auth#pass-user-llm-api-keys--api-base) +2. Tag-based Routing - ensures tag-based routing across all endpoints (`/embeddings`, `/image_generation`, etc.). [Get Started](https://docs.litellm.ai/docs/proxy/tag_routing) + +## General Proxy Improvements + +1. Raise BadRequestError when unknown model passed in request +2. Enforce model access restrictions on Azure OpenAI proxy route +3. Reliability fix - Handle emojis in text - fix orjson error +4. 
Model Access Patch - don't overwrite litellm.anthropic_models when running auth checks +5. Enable setting timezone information in docker image + +## Complete Git Diff + +[Here's the complete git diff](https://github.com/BerriAI/litellm/compare/v1.61.20-stable...v1.63.2-stable) \ No newline at end of file diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 7eaf31d2b9..51e0aa774b 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -41,10 +41,12 @@ const sidebars = { "proxy/deploy", "proxy/prod", "proxy/cli", + "proxy/release_cycle", "proxy/model_management", "proxy/health", "proxy/debugging", "proxy/spending_monitoring", + "proxy/master_key_rotations", ], }, "proxy/demo", @@ -242,6 +244,7 @@ const sidebars = { "completion/document_understanding", "completion/vision", "completion/json_mode", + "reasoning_content", "completion/prompt_caching", "completion/predict_outputs", "completion/prefix", @@ -254,13 +257,19 @@ const sidebars = { "completion/batching", "completion/mock_requests", "completion/reliable_completions", - 'tutorials/litellm_proxy_aporia', ] }, { type: "category", label: "Supported Endpoints", + link: { + type: "generated-index", + title: "Supported Endpoints", + description: + "Learn how to deploy + call models from different providers on LiteLLM", + slug: "/supported_endpoints", + }, items: [ { type: "category", @@ -279,6 +288,7 @@ const sidebars = { }, "text_completion", "embedding/supported_embedding", + "anthropic_unified", { type: "category", label: "Image", @@ -348,23 +358,6 @@ const sidebars = { label: "LangChain, LlamaIndex, Instructor Integration", items: ["langchain/langchain", "tutorials/instructor"], }, - { - type: "category", - label: "Tutorials", - items: [ - - 'tutorials/azure_openai', - 'tutorials/instructor', - "tutorials/gradio_integration", - "tutorials/huggingface_codellama", - "tutorials/huggingface_tutorial", - "tutorials/TogetherAI_liteLLM", - "tutorials/finetuned_chat_gpt", - 
"tutorials/text_completion", - "tutorials/first_playground", - "tutorials/model_fallbacks", - ], - }, ], }, { @@ -422,6 +415,31 @@ const sidebars = { "observability/opik_integration", ], }, + { + type: "category", + label: "Tutorials", + items: [ + "tutorials/openweb_ui", + 'tutorials/litellm_proxy_aporia', + { + type: "category", + label: "LiteLLM Python SDK Tutorials", + items: [ + + 'tutorials/azure_openai', + 'tutorials/instructor', + "tutorials/gradio_integration", + "tutorials/huggingface_codellama", + "tutorials/huggingface_tutorial", + "tutorials/TogetherAI_liteLLM", + "tutorials/finetuned_chat_gpt", + "tutorials/text_completion", + "tutorials/first_playground", + "tutorials/model_fallbacks", + ], + }, + ] + }, { type: "category", @@ -444,6 +462,7 @@ const sidebars = { items: [ "projects/smolagents", "projects/Docq.AI", + "projects/PDL", "projects/OpenInterpreter", "projects/Elroy", "projects/dbally", @@ -459,6 +478,7 @@ const sidebars = { "projects/YiVal", "projects/LiteLLM Proxy", "projects/llm_cord", + "projects/pgai", ], }, "contributing", diff --git a/litellm/__init__.py b/litellm/__init__.py index d3d3dd0d4b..d66707f8b3 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -53,6 +53,7 @@ from litellm.constants import ( cohere_embedding_models, bedrock_embedding_models, known_tokenizer_config, + BEDROCK_INVOKE_PROVIDERS_LITERAL, ) from litellm.types.guardrails import GuardrailItem from litellm.proxy._types import ( @@ -276,8 +277,6 @@ disable_end_user_cost_tracking_prometheus_only: Optional[bool] = None custom_prometheus_metadata_labels: List[str] = [] #### REQUEST PRIORITIZATION #### priority_reservation: Optional[Dict[str, float]] = None - - force_ipv4: bool = ( False # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6. 
) @@ -361,17 +360,7 @@ BEDROCK_CONVERSE_MODELS = [ "meta.llama3-2-11b-instruct-v1:0", "meta.llama3-2-90b-instruct-v1:0", ] -BEDROCK_INVOKE_PROVIDERS_LITERAL = Literal[ - "cohere", - "anthropic", - "mistral", - "amazon", - "meta", - "llama", - "ai21", - "nova", - "deepseek_r1", -] + ####### COMPLETION MODELS ################### open_ai_chat_completion_models: List = [] open_ai_text_completion_models: List = [] @@ -809,9 +798,6 @@ from .llms.oobabooga.chat.transformation import OobaboogaConfig from .llms.maritalk import MaritalkConfig from .llms.openrouter.chat.transformation import OpenrouterConfig from .llms.anthropic.chat.transformation import AnthropicConfig -from .llms.anthropic.experimental_pass_through.transformation import ( - AnthropicExperimentalPassThroughConfig, -) from .llms.groq.stt.transformation import GroqSTTConfig from .llms.anthropic.completion.transformation import AnthropicTextConfig from .llms.triton.completion.transformation import TritonConfig @@ -830,6 +816,9 @@ from .llms.infinity.rerank.transformation import InfinityRerankConfig from .llms.jina_ai.rerank.transformation import JinaAIRerankConfig from .llms.clarifai.chat.transformation import ClarifaiConfig from .llms.ai21.chat.transformation import AI21ChatConfig, AI21ChatConfig as AI21Config +from .llms.anthropic.experimental_pass_through.messages.transformation import ( + AnthropicMessagesConfig, +) from .llms.together_ai.chat import TogetherAIConfig from .llms.together_ai.completion.transformation import TogetherAITextCompletionConfig from .llms.cloudflare.chat.transformation import CloudflareChatConfig @@ -1020,6 +1009,7 @@ from .assistants.main import * from .batches.main import * from .batch_completion.main import * # type: ignore from .rerank_api.main import * +from .llms.anthropic.experimental_pass_through.messages.handler import * from .realtime_api.main import _arealtime from .fine_tuning.main import * from .files.main import * diff --git a/litellm/adapters/anthropic_adapter.py 
b/litellm/adapters/anthropic_adapter.py deleted file mode 100644 index 961bc77527..0000000000 --- a/litellm/adapters/anthropic_adapter.py +++ /dev/null @@ -1,186 +0,0 @@ -# What is this? -## Translates OpenAI call to Anthropic `/v1/messages` format -import traceback -from typing import Any, Optional - -import litellm -from litellm import ChatCompletionRequest, verbose_logger -from litellm.integrations.custom_logger import CustomLogger -from litellm.types.llms.anthropic import AnthropicMessagesRequest, AnthropicResponse -from litellm.types.utils import AdapterCompletionStreamWrapper, ModelResponse - - -class AnthropicAdapter(CustomLogger): - def __init__(self) -> None: - super().__init__() - - def translate_completion_input_params( - self, kwargs - ) -> Optional[ChatCompletionRequest]: - """ - - translate params, where needed - - pass rest, as is - """ - request_body = AnthropicMessagesRequest(**kwargs) # type: ignore - - translated_body = litellm.AnthropicExperimentalPassThroughConfig().translate_anthropic_to_openai( - anthropic_message_request=request_body - ) - - return translated_body - - def translate_completion_output_params( - self, response: ModelResponse - ) -> Optional[AnthropicResponse]: - - return litellm.AnthropicExperimentalPassThroughConfig().translate_openai_response_to_anthropic( - response=response - ) - - def translate_completion_output_params_streaming( - self, completion_stream: Any - ) -> AdapterCompletionStreamWrapper | None: - return AnthropicStreamWrapper(completion_stream=completion_stream) - - -anthropic_adapter = AnthropicAdapter() - - -class AnthropicStreamWrapper(AdapterCompletionStreamWrapper): - """ - - first chunk return 'message_start' - - content block must be started and stopped - - finish_reason must map exactly to anthropic reason, else anthropic client won't be able to parse it. 
- """ - - sent_first_chunk: bool = False - sent_content_block_start: bool = False - sent_content_block_finish: bool = False - sent_last_message: bool = False - holding_chunk: Optional[Any] = None - - def __next__(self): - try: - if self.sent_first_chunk is False: - self.sent_first_chunk = True - return { - "type": "message_start", - "message": { - "id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY", - "type": "message", - "role": "assistant", - "content": [], - "model": "claude-3-5-sonnet-20240620", - "stop_reason": None, - "stop_sequence": None, - "usage": {"input_tokens": 25, "output_tokens": 1}, - }, - } - if self.sent_content_block_start is False: - self.sent_content_block_start = True - return { - "type": "content_block_start", - "index": 0, - "content_block": {"type": "text", "text": ""}, - } - - for chunk in self.completion_stream: - if chunk == "None" or chunk is None: - raise Exception - - processed_chunk = litellm.AnthropicExperimentalPassThroughConfig().translate_streaming_openai_response_to_anthropic( - response=chunk - ) - if ( - processed_chunk["type"] == "message_delta" - and self.sent_content_block_finish is False - ): - self.holding_chunk = processed_chunk - self.sent_content_block_finish = True - return { - "type": "content_block_stop", - "index": 0, - } - elif self.holding_chunk is not None: - return_chunk = self.holding_chunk - self.holding_chunk = processed_chunk - return return_chunk - else: - return processed_chunk - if self.holding_chunk is not None: - return_chunk = self.holding_chunk - self.holding_chunk = None - return return_chunk - if self.sent_last_message is False: - self.sent_last_message = True - return {"type": "message_stop"} - raise StopIteration - except StopIteration: - if self.sent_last_message is False: - self.sent_last_message = True - return {"type": "message_stop"} - raise StopIteration - except Exception as e: - verbose_logger.error( - "Anthropic Adapter - {}\n{}".format(e, traceback.format_exc()) - ) - - async def 
__anext__(self): - try: - if self.sent_first_chunk is False: - self.sent_first_chunk = True - return { - "type": "message_start", - "message": { - "id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY", - "type": "message", - "role": "assistant", - "content": [], - "model": "claude-3-5-sonnet-20240620", - "stop_reason": None, - "stop_sequence": None, - "usage": {"input_tokens": 25, "output_tokens": 1}, - }, - } - if self.sent_content_block_start is False: - self.sent_content_block_start = True - return { - "type": "content_block_start", - "index": 0, - "content_block": {"type": "text", "text": ""}, - } - async for chunk in self.completion_stream: - if chunk == "None" or chunk is None: - raise Exception - processed_chunk = litellm.AnthropicExperimentalPassThroughConfig().translate_streaming_openai_response_to_anthropic( - response=chunk - ) - if ( - processed_chunk["type"] == "message_delta" - and self.sent_content_block_finish is False - ): - self.holding_chunk = processed_chunk - self.sent_content_block_finish = True - return { - "type": "content_block_stop", - "index": 0, - } - elif self.holding_chunk is not None: - return_chunk = self.holding_chunk - self.holding_chunk = processed_chunk - return return_chunk - else: - return processed_chunk - if self.holding_chunk is not None: - return_chunk = self.holding_chunk - self.holding_chunk = None - return return_chunk - if self.sent_last_message is False: - self.sent_last_message = True - return {"type": "message_stop"} - raise StopIteration - except StopIteration: - if self.sent_last_message is False: - self.sent_last_message = True - return {"type": "message_stop"} - raise StopAsyncIteration diff --git a/litellm/batches/batch_utils.py b/litellm/batches/batch_utils.py index f24eda0432..af53304e5a 100644 --- a/litellm/batches/batch_utils.py +++ b/litellm/batches/batch_utils.py @@ -1,76 +1,16 @@ -import asyncio -import datetime import json -import threading -from typing import Any, List, Literal, Optional +from typing import 
Any, List, Literal, Tuple import litellm from litellm._logging import verbose_logger -from litellm.constants import ( - BATCH_STATUS_POLL_INTERVAL_SECONDS, - BATCH_STATUS_POLL_MAX_ATTEMPTS, -) -from litellm.files.main import afile_content -from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj from litellm.types.llms.openai import Batch -from litellm.types.utils import StandardLoggingPayload, Usage - - -async def batches_async_logging( - batch_id: str, - custom_llm_provider: Literal["openai", "azure", "vertex_ai"] = "openai", - logging_obj: Optional[LiteLLMLoggingObj] = None, - **kwargs, -): - """ - Async Job waits for the batch to complete and then logs the completed batch usage - cost, total tokens, prompt tokens, completion tokens - - - Polls retrieve_batch until it returns a batch with status "completed" or "failed" - """ - from .main import aretrieve_batch - - verbose_logger.debug( - ".....in _batches_async_logging... polling retrieve to get batch status" - ) - if logging_obj is None: - raise ValueError( - "logging_obj is None cannot calculate cost / log batch creation event" - ) - for _ in range(BATCH_STATUS_POLL_MAX_ATTEMPTS): - try: - start_time = datetime.datetime.now() - batch: Batch = await aretrieve_batch(batch_id, custom_llm_provider) - verbose_logger.debug( - "in _batches_async_logging... 
batch status= %s", batch.status - ) - - if batch.status == "completed": - end_time = datetime.datetime.now() - await _handle_completed_batch( - batch=batch, - custom_llm_provider=custom_llm_provider, - logging_obj=logging_obj, - start_time=start_time, - end_time=end_time, - **kwargs, - ) - break - elif batch.status == "failed": - pass - except Exception as e: - verbose_logger.error("error in batches_async_logging", e) - await asyncio.sleep(BATCH_STATUS_POLL_INTERVAL_SECONDS) +from litellm.types.utils import CallTypes, Usage async def _handle_completed_batch( batch: Batch, custom_llm_provider: Literal["openai", "azure", "vertex_ai"], - logging_obj: LiteLLMLoggingObj, - start_time: datetime.datetime, - end_time: datetime.datetime, - **kwargs, -) -> None: +) -> Tuple[float, Usage, List[str]]: """Helper function to process a completed batch and handle logging""" # Get batch results file_content_dictionary = await _get_batch_output_file_content_as_dictionary( @@ -87,52 +27,25 @@ async def _handle_completed_batch( custom_llm_provider=custom_llm_provider, ) - # Handle logging - await _log_completed_batch( - logging_obj=logging_obj, - batch_usage=batch_usage, - batch_cost=batch_cost, - start_time=start_time, - end_time=end_time, - **kwargs, - ) + batch_models = _get_batch_models_from_file_content(file_content_dictionary) + + return batch_cost, batch_usage, batch_models -async def _log_completed_batch( - logging_obj: LiteLLMLoggingObj, - batch_usage: Usage, - batch_cost: float, - start_time: datetime.datetime, - end_time: datetime.datetime, - **kwargs, -) -> None: - """Helper function to handle all logging operations for a completed batch""" - logging_obj.call_type = "batch_success" - - standard_logging_object = _create_standard_logging_object_for_completed_batch( - kwargs=kwargs, - start_time=start_time, - end_time=end_time, - logging_obj=logging_obj, - batch_usage_object=batch_usage, - response_cost=batch_cost, - ) - - 
logging_obj.model_call_details["standard_logging_object"] = standard_logging_object - - # Launch async and sync logging handlers - asyncio.create_task( - logging_obj.async_success_handler( - result=None, - start_time=start_time, - end_time=end_time, - cache_hit=None, - ) - ) - threading.Thread( - target=logging_obj.success_handler, - args=(None, start_time, end_time), - ).start() +def _get_batch_models_from_file_content( + file_content_dictionary: List[dict], +) -> List[str]: + """ + Get the models from the file content + """ + batch_models = [] + for _item in file_content_dictionary: + if _batch_response_was_successful(_item): + _response_body = _get_response_from_batch_job_output_file(_item) + _model = _response_body.get("model") + if _model: + batch_models.append(_model) + return batch_models async def _batch_cost_calculator( @@ -159,6 +72,8 @@ async def _get_batch_output_file_content_as_dictionary( """ Get the batch output file content as a list of dictionaries """ + from litellm.files.main import afile_content + if custom_llm_provider == "vertex_ai": raise ValueError("Vertex AI does not support file content retrieval") @@ -208,6 +123,7 @@ def _get_batch_job_cost_from_file_content( total_cost += litellm.completion_cost( completion_response=_response_body, custom_llm_provider=custom_llm_provider, + call_type=CallTypes.aretrieve_batch.value, ) verbose_logger.debug("total_cost=%s", total_cost) return total_cost @@ -264,30 +180,3 @@ def _batch_response_was_successful(batch_job_output_file: dict) -> bool: """ _response: dict = batch_job_output_file.get("response", None) or {} return _response.get("status_code", None) == 200 - - -def _create_standard_logging_object_for_completed_batch( - kwargs: dict, - start_time: datetime.datetime, - end_time: datetime.datetime, - logging_obj: LiteLLMLoggingObj, - batch_usage_object: Usage, - response_cost: float, -) -> StandardLoggingPayload: - """ - Create a standard logging object for a completed batch - """ - 
standard_logging_object = logging_obj.model_call_details.get( - "standard_logging_object", None - ) - - if standard_logging_object is None: - raise ValueError("unable to create standard logging object for completed batch") - - # Add Completed Batch Job Usage and Response Cost - standard_logging_object["call_type"] = "batch_success" - standard_logging_object["response_cost"] = response_cost - standard_logging_object["total_tokens"] = batch_usage_object.total_tokens - standard_logging_object["prompt_tokens"] = batch_usage_object.prompt_tokens - standard_logging_object["completion_tokens"] = batch_usage_object.completion_tokens - return standard_logging_object diff --git a/litellm/batches/main.py b/litellm/batches/main.py index 32428c9c18..2f4800043c 100644 --- a/litellm/batches/main.py +++ b/litellm/batches/main.py @@ -31,10 +31,9 @@ from litellm.types.llms.openai import ( RetrieveBatchRequest, ) from litellm.types.router import GenericLiteLLMParams +from litellm.types.utils import LiteLLMBatch from litellm.utils import client, get_litellm_params, supports_httpx_timeout -from .batch_utils import batches_async_logging - ####### ENVIRONMENT VARIABLES ################### openai_batches_instance = OpenAIBatchesAPI() azure_batches_instance = AzureBatchesAPI() @@ -85,17 +84,6 @@ async def acreate_batch( else: response = init_response - # Start async logging job - if response is not None: - asyncio.create_task( - batches_async_logging( - logging_obj=kwargs.get("litellm_logging_obj", None), - batch_id=response.id, - custom_llm_provider=custom_llm_provider, - **kwargs, - ) - ) - return response except Exception as e: raise e @@ -111,7 +99,7 @@ def create_batch( extra_headers: Optional[Dict[str, str]] = None, extra_body: Optional[Dict[str, str]] = None, **kwargs, -) -> Union[Batch, Coroutine[Any, Any, Batch]]: +) -> Union[LiteLLMBatch, Coroutine[Any, Any, LiteLLMBatch]]: """ Creates and executes a batch from an uploaded file of request @@ -119,21 +107,26 @@ def create_batch( 
""" try: optional_params = GenericLiteLLMParams(**kwargs) + litellm_call_id = kwargs.get("litellm_call_id", None) + proxy_server_request = kwargs.get("proxy_server_request", None) + model_info = kwargs.get("model_info", None) _is_async = kwargs.pop("acreate_batch", False) is True litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj", None) ### TIMEOUT LOGIC ### timeout = optional_params.timeout or kwargs.get("request_timeout", 600) or 600 - litellm_params = get_litellm_params( - custom_llm_provider=custom_llm_provider, - litellm_call_id=kwargs.get("litellm_call_id", None), - litellm_trace_id=kwargs.get("litellm_trace_id"), - litellm_metadata=kwargs.get("litellm_metadata"), - ) litellm_logging_obj.update_environment_variables( model=None, user=None, optional_params=optional_params.model_dump(), - litellm_params=litellm_params, + litellm_params={ + "litellm_call_id": litellm_call_id, + "proxy_server_request": proxy_server_request, + "model_info": model_info, + "metadata": metadata, + "preset_cache_key": None, + "stream_response": {}, + **optional_params.model_dump(exclude_unset=True), + }, custom_llm_provider=custom_llm_provider, ) @@ -261,7 +254,7 @@ def create_batch( response=httpx.Response( status_code=400, content="Unsupported provider", - request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore + request=httpx.Request(method="create_batch", url="https://github.com/BerriAI/litellm"), # type: ignore ), ) return response @@ -269,6 +262,7 @@ def create_batch( raise e +@client async def aretrieve_batch( batch_id: str, custom_llm_provider: Literal["openai", "azure", "vertex_ai"] = "openai", @@ -276,7 +270,7 @@ async def aretrieve_batch( extra_headers: Optional[Dict[str, str]] = None, extra_body: Optional[Dict[str, str]] = None, **kwargs, -) -> Batch: +) -> LiteLLMBatch: """ Async: Retrieves a batch. 
@@ -310,6 +304,7 @@ async def aretrieve_batch( raise e +@client def retrieve_batch( batch_id: str, custom_llm_provider: Literal["openai", "azure", "vertex_ai"] = "openai", @@ -317,7 +312,7 @@ def retrieve_batch( extra_headers: Optional[Dict[str, str]] = None, extra_body: Optional[Dict[str, str]] = None, **kwargs, -) -> Union[Batch, Coroutine[Any, Any, Batch]]: +) -> Union[LiteLLMBatch, Coroutine[Any, Any, LiteLLMBatch]]: """ Retrieves a batch. @@ -325,9 +320,23 @@ def retrieve_batch( """ try: optional_params = GenericLiteLLMParams(**kwargs) + + litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj", None) ### TIMEOUT LOGIC ### timeout = optional_params.timeout or kwargs.get("request_timeout", 600) or 600 - # set timeout for 10 minutes by default + litellm_params = get_litellm_params( + custom_llm_provider=custom_llm_provider, + litellm_call_id=kwargs.get("litellm_call_id", None), + litellm_trace_id=kwargs.get("litellm_trace_id"), + litellm_metadata=kwargs.get("litellm_metadata"), + ) + litellm_logging_obj.update_environment_variables( + model=None, + user=None, + optional_params=optional_params.model_dump(), + litellm_params=litellm_params, + custom_llm_provider=custom_llm_provider, + ) if ( timeout is not None diff --git a/litellm/caching/caching.py b/litellm/caching/caching.py index 26f94a94c2..415c49edff 100644 --- a/litellm/caching/caching.py +++ b/litellm/caching/caching.py @@ -13,26 +13,14 @@ import json import time import traceback from enum import Enum -from typing import Any, Dict, List, Optional, Set, Union +from typing import Any, Dict, List, Optional, Union -from openai.types.audio.transcription_create_params import TranscriptionCreateParams -from openai.types.chat.completion_create_params import ( - CompletionCreateParamsNonStreaming, - CompletionCreateParamsStreaming, -) -from openai.types.completion_create_params import ( - CompletionCreateParamsNonStreaming as TextCompletionCreateParamsNonStreaming, -) -from 
openai.types.completion_create_params import ( - CompletionCreateParamsStreaming as TextCompletionCreateParamsStreaming, -) -from openai.types.embedding_create_params import EmbeddingCreateParams from pydantic import BaseModel import litellm from litellm._logging import verbose_logger +from litellm.litellm_core_utils.model_param_helper import ModelParamHelper from litellm.types.caching import * -from litellm.types.rerank import RerankRequest from litellm.types.utils import all_litellm_params from .base_cache import BaseCache @@ -257,7 +245,7 @@ class Cache: verbose_logger.debug("\nReturning preset cache key: %s", preset_cache_key) return preset_cache_key - combined_kwargs = self._get_relevant_args_to_use_for_cache_key() + combined_kwargs = ModelParamHelper._get_all_llm_api_params() litellm_param_kwargs = all_litellm_params for param in kwargs: if param in combined_kwargs: @@ -364,76 +352,6 @@ class Cache: if "litellm_params" in kwargs: kwargs["litellm_params"]["preset_cache_key"] = preset_cache_key - def _get_relevant_args_to_use_for_cache_key(self) -> Set[str]: - """ - Gets the supported kwargs for each call type and combines them - """ - chat_completion_kwargs = self._get_litellm_supported_chat_completion_kwargs() - text_completion_kwargs = self._get_litellm_supported_text_completion_kwargs() - embedding_kwargs = self._get_litellm_supported_embedding_kwargs() - transcription_kwargs = self._get_litellm_supported_transcription_kwargs() - rerank_kwargs = self._get_litellm_supported_rerank_kwargs() - exclude_kwargs = self._get_kwargs_to_exclude_from_cache_key() - - combined_kwargs = chat_completion_kwargs.union( - text_completion_kwargs, - embedding_kwargs, - transcription_kwargs, - rerank_kwargs, - ) - combined_kwargs = combined_kwargs.difference(exclude_kwargs) - return combined_kwargs - - def _get_litellm_supported_chat_completion_kwargs(self) -> Set[str]: - """ - Get the litellm supported chat completion kwargs - - This follows the OpenAI API Spec - """ - 
all_chat_completion_kwargs = set( - CompletionCreateParamsNonStreaming.__annotations__.keys() - ).union(set(CompletionCreateParamsStreaming.__annotations__.keys())) - return all_chat_completion_kwargs - - def _get_litellm_supported_text_completion_kwargs(self) -> Set[str]: - """ - Get the litellm supported text completion kwargs - - This follows the OpenAI API Spec - """ - all_text_completion_kwargs = set( - TextCompletionCreateParamsNonStreaming.__annotations__.keys() - ).union(set(TextCompletionCreateParamsStreaming.__annotations__.keys())) - return all_text_completion_kwargs - - def _get_litellm_supported_rerank_kwargs(self) -> Set[str]: - """ - Get the litellm supported rerank kwargs - """ - return set(RerankRequest.model_fields.keys()) - - def _get_litellm_supported_embedding_kwargs(self) -> Set[str]: - """ - Get the litellm supported embedding kwargs - - This follows the OpenAI API Spec - """ - return set(EmbeddingCreateParams.__annotations__.keys()) - - def _get_litellm_supported_transcription_kwargs(self) -> Set[str]: - """ - Get the litellm supported transcription kwargs - - This follows the OpenAI API Spec - """ - return set(TranscriptionCreateParams.__annotations__.keys()) - - def _get_kwargs_to_exclude_from_cache_key(self) -> Set[str]: - """ - Get the kwargs to exclude from the cache key - """ - return set(["metadata"]) - @staticmethod def _get_hashed_cache_key(cache_key: str) -> str: """ diff --git a/litellm/caching/caching_handler.py b/litellm/caching/caching_handler.py index 40c1001732..2a958c9eee 100644 --- a/litellm/caching/caching_handler.py +++ b/litellm/caching/caching_handler.py @@ -247,7 +247,6 @@ class LLMCachingHandler: pass else: call_type = original_function.__name__ - cached_result = self._convert_cached_result_to_model_response( cached_result=cached_result, call_type=call_type, @@ -725,6 +724,7 @@ class LLMCachingHandler: """ Sync internal method to add the result to the cache """ + new_kwargs = kwargs.copy() new_kwargs.update( 
convert_args_to_kwargs( @@ -738,6 +738,7 @@ class LLMCachingHandler: if self._should_store_result_in_cache( original_function=self.original_function, kwargs=new_kwargs ): + litellm.cache.add_cache(result, **new_kwargs) return diff --git a/litellm/caching/redis_cache.py b/litellm/caching/redis_cache.py index 960d19c3f8..66245e7476 100644 --- a/litellm/caching/redis_cache.py +++ b/litellm/caching/redis_cache.py @@ -543,6 +543,7 @@ class RedisCache(BaseCache): _redis_client: Redis = self.init_async_client() # type: ignore start_time = time.time() _used_ttl = self.get_ttl(ttl=ttl) + key = self.check_and_fix_namespace(key=key) try: result = await _redis_client.incrbyfloat(name=key, amount=value) if _used_ttl is not None: diff --git a/litellm/constants.py b/litellm/constants.py index a1f7750d53..0288c45e40 100644 --- a/litellm/constants.py +++ b/litellm/constants.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Literal ROUTER_MAX_FALLBACKS = 5 DEFAULT_BATCH_SIZE = 512 @@ -120,6 +120,7 @@ OPENAI_CHAT_COMPLETION_PARAMS = [ "top_logprobs", "reasoning_effort", "extra_headers", + "thinking", ] openai_compatible_endpoints: List = [ @@ -319,6 +320,17 @@ baseten_models: List = [ "31dxrj3", ] # FALCON 7B # WizardLM # Mosaic ML +BEDROCK_INVOKE_PROVIDERS_LITERAL = Literal[ + "cohere", + "anthropic", + "mistral", + "amazon", + "meta", + "llama", + "ai21", + "nova", + "deepseek_r1", +] open_ai_embedding_models: List = ["text-embedding-ada-002"] cohere_embedding_models: List = [ diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index 07676d8a83..1d10fa1f9e 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -239,6 +239,15 @@ def cost_per_token( # noqa: PLR0915 custom_llm_provider=custom_llm_provider, billed_units=rerank_billed_units, ) + elif ( + call_type == "aretrieve_batch" + or call_type == "retrieve_batch" + or call_type == CallTypes.aretrieve_batch + or call_type == CallTypes.retrieve_batch + ): + return 
batch_cost_calculator( + usage=usage_block, model=model, custom_llm_provider=custom_llm_provider + ) elif call_type == "atranscription" or call_type == "transcription": return openai_cost_per_second( model=model, @@ -399,9 +408,12 @@ def _select_model_name_for_cost_calc( if base_model is not None: return_model = base_model - completion_response_model: Optional[str] = getattr( - completion_response, "model", None - ) + completion_response_model: Optional[str] = None + if completion_response is not None: + if isinstance(completion_response, BaseModel): + completion_response_model = getattr(completion_response, "model", None) + elif isinstance(completion_response, dict): + completion_response_model = completion_response.get("model", None) hidden_params: Optional[dict] = getattr(completion_response, "_hidden_params", None) if completion_response_model is None and hidden_params is not None: if ( @@ -957,3 +969,54 @@ def default_image_cost_calculator( ) return cost_info["input_cost_per_pixel"] * height * width * n + + +def batch_cost_calculator( + usage: Usage, + model: str, + custom_llm_provider: Optional[str] = None, +) -> Tuple[float, float]: + """ + Calculate the cost of a batch job + """ + + _, custom_llm_provider, _, _ = litellm.get_llm_provider( + model=model, custom_llm_provider=custom_llm_provider + ) + + verbose_logger.info( + "Calculating batch cost per token. 
model=%s, custom_llm_provider=%s", + model, + custom_llm_provider, + ) + + try: + model_info: Optional[ModelInfo] = litellm.get_model_info( + model=model, custom_llm_provider=custom_llm_provider + ) + except Exception: + model_info = None + + if not model_info: + return 0.0, 0.0 + + input_cost_per_token_batches = model_info.get("input_cost_per_token_batches") + input_cost_per_token = model_info.get("input_cost_per_token") + output_cost_per_token_batches = model_info.get("output_cost_per_token_batches") + output_cost_per_token = model_info.get("output_cost_per_token") + total_prompt_cost = 0.0 + total_completion_cost = 0.0 + if input_cost_per_token_batches: + total_prompt_cost = usage.prompt_tokens * input_cost_per_token_batches + elif input_cost_per_token: + total_prompt_cost = ( + usage.prompt_tokens * (input_cost_per_token) / 2 + ) # batch cost is usually half of the regular token cost + if output_cost_per_token_batches: + total_completion_cost = usage.completion_tokens * output_cost_per_token_batches + elif output_cost_per_token: + total_completion_cost = ( + usage.completion_tokens * (output_cost_per_token) / 2 + ) # batch cost is usually half of the regular token cost + + return total_prompt_cost, total_completion_cost diff --git a/litellm/files/main.py b/litellm/files/main.py index 9f81b2e385..e49066e84b 100644 --- a/litellm/files/main.py +++ b/litellm/files/main.py @@ -816,7 +816,7 @@ def file_content( ) else: raise litellm.exceptions.BadRequestError( - message="LiteLLM doesn't support {} for 'file_content'. Only 'openai' and 'azure' are supported.".format( + message="LiteLLM doesn't support {} for 'custom_llm_provider'. 
Supported providers are 'openai', 'azure', 'vertex_ai'.".format( custom_llm_provider ), model="n/a", diff --git a/litellm/integrations/datadog/datadog.py b/litellm/integrations/datadog/datadog.py index 2558b0c2eb..4f4b05c84e 100644 --- a/litellm/integrations/datadog/datadog.py +++ b/litellm/integrations/datadog/datadog.py @@ -577,6 +577,4 @@ class DataDogLogger( start_time_utc: Optional[datetimeObj], end_time_utc: Optional[datetimeObj], ) -> Optional[dict]: - raise NotImplementedError( - "Datdog Integration for getting request/response payloads not implemented as yet" - ) + pass diff --git a/litellm/integrations/langfuse/langfuse_prompt_management.py b/litellm/integrations/langfuse/langfuse_prompt_management.py index cc2a6cf80d..1f4ca84db3 100644 --- a/litellm/integrations/langfuse/langfuse_prompt_management.py +++ b/litellm/integrations/langfuse/langfuse_prompt_management.py @@ -40,6 +40,7 @@ in_memory_dynamic_logger_cache = DynamicLoggingCache() def langfuse_client_init( langfuse_public_key=None, langfuse_secret=None, + langfuse_secret_key=None, langfuse_host=None, flush_interval=1, ) -> LangfuseClass: @@ -67,7 +68,10 @@ def langfuse_client_init( ) # Instance variables - secret_key = langfuse_secret or os.getenv("LANGFUSE_SECRET_KEY") + + secret_key = ( + langfuse_secret or langfuse_secret_key or os.getenv("LANGFUSE_SECRET_KEY") + ) public_key = langfuse_public_key or os.getenv("LANGFUSE_PUBLIC_KEY") langfuse_host = langfuse_host or os.getenv( "LANGFUSE_HOST", "https://cloud.langfuse.com" @@ -190,6 +194,7 @@ class LangfusePromptManagement(LangFuseLogger, PromptManagementBase, CustomLogge langfuse_client = langfuse_client_init( langfuse_public_key=dynamic_callback_params.get("langfuse_public_key"), langfuse_secret=dynamic_callback_params.get("langfuse_secret"), + langfuse_secret_key=dynamic_callback_params.get("langfuse_secret_key"), langfuse_host=dynamic_callback_params.get("langfuse_host"), ) langfuse_prompt_client = self._get_prompt_from_id( @@ -206,6 +211,7 @@ 
class LangfusePromptManagement(LangFuseLogger, PromptManagementBase, CustomLogge langfuse_client = langfuse_client_init( langfuse_public_key=dynamic_callback_params.get("langfuse_public_key"), langfuse_secret=dynamic_callback_params.get("langfuse_secret"), + langfuse_secret_key=dynamic_callback_params.get("langfuse_secret_key"), langfuse_host=dynamic_callback_params.get("langfuse_host"), ) langfuse_prompt_client = self._get_prompt_from_id( diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py index 04050abf7b..d6e47b87ce 100644 --- a/litellm/integrations/prometheus.py +++ b/litellm/integrations/prometheus.py @@ -1560,10 +1560,18 @@ class PrometheusLogger(CustomLogger): - Max Budget - Budget Reset At """ - self.litellm_remaining_team_budget_metric.labels( - team.team_id, - team.team_alias or "", - ).set( + enum_values = UserAPIKeyLabelValues( + team=team.team_id, + team_alias=team.team_alias or "", + ) + + _labels = prometheus_label_factory( + supported_enum_labels=PrometheusMetricLabels.get_labels( + label_name="litellm_remaining_team_budget_metric" + ), + enum_values=enum_values, + ) + self.litellm_remaining_team_budget_metric.labels(**_labels).set( self._safe_get_remaining_budget( max_budget=team.max_budget, spend=team.spend, @@ -1571,16 +1579,22 @@ class PrometheusLogger(CustomLogger): ) if team.max_budget is not None: - self.litellm_team_max_budget_metric.labels( - team.team_id, - team.team_alias or "", - ).set(team.max_budget) + _labels = prometheus_label_factory( + supported_enum_labels=PrometheusMetricLabels.get_labels( + label_name="litellm_team_max_budget_metric" + ), + enum_values=enum_values, + ) + self.litellm_team_max_budget_metric.labels(**_labels).set(team.max_budget) if team.budget_reset_at is not None: - self.litellm_team_budget_remaining_hours_metric.labels( - team.team_id, - team.team_alias or "", - ).set( + _labels = prometheus_label_factory( + supported_enum_labels=PrometheusMetricLabels.get_labels( + 
label_name="litellm_team_budget_remaining_hours_metric" + ), + enum_values=enum_values, + ) + self.litellm_team_budget_remaining_hours_metric.labels(**_labels).set( self._get_remaining_hours_for_budget_reset( budget_reset_at=team.budget_reset_at ) diff --git a/litellm/litellm_core_utils/core_helpers.py b/litellm/litellm_core_utils/core_helpers.py index ceb150946c..2036b93692 100644 --- a/litellm/litellm_core_utils/core_helpers.py +++ b/litellm/litellm_core_utils/core_helpers.py @@ -73,8 +73,19 @@ def remove_index_from_tool_calls( def get_litellm_metadata_from_kwargs(kwargs: dict): """ Helper to get litellm metadata from all litellm request kwargs + + Return `litellm_metadata` if it exists, otherwise return `metadata` """ - return kwargs.get("litellm_params", {}).get("metadata", {}) + litellm_params = kwargs.get("litellm_params", {}) + if litellm_params: + metadata = litellm_params.get("metadata", {}) + litellm_metadata = litellm_params.get("litellm_metadata", {}) + if litellm_metadata: + return litellm_metadata + elif metadata: + return metadata + + return {} # Helper functions used for OTEL logging diff --git a/litellm/litellm_core_utils/dd_tracing.py b/litellm/litellm_core_utils/dd_tracing.py index 4b33b2c423..1f866a998a 100644 --- a/litellm/litellm_core_utils/dd_tracing.py +++ b/litellm/litellm_core_utils/dd_tracing.py @@ -5,61 +5,69 @@ If the ddtrace package is not installed, the tracer will be a no-op. 
""" from contextlib import contextmanager +from typing import TYPE_CHECKING, Any, Union from litellm.secret_managers.main import get_secret_bool +if TYPE_CHECKING: + from ddtrace.tracer import Tracer as DD_TRACER +else: + DD_TRACER = Any + + +class NullSpan: + """A no-op span implementation.""" + + def __enter__(self): + return self + + def __exit__(self, *args): + pass + + def finish(self): + pass + + +@contextmanager +def null_tracer(name, **kwargs): + """Context manager that yields a no-op span.""" + yield NullSpan() + + +class NullTracer: + """A no-op tracer implementation.""" + + def trace(self, name, **kwargs): + return NullSpan() + + def wrap(self, name=None, **kwargs): + # If called with no arguments (as @tracer.wrap()) + if callable(name): + return name + + # If called with arguments (as @tracer.wrap(name="something")) + def decorator(f): + return f + + return decorator + def _should_use_dd_tracer(): - """ - Returns True if `USE_DDTRACE` is set to True in .env - """ + """Returns True if `USE_DDTRACE` is set to True in .env""" return get_secret_bool("USE_DDTRACE", False) is True -has_ddtrace = False -try: - from ddtrace import tracer as dd_tracer +# Initialize tracer +should_use_dd_tracer = _should_use_dd_tracer() +tracer: Union[NullTracer, DD_TRACER] = NullTracer() +# We need to ensure tracer is never None and always has the required methods +if should_use_dd_tracer: + try: + from ddtrace import tracer as dd_tracer - if _should_use_dd_tracer(): - has_ddtrace = True -except ImportError: - has_ddtrace = False - - @contextmanager - def null_tracer(name, **kwargs): - class NullSpan: - def __enter__(self): - return self - - def __exit__(self, *args): - pass - - def finish(self): - pass - - yield NullSpan() - - class NullTracer: - def trace(self, name, **kwargs): - class NullSpan: - def __enter__(self): - return self - - def __exit__(self, *args): - pass - - def finish(self): - pass - - return NullSpan() - - def wrap(self, name=None, **kwargs): - def 
decorator(f): - return f - - return decorator - - dd_tracer = NullTracer() - -# Export the tracer instance -tracer = dd_tracer + # Define the type to match what's expected by the code using this module + tracer = dd_tracer + except ImportError: + tracer = NullTracer() +else: + tracer = NullTracer() diff --git a/litellm/litellm_core_utils/exception_mapping_utils.py b/litellm/litellm_core_utils/exception_mapping_utils.py index 9ac20de4c0..1f5e0147b9 100644 --- a/litellm/litellm_core_utils/exception_mapping_utils.py +++ b/litellm/litellm_core_utils/exception_mapping_utils.py @@ -278,6 +278,7 @@ def exception_type( # type: ignore # noqa: PLR0915 "This model's maximum context length is" in error_str or "string too long. Expected a string with maximum length" in error_str + or "model's maximum context limit" in error_str ): exception_mapping_worked = True raise ContextWindowExceededError( @@ -692,6 +693,13 @@ def exception_type( # type: ignore # noqa: PLR0915 response=getattr(original_exception, "response", None), litellm_debug_info=extra_information, ) + elif "model's maximum context limit" in error_str: + exception_mapping_worked = True + raise ContextWindowExceededError( + message=f"{custom_llm_provider}Exception: Context Window Error - {error_str}", + model=model, + llm_provider=custom_llm_provider, + ) elif "token_quota_reached" in error_str: exception_mapping_worked = True raise RateLimitError( diff --git a/litellm/litellm_core_utils/get_litellm_params.py b/litellm/litellm_core_utils/get_litellm_params.py index 3d8394f7af..cf62375f33 100644 --- a/litellm/litellm_core_utils/get_litellm_params.py +++ b/litellm/litellm_core_utils/get_litellm_params.py @@ -57,6 +57,7 @@ def get_litellm_params( prompt_variables: Optional[dict] = None, async_call: Optional[bool] = None, ssl_verify: Optional[bool] = None, + merge_reasoning_content_in_choices: Optional[bool] = None, **kwargs, ) -> dict: litellm_params = { @@ -75,7 +76,7 @@ def get_litellm_params( "model_info": model_info, 
"proxy_server_request": proxy_server_request, "preset_cache_key": preset_cache_key, - "no-log": no_log, + "no-log": no_log or kwargs.get("no-log"), "stream_response": {}, # litellm_call_id: ModelResponse Dict "input_cost_per_token": input_cost_per_token, "input_cost_per_second": input_cost_per_second, @@ -97,5 +98,6 @@ def get_litellm_params( "prompt_variables": prompt_variables, "async_call": async_call, "ssl_verify": ssl_verify, + "merge_reasoning_content_in_choices": merge_reasoning_content_in_choices, } return litellm_params diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index d44fb07637..a3d9a57a49 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -3,7 +3,6 @@ # Logging function -> log the exact model details + what's being sent | Non-Blocking import copy import datetime -from functools import lru_cache import json import os import re @@ -13,6 +12,7 @@ import time import traceback import uuid from datetime import datetime as dt_object +from functools import lru_cache from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union, cast from pydantic import BaseModel @@ -25,6 +25,7 @@ from litellm import ( turn_off_message_logging, ) from litellm._logging import _is_debugging_on, verbose_logger +from litellm.batches.batch_utils import _handle_completed_batch from litellm.caching.caching import DualCache, InMemoryCache from litellm.caching.caching_handler import LLMCachingHandler from litellm.cost_calculator import _select_model_name_for_cost_calc @@ -33,6 +34,7 @@ from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.mlflow import MlflowLogger from litellm.integrations.pagerduty.pagerduty import PagerDutyAlerting from litellm.litellm_core_utils.get_litellm_params import get_litellm_params +from litellm.litellm_core_utils.model_param_helper import ModelParamHelper from 
litellm.litellm_core_utils.redact_messages import ( redact_message_input_output_from_custom_logger, redact_message_input_output_from_logging, @@ -49,9 +51,11 @@ from litellm.types.utils import ( CallTypes, EmbeddingResponse, ImageResponse, + LiteLLMBatch, LiteLLMLoggingBaseClass, ModelResponse, ModelResponseStream, + RawRequestTypedDict, StandardCallbackDynamicParams, StandardLoggingAdditionalHeaders, StandardLoggingHiddenParams, @@ -202,6 +206,7 @@ class Logging(LiteLLMLoggingBaseClass): ] = None, applied_guardrails: Optional[List[str]] = None, kwargs: Optional[Dict] = None, + log_raw_request_response: bool = False, ): _input: Optional[str] = messages # save original value of messages if messages is not None: @@ -230,6 +235,7 @@ class Logging(LiteLLMLoggingBaseClass): self.sync_streaming_chunks: List[Any] = ( [] ) # for generating complete stream response + self.log_raw_request_response = log_raw_request_response # Initialize dynamic callbacks self.dynamic_input_callbacks: Optional[ @@ -450,6 +456,18 @@ class Logging(LiteLLMLoggingBaseClass): return model, messages, non_default_params + def _get_raw_request_body(self, data: Optional[Union[dict, str]]) -> dict: + if data is None: + return {"error": "Received empty dictionary for raw request body"} + if isinstance(data, str): + try: + return json.loads(data) + except Exception: + return { + "error": "Unable to parse raw request body. 
Got - {}".format(data) + } + return data + def _pre_call(self, input, api_key, model=None, additional_args={}): """ Common helper function across the sync + async pre-call function @@ -465,6 +483,7 @@ class Logging(LiteLLMLoggingBaseClass): self.model_call_details["model"] = model def pre_call(self, input, api_key, model=None, additional_args={}): # noqa: PLR0915 + # Log the exact input to the LLM API litellm.error_logs["PRE_CALL"] = locals() try: @@ -482,28 +501,54 @@ class Logging(LiteLLMLoggingBaseClass): additional_args=additional_args, ) # log raw request to provider (like LangFuse) -- if opted in. - if log_raw_request_response is True: + if ( + self.log_raw_request_response is True + or log_raw_request_response is True + ): + _litellm_params = self.model_call_details.get("litellm_params", {}) _metadata = _litellm_params.get("metadata", {}) or {} try: # [Non-blocking Extra Debug Information in metadata] - if ( - turn_off_message_logging is not None - and turn_off_message_logging is True - ): + if turn_off_message_logging is True: + _metadata["raw_request"] = ( "redacted by litellm. 
\ 'litellm.turn_off_message_logging=True'" ) else: + curl_command = self._get_request_curl_command( api_base=additional_args.get("api_base", ""), headers=additional_args.get("headers", {}), additional_args=additional_args, data=additional_args.get("complete_input_dict", {}), ) + _metadata["raw_request"] = str(curl_command) + # split up, so it's easier to parse in the UI + self.model_call_details["raw_request_typed_dict"] = ( + RawRequestTypedDict( + raw_request_api_base=str( + additional_args.get("api_base") or "" + ), + raw_request_body=self._get_raw_request_body( + additional_args.get("complete_input_dict", {}) + ), + raw_request_headers=self._get_masked_headers( + additional_args.get("headers", {}) or {}, + ignore_sensitive_headers=True, + ), + error=None, + ) + ) except Exception as e: + self.model_call_details["raw_request_typed_dict"] = ( + RawRequestTypedDict( + error=str(e), + ) + ) + traceback.print_exc() _metadata["raw_request"] = ( "Unable to Log \ raw request: {}".format( @@ -636,9 +681,14 @@ class Logging(LiteLLMLoggingBaseClass): ) verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n") + def _get_request_body(self, data: dict) -> str: + return str(data) + def _get_request_curl_command( - self, api_base: str, headers: dict, additional_args: dict, data: dict + self, api_base: str, headers: Optional[dict], additional_args: dict, data: dict ) -> str: + if headers is None: + headers = {} curl_command = "\n\nPOST Request Sent from LiteLLM:\n" curl_command += "curl -X POST \\\n" curl_command += f"{api_base} \\\n" @@ -646,11 +696,10 @@ class Logging(LiteLLMLoggingBaseClass): formatted_headers = " ".join( [f"-H '{k}: {v}'" for k, v in masked_headers.items()] ) - curl_command += ( f"{formatted_headers} \\\n" if formatted_headers.strip() != "" else "" ) - curl_command += f"-d '{str(data)}'\n" + curl_command += f"-d '{self._get_request_body(data)}'\n" if additional_args.get("request_str", None) is not None: # print the sagemaker / bedrock client request 
curl_command = "\nRequest Sent from LiteLLM:\n" @@ -659,12 +708,20 @@ class Logging(LiteLLMLoggingBaseClass): curl_command = str(self.model_call_details) return curl_command - def _get_masked_headers(self, headers: dict): + def _get_masked_headers( + self, headers: dict, ignore_sensitive_headers: bool = False + ) -> dict: """ Internal debugging helper function Masks the headers of the request sent from LiteLLM """ + sensitive_keywords = [ + "authorization", + "token", + "key", + "secret", + ] return { k: ( (v[:-44] + "*" * 44) @@ -672,6 +729,11 @@ class Logging(LiteLLMLoggingBaseClass): else "*****" ) for k, v in headers.items() + if not ignore_sensitive_headers + or not any( + sensitive_keyword in k.lower() + for sensitive_keyword in sensitive_keywords + ) } def post_call( @@ -870,6 +932,24 @@ class Logging(LiteLLMLoggingBaseClass): return None + async def _response_cost_calculator_async( + self, + result: Union[ + ModelResponse, + ModelResponseStream, + EmbeddingResponse, + ImageResponse, + TranscriptionResponse, + TextCompletionResponse, + HttpxBinaryResponseContent, + RerankResponse, + Batch, + FineTuningJob, + ], + cache_hit: Optional[bool] = None, + ) -> Optional[float]: + return self._response_cost_calculator(result=result, cache_hit=cache_hit) + def should_run_callback( self, callback: litellm.CALLBACK_TYPES, litellm_params: dict, event_hook: str ) -> bool: @@ -911,6 +991,9 @@ class Logging(LiteLLMLoggingBaseClass): self.model_call_details["log_event_type"] = "successful_api_call" self.model_call_details["end_time"] = end_time self.model_call_details["cache_hit"] = cache_hit + + if self.call_type == CallTypes.anthropic_messages.value: + result = self._handle_anthropic_messages_response_logging(result=result) ## if model in model cost map - log the response cost ## else set cost to None if ( @@ -927,8 +1010,8 @@ class Logging(LiteLLMLoggingBaseClass): or isinstance(result, TextCompletionResponse) or isinstance(result, HttpxBinaryResponseContent) # tts or 
isinstance(result, RerankResponse) - or isinstance(result, Batch) or isinstance(result, FineTuningJob) + or isinstance(result, LiteLLMBatch) ): ## HIDDEN PARAMS ## hidden_params = getattr(result, "_hidden_params", {}) @@ -1524,6 +1607,20 @@ class Logging(LiteLLMLoggingBaseClass): print_verbose( "Logging Details LiteLLM-Async Success Call, cache_hit={}".format(cache_hit) ) + + ## CALCULATE COST FOR BATCH JOBS + if self.call_type == CallTypes.aretrieve_batch.value and isinstance( + result, LiteLLMBatch + ): + + response_cost, batch_usage, batch_models = await _handle_completed_batch( + batch=result, custom_llm_provider=self.custom_llm_provider + ) + + result._hidden_params["response_cost"] = response_cost + result._hidden_params["batch_models"] = batch_models + result.usage = batch_usage + start_time, end_time, result = self._success_handler_helper_fn( start_time=start_time, end_time=end_time, @@ -1531,6 +1628,7 @@ class Logging(LiteLLMLoggingBaseClass): cache_hit=cache_hit, standard_logging_object=kwargs.get("standard_logging_object", None), ) + ## BUILD COMPLETE STREAMED RESPONSE if "async_complete_streaming_response" in self.model_call_details: return # break out of this. @@ -2269,6 +2367,37 @@ class Logging(LiteLLMLoggingBaseClass): return complete_streaming_response return None + def _handle_anthropic_messages_response_logging(self, result: Any) -> ModelResponse: + """ + Handles logging for Anthropic messages responses. + + Args: + result: The response object from the model call + + Returns: + The the response object from the model call + + - For Non-streaming responses, we need to transform the response to a ModelResponse object. + - For streaming responses, anthropic_messages handler calls success_handler with a assembled ModelResponse. 
+ """ + if self.stream and isinstance(result, ModelResponse): + return result + + result = litellm.AnthropicConfig().transform_response( + raw_response=self.model_call_details["httpx_response"], + model_response=litellm.ModelResponse(), + model=self.model, + messages=[], + logging_obj=self, + optional_params={}, + api_key="", + request_data={}, + encoding=litellm.encoding, + json_mode=False, + litellm_params={}, + ) + return result + def set_callbacks(callback_list, function_id=None): # noqa: PLR0915 """ @@ -2513,15 +2642,19 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915 # auth can be disabled on local deployments of arize phoenix if arize_phoenix_config.otlp_auth_headers is not None: - os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = arize_phoenix_config.otlp_auth_headers - + os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = ( + arize_phoenix_config.otlp_auth_headers + ) + for callback in _in_memory_loggers: if ( isinstance(callback, OpenTelemetry) and callback.callback_name == "arize_phoenix" ): return callback # type: ignore - _otel_logger = OpenTelemetry(config=otel_config, callback_name="arize_phoenix") + _otel_logger = OpenTelemetry( + config=otel_config, callback_name="arize_phoenix" + ) _in_memory_loggers.append(_otel_logger) return _otel_logger # type: ignore elif logging_integration == "otel": @@ -3081,6 +3214,7 @@ class StandardLoggingPayloadSetup: response_cost=None, additional_headers=None, litellm_overhead_time_ms=None, + batch_models=None, ) if hidden_params is not None: for key in StandardLoggingHiddenParams.__annotations__.keys(): @@ -3110,10 +3244,26 @@ class StandardLoggingPayloadSetup: str(original_exception.__class__.__name__) if original_exception else "" ) _llm_provider_in_exception = getattr(original_exception, "llm_provider", "") + + # Get traceback information (first 100 lines) + traceback_info = "" + if original_exception: + tb = getattr(original_exception, "__traceback__", None) + if tb: + import traceback + + tb_lines = 
traceback.format_tb(tb) + traceback_info = "".join(tb_lines[:100]) # Limit to first 100 lines + + # Get additional error details + error_message = str(original_exception) + return StandardLoggingPayloadErrorInformation( error_code=error_status, error_class=error_class, llm_provider=_llm_provider_in_exception, + traceback=traceback_info, + error_message=error_message if original_exception else "", ) @staticmethod @@ -3178,6 +3328,7 @@ def get_standard_logging_object_payload( api_base=None, response_cost=None, litellm_overhead_time_ms=None, + batch_models=None, ) ) @@ -3310,7 +3461,9 @@ def get_standard_logging_object_payload( requester_ip_address=clean_metadata.get("requester_ip_address", None), messages=kwargs.get("messages"), response=final_response_obj, - model_parameters=kwargs.get("optional_params", None), + model_parameters=ModelParamHelper.get_standard_logging_model_parameters( + kwargs.get("optional_params", None) or {} + ), hidden_params=clean_hidden_params, model_map_information=model_cost_information, error_str=error_str, @@ -3460,6 +3613,7 @@ def create_dummy_standard_logging_payload() -> StandardLoggingPayload: response_cost=None, additional_headers=None, litellm_overhead_time_ms=None, + batch_models=None, ) # Convert numeric values to appropriate types diff --git a/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py b/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py index 46d40be9c5..ebb1032a19 100644 --- a/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py +++ b/litellm/litellm_core_utils/llm_response_utils/convert_dict_to_response.py @@ -9,6 +9,7 @@ from typing import Dict, Iterable, List, Literal, Optional, Tuple, Union import litellm from litellm._logging import verbose_logger from litellm.constants import RESPONSE_FORMAT_TOOL_NAME +from litellm.types.llms.openai import ChatCompletionThinkingBlock from litellm.types.utils import ( ChatCompletionDeltaToolCall, 
ChatCompletionMessageToolCall, @@ -128,12 +129,7 @@ def convert_to_streaming_response(response_object: Optional[dict] = None): model_response_object = ModelResponse(stream=True) choice_list = [] for idx, choice in enumerate(response_object["choices"]): - delta = Delta( - content=choice["message"].get("content", None), - role=choice["message"]["role"], - function_call=choice["message"].get("function_call", None), - tool_calls=choice["message"].get("tool_calls", None), - ) + delta = Delta(**choice["message"]) finish_reason = choice.get("finish_reason", None) if finish_reason is None: # gpt-4 vision can return 'finish_reason' or 'finish_details' @@ -243,6 +239,24 @@ def _parse_content_for_reasoning( return None, message_text +def _extract_reasoning_content(message: dict) -> Tuple[Optional[str], Optional[str]]: + """ + Extract reasoning content and main content from a message. + + Args: + message (dict): The message dictionary that may contain reasoning_content + + Returns: + tuple[Optional[str], Optional[str]]: A tuple of (reasoning_content, content) + """ + if "reasoning_content" in message: + return message["reasoning_content"], message["content"] + elif "reasoning" in message: + return message["reasoning"], message["content"] + else: + return _parse_content_for_reasoning(message.get("content")) + + class LiteLLMResponseObjectHandler: @staticmethod @@ -456,11 +470,16 @@ def convert_to_model_response_object( # noqa: PLR0915 provider_specific_fields[field] = choice["message"][field] # Handle reasoning models that display `reasoning_content` within `content` - - reasoning_content, content = _parse_content_for_reasoning( - choice["message"].get("content") + reasoning_content, content = _extract_reasoning_content( + choice["message"] ) + # Handle thinking models that display `thinking_blocks` within `content` + thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None + if "thinking_blocks" in choice["message"]: + thinking_blocks = 
choice["message"]["thinking_blocks"] + provider_specific_fields["thinking_blocks"] = thinking_blocks + if reasoning_content: provider_specific_fields["reasoning_content"] = ( reasoning_content @@ -473,6 +492,8 @@ def convert_to_model_response_object( # noqa: PLR0915 tool_calls=tool_calls, audio=choice["message"].get("audio", None), provider_specific_fields=provider_specific_fields, + reasoning_content=reasoning_content, + thinking_blocks=thinking_blocks, ) finish_reason = choice.get("finish_reason", None) if finish_reason is None: diff --git a/litellm/litellm_core_utils/model_param_helper.py b/litellm/litellm_core_utils/model_param_helper.py new file mode 100644 index 0000000000..09a2c15a77 --- /dev/null +++ b/litellm/litellm_core_utils/model_param_helper.py @@ -0,0 +1,133 @@ +from typing import Set + +from openai.types.audio.transcription_create_params import TranscriptionCreateParams +from openai.types.chat.completion_create_params import ( + CompletionCreateParamsNonStreaming, + CompletionCreateParamsStreaming, +) +from openai.types.completion_create_params import ( + CompletionCreateParamsNonStreaming as TextCompletionCreateParamsNonStreaming, +) +from openai.types.completion_create_params import ( + CompletionCreateParamsStreaming as TextCompletionCreateParamsStreaming, +) +from openai.types.embedding_create_params import EmbeddingCreateParams + +from litellm.types.rerank import RerankRequest + + +class ModelParamHelper: + + @staticmethod + def get_standard_logging_model_parameters( + model_parameters: dict, + ) -> dict: + """ """ + standard_logging_model_parameters: dict = {} + supported_model_parameters = ( + ModelParamHelper._get_relevant_args_to_use_for_logging() + ) + + for key, value in model_parameters.items(): + if key in supported_model_parameters: + standard_logging_model_parameters[key] = value + return standard_logging_model_parameters + + @staticmethod + def get_exclude_params_for_model_parameters() -> Set[str]: + return set(["messages", "prompt", 
"input"]) + + @staticmethod + def _get_relevant_args_to_use_for_logging() -> Set[str]: + """ + Gets all relevant llm api params besides the ones with prompt content + """ + all_openai_llm_api_params = ModelParamHelper._get_all_llm_api_params() + # Exclude parameters that contain prompt content + combined_kwargs = all_openai_llm_api_params.difference( + set(ModelParamHelper.get_exclude_params_for_model_parameters()) + ) + return combined_kwargs + + @staticmethod + def _get_all_llm_api_params() -> Set[str]: + """ + Gets the supported kwargs for each call type and combines them + """ + chat_completion_kwargs = ( + ModelParamHelper._get_litellm_supported_chat_completion_kwargs() + ) + text_completion_kwargs = ( + ModelParamHelper._get_litellm_supported_text_completion_kwargs() + ) + embedding_kwargs = ModelParamHelper._get_litellm_supported_embedding_kwargs() + transcription_kwargs = ( + ModelParamHelper._get_litellm_supported_transcription_kwargs() + ) + rerank_kwargs = ModelParamHelper._get_litellm_supported_rerank_kwargs() + exclude_kwargs = ModelParamHelper._get_exclude_kwargs() + + combined_kwargs = chat_completion_kwargs.union( + text_completion_kwargs, + embedding_kwargs, + transcription_kwargs, + rerank_kwargs, + ) + combined_kwargs = combined_kwargs.difference(exclude_kwargs) + return combined_kwargs + + @staticmethod + def _get_litellm_supported_chat_completion_kwargs() -> Set[str]: + """ + Get the litellm supported chat completion kwargs + + This follows the OpenAI API Spec + """ + all_chat_completion_kwargs = set( + CompletionCreateParamsNonStreaming.__annotations__.keys() + ).union(set(CompletionCreateParamsStreaming.__annotations__.keys())) + return all_chat_completion_kwargs + + @staticmethod + def _get_litellm_supported_text_completion_kwargs() -> Set[str]: + """ + Get the litellm supported text completion kwargs + + This follows the OpenAI API Spec + """ + all_text_completion_kwargs = set( + TextCompletionCreateParamsNonStreaming.__annotations__.keys() 
+ ).union(set(TextCompletionCreateParamsStreaming.__annotations__.keys())) + return all_text_completion_kwargs + + @staticmethod + def _get_litellm_supported_rerank_kwargs() -> Set[str]: + """ + Get the litellm supported rerank kwargs + """ + return set(RerankRequest.model_fields.keys()) + + @staticmethod + def _get_litellm_supported_embedding_kwargs() -> Set[str]: + """ + Get the litellm supported embedding kwargs + + This follows the OpenAI API Spec + """ + return set(EmbeddingCreateParams.__annotations__.keys()) + + @staticmethod + def _get_litellm_supported_transcription_kwargs() -> Set[str]: + """ + Get the litellm supported transcription kwargs + + This follows the OpenAI API Spec + """ + return set(TranscriptionCreateParams.__annotations__.keys()) + + @staticmethod + def _get_exclude_kwargs() -> Set[str]: + """ + Get the kwargs to exclude from the cache key + """ + return set(["metadata"]) diff --git a/litellm/litellm_core_utils/prompt_templates/factory.py b/litellm/litellm_core_utils/prompt_templates/factory.py index 2b1af67091..df7aa2cbd0 100644 --- a/litellm/litellm_core_utils/prompt_templates/factory.py +++ b/litellm/litellm_core_utils/prompt_templates/factory.py @@ -187,53 +187,125 @@ def ollama_pt( final_prompt_value="### Response:", messages=messages, ) - elif "llava" in model: - prompt = "" - images = [] - for message in messages: - if isinstance(message["content"], str): - prompt += message["content"] - elif isinstance(message["content"], list): - # see https://docs.litellm.ai/docs/providers/openai#openai-vision-models - for element in message["content"]: - if isinstance(element, dict): - if element["type"] == "text": - prompt += element["text"] - elif element["type"] == "image_url": - base64_image = convert_to_ollama_image( - element["image_url"]["url"] - ) - images.append(base64_image) - return {"prompt": prompt, "images": images} else: + user_message_types = {"user", "tool", "function"} + msg_i = 0 + images = [] prompt = "" - for message in 
messages: - role = message["role"] - content = message.get("content", "") + while msg_i < len(messages): + init_msg_i = msg_i + user_content_str = "" + ## MERGE CONSECUTIVE USER CONTENT ## + while ( + msg_i < len(messages) and messages[msg_i]["role"] in user_message_types + ): + msg_content = messages[msg_i].get("content") + if msg_content: + if isinstance(msg_content, list): + for m in msg_content: + if m.get("type", "") == "image_url": + if isinstance(m["image_url"], str): + images.append(m["image_url"]) + elif isinstance(m["image_url"], dict): + images.append(m["image_url"]["url"]) + elif m.get("type", "") == "text": + user_content_str += m["text"] + else: + # Tool message content will always be a string + user_content_str += msg_content - if "tool_calls" in message: - tool_calls = [] + msg_i += 1 - for call in message["tool_calls"]: - call_id: str = call["id"] - function_name: str = call["function"]["name"] - arguments = json.loads(call["function"]["arguments"]) + if user_content_str: + prompt += f"### User:\n{user_content_str}\n\n" - tool_calls.append( - { - "id": call_id, - "type": "function", - "function": {"name": function_name, "arguments": arguments}, - } + assistant_content_str = "" + ## MERGE CONSECUTIVE ASSISTANT CONTENT ## + while msg_i < len(messages) and messages[msg_i]["role"] == "assistant": + msg_content = messages[msg_i].get("content") + if msg_content: + if isinstance(msg_content, list): + for m in msg_content: + if m.get("type", "") == "text": + assistant_content_str += m["text"] + elif isinstance(msg_content, str): + # Tool message content will always be a string + assistant_content_str += msg_content + + tool_calls = messages[msg_i].get("tool_calls") + ollama_tool_calls = [] + if tool_calls: + for call in tool_calls: + call_id: str = call["id"] + function_name: str = call["function"]["name"] + arguments = json.loads(call["function"]["arguments"]) + + ollama_tool_calls.append( + { + "id": call_id, + "type": "function", + "function": { + 
"name": function_name, + "arguments": arguments, + }, + } + ) + + if ollama_tool_calls: + assistant_content_str += ( + f"Tool Calls: {json.dumps(ollama_tool_calls, indent=2)}" ) - prompt += f"### Assistant:\nTool Calls: {json.dumps(tool_calls, indent=2)}\n\n" + msg_i += 1 - elif "tool_call_id" in message: - prompt += f"### User:\n{message['content']}\n\n" + if assistant_content_str: + prompt += f"### Assistant:\n{assistant_content_str}\n\n" - elif content: - prompt += f"### {role.capitalize()}:\n{content}\n\n" + if msg_i == init_msg_i: # prevent infinite loops + raise litellm.BadRequestError( + message=BAD_MESSAGE_ERROR_STR + f"passed in {messages[msg_i]}", + model=model, + llm_provider="ollama", + ) + # prompt = "" + # images = [] + # for message in messages: + # if isinstance(message["content"], str): + # prompt += message["content"] + # elif isinstance(message["content"], list): + # # see https://docs.litellm.ai/docs/providers/openai#openai-vision-models + # for element in message["content"]: + # if isinstance(element, dict): + # if element["type"] == "text": + # prompt += element["text"] + # elif element["type"] == "image_url": + # base64_image = convert_to_ollama_image( + # element["image_url"]["url"] + # ) + # images.append(base64_image) + + # if "tool_calls" in message: + # tool_calls = [] + + # for call in message["tool_calls"]: + # call_id: str = call["id"] + # function_name: str = call["function"]["name"] + # arguments = json.loads(call["function"]["arguments"]) + + # tool_calls.append( + # { + # "id": call_id, + # "type": "function", + # "function": {"name": function_name, "arguments": arguments}, + # } + # ) + + # prompt += f"### Assistant:\nTool Calls: {json.dumps(tool_calls, indent=2)}\n\n" + + # elif "tool_call_id" in message: + # prompt += f"### User:\n{message['content']}\n\n" + + return {"prompt": prompt, "images": images} return prompt @@ -680,12 +752,13 @@ def convert_generic_image_chunk_to_openai_image_obj( Return: 
"data:image/jpeg;base64,{base64_image}" """ - return "data:{};{},{}".format( - image_chunk["media_type"], image_chunk["type"], image_chunk["data"] - ) + media_type = image_chunk["media_type"] + return "data:{};{},{}".format(media_type, image_chunk["type"], image_chunk["data"]) -def convert_to_anthropic_image_obj(openai_image_url: str) -> GenericImageParsingChunk: +def convert_to_anthropic_image_obj( + openai_image_url: str, format: Optional[str] +) -> GenericImageParsingChunk: """ Input: "image_url": "data:image/jpeg;base64,{base64_image}", @@ -702,7 +775,11 @@ def convert_to_anthropic_image_obj(openai_image_url: str) -> GenericImageParsing openai_image_url = convert_url_to_base64(url=openai_image_url) # Extract the media type and base64 data media_type, base64_data = openai_image_url.split("data:")[1].split(";base64,") - media_type = media_type.replace("\\/", "/") + + if format: + media_type = format + else: + media_type = media_type.replace("\\/", "/") return GenericImageParsingChunk( type="base64", @@ -820,11 +897,12 @@ def anthropic_messages_pt_xml(messages: list): if isinstance(messages[msg_i]["content"], list): for m in messages[msg_i]["content"]: if m.get("type", "") == "image_url": + format = m["image_url"].get("format") user_content.append( { "type": "image", "source": convert_to_anthropic_image_obj( - m["image_url"]["url"] + m["image_url"]["url"], format=format ), } ) @@ -1156,10 +1234,13 @@ def convert_to_anthropic_tool_result( ) elif content["type"] == "image_url": if isinstance(content["image_url"], str): - image_chunk = convert_to_anthropic_image_obj(content["image_url"]) - else: image_chunk = convert_to_anthropic_image_obj( - content["image_url"]["url"] + content["image_url"], format=None + ) + else: + format = content["image_url"].get("format") + image_chunk = convert_to_anthropic_image_obj( + content["image_url"]["url"], format=format ) anthropic_content_list.append( AnthropicMessagesImageParam( @@ -1282,6 +1363,7 @@ def 
add_cache_control_to_content( AnthropicMessagesImageParam, AnthropicMessagesTextParam, AnthropicMessagesDocumentParam, + ChatCompletionThinkingBlock, ], orignal_content_element: Union[dict, AllMessageValues], ): @@ -1317,6 +1399,7 @@ def _anthropic_content_element_factory( data=image_chunk["data"], ), ) + return _anthropic_content_element @@ -1368,13 +1451,16 @@ def anthropic_messages_pt( # noqa: PLR0915 for m in user_message_types_block["content"]: if m.get("type", "") == "image_url": m = cast(ChatCompletionImageObject, m) + format: Optional[str] = None if isinstance(m["image_url"], str): image_chunk = convert_to_anthropic_image_obj( - openai_image_url=m["image_url"] + openai_image_url=m["image_url"], format=None ) else: + format = m["image_url"].get("format") image_chunk = convert_to_anthropic_image_obj( - openai_image_url=m["image_url"]["url"] + openai_image_url=m["image_url"]["url"], + format=format, ) _anthropic_content_element = ( @@ -1454,12 +1540,23 @@ def anthropic_messages_pt( # noqa: PLR0915 assistant_content_block["content"], list ): for m in assistant_content_block["content"]: - # handle text + # handle thinking blocks + thinking_block = cast(str, m.get("thinking", "")) + text_block = cast(str, m.get("text", "")) if ( - m.get("type", "") == "text" and len(m.get("text", "")) > 0 + m.get("type", "") == "thinking" and len(thinking_block) > 0 + ): # don't pass empty text blocks. anthropic api raises errors. + anthropic_message: Union[ + ChatCompletionThinkingBlock, + AnthropicMessagesTextParam, + ] = cast(ChatCompletionThinkingBlock, m) + assistant_content.append(anthropic_message) + # handle text + elif ( + m.get("type", "") == "text" and len(text_block) > 0 ): # don't pass empty text blocks. anthropic api raises errors. 
anthropic_message = AnthropicMessagesTextParam( - type="text", text=m.get("text") + type="text", text=text_block ) _cached_message = add_cache_control_to_content( anthropic_content_element=anthropic_message, @@ -1512,6 +1609,7 @@ def anthropic_messages_pt( # noqa: PLR0915 msg_i += 1 if assistant_content: + new_messages.append({"role": "assistant", "content": assistant_content}) if msg_i == init_msg_i: # prevent infinite loops @@ -1520,17 +1618,6 @@ def anthropic_messages_pt( # noqa: PLR0915 model=model, llm_provider=llm_provider, ) - if not new_messages or new_messages[0]["role"] != "user": - if litellm.modify_params: - new_messages.insert( - 0, {"role": "user", "content": [{"type": "text", "text": "."}]} - ) - else: - raise Exception( - "Invalid first message={}. Should always start with 'role'='user' for Anthropic. System prompt is sent separately for Anthropic. set 'litellm.modify_params = True' or 'litellm_settings:modify_params = True' on proxy, to insert a placeholder user message - '.' 
as the first message, ".format( - new_messages - ) - ) if new_messages[-1]["role"] == "assistant": if isinstance(new_messages[-1]["content"], str): @@ -2151,6 +2238,10 @@ from email.message import Message import httpx +from litellm.types.llms.bedrock import ( + BedrockConverseReasoningContentBlock, + BedrockConverseReasoningTextBlock, +) from litellm.types.llms.bedrock import ContentBlock as BedrockContentBlock from litellm.types.llms.bedrock import DocumentBlock as BedrockDocumentBlock from litellm.types.llms.bedrock import ImageBlock as BedrockImageBlock @@ -2297,8 +2388,11 @@ class BedrockImageProcessor: ) @classmethod - def process_image_sync(cls, image_url: str) -> BedrockContentBlock: + def process_image_sync( + cls, image_url: str, format: Optional[str] = None + ) -> BedrockContentBlock: """Synchronous image processing.""" + if "base64" in image_url: img_bytes, mime_type, image_format = cls._parse_base64_image(image_url) elif "http://" in image_url or "https://" in image_url: @@ -2309,11 +2403,17 @@ class BedrockImageProcessor: "Unsupported image type. Expected either image url or base64 encoded string" ) + if format: + mime_type = format + image_format = mime_type.split("/")[1] + image_format = cls._validate_format(mime_type, image_format) return cls._create_bedrock_block(img_bytes, mime_type, image_format) @classmethod - async def process_image_async(cls, image_url: str) -> BedrockContentBlock: + async def process_image_async( + cls, image_url: str, format: Optional[str] + ) -> BedrockContentBlock: """Asynchronous image processing.""" if "base64" in image_url: @@ -2328,6 +2428,10 @@ class BedrockImageProcessor: "Unsupported image type. 
Expected either image url or base64 encoded string" ) + if format: # override with user-defined params + mime_type = format + image_format = mime_type.split("/")[1] + image_format = cls._validate_format(mime_type, image_format) return cls._create_bedrock_block(img_bytes, mime_type, image_format) @@ -2815,12 +2919,14 @@ class BedrockConverseMessagesProcessor: _part = BedrockContentBlock(text=element["text"]) _parts.append(_part) elif element["type"] == "image_url": + format: Optional[str] = None if isinstance(element["image_url"], dict): image_url = element["image_url"]["url"] + format = element["image_url"].get("format") else: image_url = element["image_url"] _part = await BedrockImageProcessor.process_image_async( # type: ignore - image_url=image_url + image_url=image_url, format=format ) _parts.append(_part) # type: ignore _cache_point_block = ( @@ -2920,7 +3026,14 @@ class BedrockConverseMessagesProcessor: assistants_parts: List[BedrockContentBlock] = [] for element in _assistant_content: if isinstance(element, dict): - if element["type"] == "text": + if element["type"] == "thinking": + thinking_block = BedrockConverseMessagesProcessor.translate_thinking_blocks_to_reasoning_content_blocks( + thinking_blocks=[ + cast(ChatCompletionThinkingBlock, element) + ] + ) + assistants_parts.extend(thinking_block) + elif element["type"] == "text": assistants_part = BedrockContentBlock( text=element["text"] ) @@ -2963,6 +3076,28 @@ class BedrockConverseMessagesProcessor: return contents + @staticmethod + def translate_thinking_blocks_to_reasoning_content_blocks( + thinking_blocks: List[ChatCompletionThinkingBlock], + ) -> List[BedrockContentBlock]: + reasoning_content_blocks: List[BedrockContentBlock] = [] + for thinking_block in thinking_blocks: + reasoning_text = thinking_block.get("thinking") + reasoning_signature = thinking_block.get("signature") + text_block = BedrockConverseReasoningTextBlock( + text=reasoning_text or "", + ) + if reasoning_signature is not None: + 
text_block["signature"] = reasoning_signature + reasoning_content_block = BedrockConverseReasoningContentBlock( + reasoningText=text_block, + ) + bedrock_content_block = BedrockContentBlock( + reasoningContent=reasoning_content_block + ) + reasoning_content_blocks.append(bedrock_content_block) + return reasoning_content_blocks + def _bedrock_converse_messages_pt( # noqa: PLR0915 messages: List, @@ -3024,12 +3159,15 @@ def _bedrock_converse_messages_pt( # noqa: PLR0915 _part = BedrockContentBlock(text=element["text"]) _parts.append(_part) elif element["type"] == "image_url": + format: Optional[str] = None if isinstance(element["image_url"], dict): image_url = element["image_url"]["url"] + format = element["image_url"].get("format") else: image_url = element["image_url"] _part = BedrockImageProcessor.process_image_sync( # type: ignore - image_url=image_url + image_url=image_url, + format=format, ) _parts.append(_part) # type: ignore _cache_point_block = ( @@ -3109,17 +3247,36 @@ def _bedrock_converse_messages_pt( # noqa: PLR0915 assistant_content: List[BedrockContentBlock] = [] ## MERGE CONSECUTIVE ASSISTANT CONTENT ## while msg_i < len(messages) and messages[msg_i]["role"] == "assistant": + assistant_message_block = get_assistant_message_block_or_continue_message( message=messages[msg_i], assistant_continue_message=assistant_continue_message, ) _assistant_content = assistant_message_block.get("content", None) + thinking_blocks = cast( + Optional[List[ChatCompletionThinkingBlock]], + assistant_message_block.get("thinking_blocks"), + ) + + if thinking_blocks is not None: + assistant_content.extend( + BedrockConverseMessagesProcessor.translate_thinking_blocks_to_reasoning_content_blocks( + thinking_blocks + ) + ) if _assistant_content is not None and isinstance(_assistant_content, list): assistants_parts: List[BedrockContentBlock] = [] for element in _assistant_content: if isinstance(element, dict): - if element["type"] == "text": + if element["type"] == "thinking": + 
thinking_block = BedrockConverseMessagesProcessor.translate_thinking_blocks_to_reasoning_content_blocks( + thinking_blocks=[ + cast(ChatCompletionThinkingBlock, element) + ] + ) + assistants_parts.extend(thinking_block) + elif element["type"] == "text": assistants_part = BedrockContentBlock(text=element["text"]) assistants_parts.append(assistants_part) elif element["type"] == "image_url": diff --git a/litellm/litellm_core_utils/streaming_handler.py b/litellm/litellm_core_utils/streaming_handler.py index 5e9fb7aa76..5d5a8bf256 100644 --- a/litellm/litellm_core_utils/streaming_handler.py +++ b/litellm/litellm_core_utils/streaming_handler.py @@ -5,7 +5,7 @@ import threading import time import traceback import uuid -from typing import Any, Callable, Dict, List, Optional, cast +from typing import Any, Callable, Dict, List, Optional, Union, cast import httpx from pydantic import BaseModel @@ -14,6 +14,8 @@ import litellm from litellm import verbose_logger from litellm.litellm_core_utils.redact_messages import LiteLLMLoggingObject from litellm.litellm_core_utils.thread_pool_executor import executor +from litellm.types.llms.openai import ChatCompletionChunk +from litellm.types.router import GenericLiteLLMParams from litellm.types.utils import Delta from litellm.types.utils import GenericStreamingChunk as GChunk from litellm.types.utils import ( @@ -69,6 +71,17 @@ class CustomStreamWrapper: self.completion_stream = completion_stream self.sent_first_chunk = False self.sent_last_chunk = False + + litellm_params: GenericLiteLLMParams = GenericLiteLLMParams( + **self.logging_obj.model_call_details.get("litellm_params", {}) + ) + self.merge_reasoning_content_in_choices: bool = ( + litellm_params.merge_reasoning_content_in_choices or False + ) + self.sent_first_thinking_block = False + self.sent_last_thinking_block = False + self.thinking_content = "" + self.system_fingerprint: Optional[str] = None self.received_finish_reason: Optional[str] = None self.intermittent_finish_reason: 
Optional[str] = ( @@ -86,12 +99,7 @@ class CustomStreamWrapper: self.holding_chunk = "" self.complete_response = "" self.response_uptil_now = "" - _model_info = ( - self.logging_obj.model_call_details.get("litellm_params", {}).get( - "model_info", {} - ) - or {} - ) + _model_info: Dict = litellm_params.model_info or {} _api_base = get_api_base( model=model or "", @@ -110,7 +118,7 @@ class CustomStreamWrapper: ) # GUARANTEE OPENAI HEADERS IN RESPONSE self._response_headers = _response_headers - self.response_id = None + self.response_id: Optional[str] = None self.logging_loop = None self.rules = Rules() self.stream_options = stream_options or getattr( @@ -629,7 +637,10 @@ class CustomStreamWrapper: if isinstance(chunk, bytes): chunk = chunk.decode("utf-8") if "text_output" in chunk: - response = chunk.replace("data: ", "").strip() + response = ( + CustomStreamWrapper._strip_sse_data_from_chunk(chunk) or "" + ) + response = response.strip() parsed_response = json.loads(response) else: return { @@ -713,7 +724,7 @@ class CustomStreamWrapper: def is_delta_empty(self, delta: Delta) -> bool: is_empty = True - if delta.content is not None: + if delta.content: is_empty = False elif delta.tool_calls is not None: is_empty = False @@ -721,16 +732,45 @@ class CustomStreamWrapper: is_empty = False return is_empty - def return_processed_chunk_logic( # noqa + def set_model_id( + self, id: str, model_response: ModelResponseStream + ) -> ModelResponseStream: + """ + Set the model id and response id to the given id. + + Ensure model id is always the same across all chunks. + + If first chunk sent + id set, use that id for all chunks. 
+ """ + if self.response_id is None: + self.response_id = id + if self.response_id is not None and isinstance(self.response_id, str): + model_response.id = self.response_id + return model_response + + def copy_model_response_level_provider_specific_fields( + self, + original_chunk: Union[ModelResponseStream, ChatCompletionChunk], + model_response: ModelResponseStream, + ) -> ModelResponseStream: + """ + Copy provider_specific_fields from original_chunk to model_response. + """ + provider_specific_fields = getattr( + original_chunk, "provider_specific_fields", None + ) + if provider_specific_fields is not None: + model_response.provider_specific_fields = provider_specific_fields + for k, v in provider_specific_fields.items(): + setattr(model_response, k, v) + return model_response + + def is_chunk_non_empty( self, completion_obj: Dict[str, Any], model_response: ModelResponseStream, response_obj: Dict[str, Any], - ): - - print_verbose( - f"completion_obj: {completion_obj}, model_response.choices[0]: {model_response.choices[0]}, response_obj: {response_obj}" - ) + ) -> bool: if ( "content" in completion_obj and ( @@ -746,13 +786,40 @@ class CustomStreamWrapper: "function_call" in completion_obj and completion_obj["function_call"] is not None ) + or ( + "reasoning_content" in model_response.choices[0].delta + and model_response.choices[0].delta.reasoning_content is not None + ) or (model_response.choices[0].delta.provider_specific_fields is not None) + or ( + "provider_specific_fields" in model_response + and model_response.choices[0].delta.provider_specific_fields is not None + ) or ( "provider_specific_fields" in response_obj and response_obj["provider_specific_fields"] is not None ) - ): # cannot set content of an OpenAI Object to be an empty string + ): + return True + else: + return False + def return_processed_chunk_logic( # noqa + self, + completion_obj: Dict[str, Any], + model_response: ModelResponseStream, + response_obj: Dict[str, Any], + ): + + 
print_verbose( + f"completion_obj: {completion_obj}, model_response.choices[0]: {model_response.choices[0]}, response_obj: {response_obj}" + ) + is_chunk_non_empty = self.is_chunk_non_empty( + completion_obj, model_response, response_obj + ) + if ( + is_chunk_non_empty + ): # cannot set content of an OpenAI Object to be an empty string self.safety_checker() hold, model_response_str = self.check_special_tokens( chunk=completion_obj["content"], @@ -763,14 +830,12 @@ class CustomStreamWrapper: ## check if openai/azure chunk original_chunk = response_obj.get("original_chunk", None) if original_chunk: - model_response.id = original_chunk.id - self.response_id = original_chunk.id if len(original_chunk.choices) > 0: choices = [] for choice in original_chunk.choices: try: if isinstance(choice, BaseModel): - choice_json = choice.model_dump() + choice_json = choice.model_dump() # type: ignore choice_json.pop( "finish_reason", None ) # for mistral etc. which return a value in their last chunk (not-openai compatible). 
@@ -798,9 +863,10 @@ class CustomStreamWrapper: model_response.choices[0].delta, "role" ): _initial_delta = model_response.choices[0].delta.model_dump() + _initial_delta.pop("role", None) model_response.choices[0].delta = Delta(**_initial_delta) - print_verbose( + verbose_logger.debug( f"model_response.choices[0].delta: {model_response.choices[0].delta}" ) else: @@ -817,6 +883,10 @@ class CustomStreamWrapper: _index: Optional[int] = completion_obj.get("index") if _index is not None: model_response.choices[0].index = _index + + self._optional_combine_thinking_block_in_choices( + model_response=model_response + ) print_verbose(f"returning model_response: {model_response}") return model_response else: @@ -842,6 +912,9 @@ class CustomStreamWrapper: _is_delta_empty = self.is_delta_empty(delta=model_response.choices[0].delta) if _is_delta_empty: + model_response.choices[0].delta = Delta( + content=None + ) # ensure empty delta chunk returned # get any function call arguments model_response.choices[0].finish_reason = map_finish_reason( finish_reason=self.received_finish_reason @@ -870,7 +943,49 @@ class CustomStreamWrapper: self.chunks.append(model_response) return - def chunk_creator(self, chunk): # type: ignore # noqa: PLR0915 + def _optional_combine_thinking_block_in_choices( + self, model_response: ModelResponseStream + ) -> None: + """ + UI's Like OpenWebUI expect to get 1 chunk with ... tags in the chunk content + + In place updates the model_response object with reasoning_content in content with ... 
tags + + Enabled when `merge_reasoning_content_in_choices=True` passed in request params + + + """ + if self.merge_reasoning_content_in_choices is True: + reasoning_content = getattr( + model_response.choices[0].delta, "reasoning_content", None + ) + if reasoning_content: + if self.sent_first_thinking_block is False: + model_response.choices[0].delta.content += ( + "" + reasoning_content + ) + self.sent_first_thinking_block = True + elif ( + self.sent_first_thinking_block is True + and hasattr(model_response.choices[0].delta, "reasoning_content") + and model_response.choices[0].delta.reasoning_content + ): + model_response.choices[0].delta.content = reasoning_content + elif ( + self.sent_first_thinking_block is True + and not self.sent_last_thinking_block + and model_response.choices[0].delta.content + ): + model_response.choices[0].delta.content = ( + "" + model_response.choices[0].delta.content + ) + self.sent_last_thinking_block = True + + if hasattr(model_response.choices[0].delta, "reasoning_content"): + del model_response.choices[0].delta.reasoning_content + return + + def chunk_creator(self, chunk: Any): # type: ignore # noqa: PLR0915 model_response = self.model_response_creator() response_obj: Dict[str, Any] = {} @@ -886,16 +1001,13 @@ class CustomStreamWrapper: ) # check if chunk is a generic streaming chunk ) or ( self.custom_llm_provider - and ( - self.custom_llm_provider == "anthropic" - or self.custom_llm_provider in litellm._custom_providers - ) + and self.custom_llm_provider in litellm._custom_providers ): if self.received_finish_reason is not None: if "provider_specific_fields" not in chunk: raise StopIteration - anthropic_response_obj: GChunk = chunk + anthropic_response_obj: GChunk = cast(GChunk, chunk) completion_obj["content"] = anthropic_response_obj["text"] if anthropic_response_obj["is_finished"]: self.received_finish_reason = anthropic_response_obj[ @@ -927,7 +1039,7 @@ class CustomStreamWrapper: ].items(): setattr(model_response, key, 
value) - response_obj = anthropic_response_obj + response_obj = cast(Dict[str, Any], anthropic_response_obj) elif self.model == "replicate" or self.custom_llm_provider == "replicate": response_obj = self.handle_replicate_chunk(chunk) completion_obj["content"] = response_obj["text"] @@ -989,6 +1101,7 @@ class CustomStreamWrapper: try: completion_obj["content"] = chunk.text except Exception as e: + original_exception = e if "Part has no text." in str(e): ## check for function calling function_call = ( @@ -1030,7 +1143,7 @@ class CustomStreamWrapper: _model_response.choices = [_streaming_response] response_obj = {"original_chunk": _model_response} else: - raise e + raise original_exception if ( hasattr(chunk.candidates[0], "finish_reason") and chunk.candidates[0].finish_reason.name @@ -1093,8 +1206,9 @@ class CustomStreamWrapper: total_tokens=response_obj["usage"].total_tokens, ) elif self.custom_llm_provider == "text-completion-codestral": - response_obj = litellm.CodestralTextCompletionConfig()._chunk_parser( - chunk + response_obj = cast( + Dict[str, Any], + litellm.CodestralTextCompletionConfig()._chunk_parser(chunk), ) completion_obj["content"] = response_obj["text"] print_verbose(f"completion obj content: {completion_obj['content']}") @@ -1156,8 +1270,9 @@ class CustomStreamWrapper: self.received_finish_reason = response_obj["finish_reason"] if response_obj.get("original_chunk", None) is not None: if hasattr(response_obj["original_chunk"], "id"): - model_response.id = response_obj["original_chunk"].id - self.response_id = model_response.id + model_response = self.set_model_id( + response_obj["original_chunk"].id, model_response + ) if hasattr(response_obj["original_chunk"], "system_fingerprint"): model_response.system_fingerprint = response_obj[ "original_chunk" @@ -1206,8 +1321,16 @@ class CustomStreamWrapper: ): # function / tool calling branch - only set for openai/azure compatible endpoints # enter this branch when no content has been passed in response 
original_chunk = response_obj.get("original_chunk", None) - model_response.id = original_chunk.id - self.response_id = original_chunk.id + if hasattr(original_chunk, "id"): + model_response = self.set_model_id( + original_chunk.id, model_response + ) + if hasattr(original_chunk, "provider_specific_fields"): + model_response = ( + self.copy_model_response_level_provider_specific_fields( + original_chunk, model_response + ) + ) if original_chunk.choices and len(original_chunk.choices) > 0: delta = original_chunk.choices[0].delta if delta is not None and ( @@ -1708,6 +1831,42 @@ class CustomStreamWrapper: extra_kwargs={}, ) + @staticmethod + def _strip_sse_data_from_chunk(chunk: Optional[str]) -> Optional[str]: + """ + Strips the 'data: ' prefix from Server-Sent Events (SSE) chunks. + + Some providers like sagemaker send it as `data:`, need to handle both + + SSE messages are prefixed with 'data: ' which is part of the protocol, + not the actual content from the LLM. This method removes that prefix + and returns the actual content. + + Args: + chunk: The SSE chunk that may contain the 'data: ' prefix (string or bytes) + + Returns: + The chunk with the 'data: ' prefix removed, or the original chunk + if no prefix was found. Returns None if input is None. 
+ + See OpenAI Python Ref for this: https://github.com/openai/openai-python/blob/041bf5a8ec54da19aad0169671793c2078bd6173/openai/api_requestor.py#L100 + """ + if chunk is None: + return None + + if isinstance(chunk, str): + # OpenAI sends `data: ` + if chunk.startswith("data: "): + # Strip the prefix and any leading whitespace that might follow it + _length_of_sse_data_prefix = len("data: ") + return chunk[_length_of_sse_data_prefix:] + elif chunk.startswith("data:"): + # Sagemaker sends `data:`, no trailing whitespace + _length_of_sse_data_prefix = len("data:") + return chunk[_length_of_sse_data_prefix:] + + return chunk + def calculate_total_usage(chunks: List[ModelResponse]) -> Usage: """Assume most recent usage chunk has total usage uptil then.""" diff --git a/litellm/llms/aiohttp_openai/chat/transformation.py b/litellm/llms/aiohttp_openai/chat/transformation.py index 53157ad113..625704dbea 100644 --- a/litellm/llms/aiohttp_openai/chat/transformation.py +++ b/litellm/llms/aiohttp_openai/chat/transformation.py @@ -26,7 +26,7 @@ else: class AiohttpOpenAIChatConfig(OpenAILikeChatConfig): def get_complete_url( self, - api_base: str, + api_base: Optional[str], model: str, optional_params: dict, stream: Optional[bool] = None, @@ -35,6 +35,8 @@ class AiohttpOpenAIChatConfig(OpenAILikeChatConfig): Ensure - /v1/chat/completions is at the end of the url """ + if api_base is None: + api_base = "https://api.openai.com" if not api_base.endswith("/chat/completions"): api_base += "/chat/completions" diff --git a/litellm/llms/anthropic/chat/handler.py b/litellm/llms/anthropic/chat/handler.py index 114ed27c9f..f2c5f390d7 100644 --- a/litellm/llms/anthropic/chat/handler.py +++ b/litellm/llms/anthropic/chat/handler.py @@ -34,7 +34,12 @@ from litellm.types.llms.openai import ( ChatCompletionToolCallChunk, ChatCompletionUsageBlock, ) -from litellm.types.utils import GenericStreamingChunk +from litellm.types.utils import ( + Delta, + GenericStreamingChunk, + ModelResponseStream, + 
StreamingChoices, +) from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager from ...base import BaseLLM @@ -469,7 +474,10 @@ class ModelResponseIterator: if len(self.content_blocks) == 0: return False - if self.content_blocks[0]["delta"]["type"] == "text_delta": + if ( + self.content_blocks[0]["delta"]["type"] == "text_delta" + or self.content_blocks[0]["delta"]["type"] == "thinking_delta" + ): return False for block in self.content_blocks: @@ -507,7 +515,12 @@ class ModelResponseIterator: return usage_block - def _content_block_delta_helper(self, chunk: dict): + def _content_block_delta_helper(self, chunk: dict) -> Tuple[ + str, + Optional[ChatCompletionToolCallChunk], + List[ChatCompletionThinkingBlock], + Dict[str, Any], + ]: """ Helper function to handle the content block delta """ @@ -516,6 +529,8 @@ class ModelResponseIterator: tool_use: Optional[ChatCompletionToolCallChunk] = None provider_specific_fields = {} content_block = ContentBlockDelta(**chunk) # type: ignore + thinking_blocks: List[ChatCompletionThinkingBlock] = [] + self.content_blocks.append(content_block) if "text" in content_block["delta"]: text = content_block["delta"]["text"] @@ -533,27 +548,43 @@ class ModelResponseIterator: provider_specific_fields["citation"] = content_block["delta"]["citation"] elif ( "thinking" in content_block["delta"] - or "signature_delta" == content_block["delta"] + or "signature" in content_block["delta"] ): - provider_specific_fields["thinking_blocks"] = [ + thinking_blocks = [ ChatCompletionThinkingBlock( type="thinking", - thinking=content_block["delta"].get("thinking"), - signature_delta=content_block["delta"].get("signature"), + thinking=content_block["delta"].get("thinking") or "", + signature=content_block["delta"].get("signature"), ) ] - return text, tool_use, provider_specific_fields + provider_specific_fields["thinking_blocks"] = thinking_blocks + return text, tool_use, thinking_blocks, provider_specific_fields - def 
chunk_parser(self, chunk: dict) -> GenericStreamingChunk: + def _handle_reasoning_content( + self, thinking_blocks: List[ChatCompletionThinkingBlock] + ) -> Optional[str]: + """ + Handle the reasoning content + """ + reasoning_content = None + for block in thinking_blocks: + if reasoning_content is None: + reasoning_content = "" + if "thinking" in block: + reasoning_content += block["thinking"] + return reasoning_content + + def chunk_parser(self, chunk: dict) -> ModelResponseStream: try: type_chunk = chunk.get("type", "") or "" text = "" tool_use: Optional[ChatCompletionToolCallChunk] = None - is_finished = False finish_reason = "" usage: Optional[ChatCompletionUsageBlock] = None provider_specific_fields: Dict[str, Any] = {} + reasoning_content: Optional[str] = None + thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None index = int(chunk.get("index", 0)) if type_chunk == "content_block_delta": @@ -561,9 +592,13 @@ class ModelResponseIterator: Anthropic content chunk chunk = {'type': 'content_block_delta', 'index': 0, 'delta': {'type': 'text_delta', 'text': 'Hello'}} """ - text, tool_use, provider_specific_fields = ( + text, tool_use, thinking_blocks, provider_specific_fields = ( self._content_block_delta_helper(chunk=chunk) ) + if thinking_blocks: + reasoning_content = self._handle_reasoning_content( + thinking_blocks=thinking_blocks + ) elif type_chunk == "content_block_start": """ event: content_block_start @@ -585,9 +620,11 @@ class ModelResponseIterator: "index": self.tool_index, } elif type_chunk == "content_block_stop": + ContentBlockStop(**chunk) # type: ignore # check if tool call content block is_empty = self.check_empty_tool_call_args() + if is_empty: tool_use = { "id": None, @@ -610,7 +647,6 @@ class ModelResponseIterator: or "stop" ) usage = self._handle_usage(anthropic_usage_chunk=message_delta["usage"]) - is_finished = True elif type_chunk == "message_start": """ Anthropic @@ -649,16 +685,27 @@ class ModelResponseIterator: text, 
tool_use = self._handle_json_mode_chunk(text=text, tool_use=tool_use) - returned_chunk = GenericStreamingChunk( - text=text, - tool_use=tool_use, - is_finished=is_finished, - finish_reason=finish_reason, + returned_chunk = ModelResponseStream( + choices=[ + StreamingChoices( + index=index, + delta=Delta( + content=text, + tool_calls=[tool_use] if tool_use is not None else None, + provider_specific_fields=( + provider_specific_fields + if provider_specific_fields + else None + ), + thinking_blocks=( + thinking_blocks if thinking_blocks else None + ), + reasoning_content=reasoning_content, + ), + finish_reason=finish_reason, + ) + ], usage=usage, - index=index, - provider_specific_fields=( - provider_specific_fields if provider_specific_fields else None - ), ) return returned_chunk @@ -769,7 +816,7 @@ class ModelResponseIterator: except ValueError as e: raise RuntimeError(f"Error parsing chunk: {e},\nReceived chunk: {chunk}") - def convert_str_chunk_to_generic_chunk(self, chunk: str) -> GenericStreamingChunk: + def convert_str_chunk_to_generic_chunk(self, chunk: str) -> ModelResponseStream: """ Convert a string chunk to a GenericStreamingChunk @@ -789,11 +836,4 @@ class ModelResponseIterator: data_json = json.loads(str_line[5:]) return self.chunk_parser(chunk=data_json) else: - return GenericStreamingChunk( - text="", - is_finished=False, - finish_reason="", - usage=None, - index=0, - tool_use=None, - ) + return ModelResponseStream() diff --git a/litellm/llms/anthropic/chat/transformation.py b/litellm/llms/anthropic/chat/transformation.py index 580b65f77f..383c1cd3e5 100644 --- a/litellm/llms/anthropic/chat/transformation.py +++ b/litellm/llms/anthropic/chat/transformation.py @@ -23,6 +23,7 @@ from litellm.types.llms.openai import ( AllMessageValues, ChatCompletionCachedContent, ChatCompletionSystemMessage, + ChatCompletionThinkingBlock, ChatCompletionToolCallChunk, ChatCompletionToolCallFunctionChunk, ChatCompletionToolParam, @@ -80,7 +81,7 @@ class 
AnthropicConfig(BaseConfig): return super().get_config() def get_supported_openai_params(self, model: str): - return [ + params = [ "stream", "stop", "temperature", @@ -95,6 +96,11 @@ class AnthropicConfig(BaseConfig): "user", ] + if "claude-3-7-sonnet" in model: + params.append("thinking") + + return params + def get_json_schema_from_pydantic_object( self, response_format: Union[Any, Dict, None] ) -> Optional[dict]: @@ -117,15 +123,16 @@ class AnthropicConfig(BaseConfig): prompt_caching_set: bool = False, pdf_used: bool = False, is_vertex_request: bool = False, + user_anthropic_beta_headers: Optional[List[str]] = None, ) -> dict: - betas = [] + betas = set() if prompt_caching_set: - betas.append("prompt-caching-2024-07-31") + betas.add("prompt-caching-2024-07-31") if computer_tool_used: - betas.append("computer-use-2024-10-22") + betas.add("computer-use-2024-10-22") if pdf_used: - betas.append("pdfs-2024-09-25") + betas.add("pdfs-2024-09-25") headers = { "anthropic-version": anthropic_version or "2023-06-01", "x-api-key": api_key, @@ -133,6 +140,9 @@ class AnthropicConfig(BaseConfig): "content-type": "application/json", } + if user_anthropic_beta_headers is not None: + betas.update(user_anthropic_beta_headers) + # Don't send any beta headers to Vertex, Vertex has failed requests when they are sent if is_vertex_request is True: pass @@ -283,18 +293,6 @@ class AnthropicConfig(BaseConfig): new_stop = new_v return new_stop - def _add_tools_to_optional_params( - self, optional_params: dict, tools: List[AllAnthropicToolsValues] - ) -> dict: - if "tools" not in optional_params: - optional_params["tools"] = tools - else: - optional_params["tools"] = [ - *optional_params["tools"], - *tools, - ] - return optional_params - def map_openai_params( self, non_default_params: dict, @@ -335,6 +333,10 @@ class AnthropicConfig(BaseConfig): optional_params["top_p"] = value if param == "response_format" and isinstance(value, dict): + ignore_response_format_types = ["text"] + if 
value["type"] in ignore_response_format_types: # value is a no-op + continue + json_schema: Optional[dict] = None if "response_schema" in value: json_schema = value["response_schema"] @@ -358,7 +360,8 @@ class AnthropicConfig(BaseConfig): optional_params["json_mode"] = True if param == "user": optional_params["metadata"] = {"user_id": value} - + if param == "thinking": + optional_params["thinking"] = value return optional_params def _create_json_tool_call_for_response_format( @@ -584,12 +587,14 @@ class AnthropicConfig(BaseConfig): def extract_response_content(self, completion_response: dict) -> Tuple[ str, Optional[List[Any]], - Optional[List[Dict[str, Any]]], + Optional[List[ChatCompletionThinkingBlock]], + Optional[str], List[ChatCompletionToolCallChunk], ]: text_content = "" citations: Optional[List[Any]] = None - thinking_blocks: Optional[List[Dict[str, Any]]] = None + thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None + reasoning_content: Optional[str] = None tool_calls: List[ChatCompletionToolCallChunk] = [] for idx, content in enumerate(completion_response["content"]): if content["type"] == "text": @@ -615,8 +620,13 @@ class AnthropicConfig(BaseConfig): if content.get("thinking", None) is not None: if thinking_blocks is None: thinking_blocks = [] - thinking_blocks.append(content) - return text_content, citations, thinking_blocks, tool_calls + thinking_blocks.append(cast(ChatCompletionThinkingBlock, content)) + if thinking_blocks is not None: + reasoning_content = "" + for block in thinking_blocks: + if "thinking" in block: + reasoning_content += block["thinking"] + return text_content, citations, thinking_blocks, reasoning_content, tool_calls def transform_response( self, @@ -666,10 +676,11 @@ class AnthropicConfig(BaseConfig): else: text_content = "" citations: Optional[List[Any]] = None - thinking_blocks: Optional[List[Dict[str, Any]]] = None + thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None + reasoning_content: 
Optional[str] = None tool_calls: List[ChatCompletionToolCallChunk] = [] - text_content, citations, thinking_blocks, tool_calls = ( + text_content, citations, thinking_blocks, reasoning_content, tool_calls = ( self.extract_response_content(completion_response=completion_response) ) @@ -680,6 +691,8 @@ class AnthropicConfig(BaseConfig): "citations": citations, "thinking_blocks": thinking_blocks, }, + thinking_blocks=thinking_blocks, + reasoning_content=reasoning_content, ) ## HANDLE JSON MODE - anthropic returns single function call @@ -774,6 +787,13 @@ class AnthropicConfig(BaseConfig): headers=cast(httpx.Headers, headers), ) + def _get_user_anthropic_beta_headers( + self, anthropic_beta_header: Optional[str] + ) -> Optional[List[str]]: + if anthropic_beta_header is None: + return None + return anthropic_beta_header.split(",") + def validate_environment( self, headers: dict, @@ -794,13 +814,18 @@ class AnthropicConfig(BaseConfig): prompt_caching_set = self.is_cache_control_set(messages=messages) computer_tool_used = self.is_computer_tool_used(tools=tools) pdf_used = self.is_pdf_used(messages=messages) + user_anthropic_beta_headers = self._get_user_anthropic_beta_headers( + anthropic_beta_header=headers.get("anthropic-beta") + ) anthropic_headers = self.get_anthropic_headers( computer_tool_used=computer_tool_used, prompt_caching_set=prompt_caching_set, pdf_used=pdf_used, api_key=api_key, is_vertex_request=optional_params.get("is_vertex_request", False), + user_anthropic_beta_headers=user_anthropic_beta_headers, ) headers = {**headers, **anthropic_headers} + return headers diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py new file mode 100644 index 0000000000..a7dfff74d9 --- /dev/null +++ b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py @@ -0,0 +1,179 @@ +""" +- call /messages on Anthropic API +- Make streaming + non-streaming request - just 
pass it through direct to Anthropic. No need to do anything special here +- Ensure requests are logged in the DB - stream + non-stream + +""" + +import json +from typing import Any, AsyncIterator, Dict, Optional, Union, cast + +import httpx + +import litellm +from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj +from litellm.llms.base_llm.anthropic_messages.transformation import ( + BaseAnthropicMessagesConfig, +) +from litellm.llms.custom_httpx.http_handler import ( + AsyncHTTPHandler, + get_async_httpx_client, +) +from litellm.types.router import GenericLiteLLMParams +from litellm.types.utils import ProviderSpecificHeader +from litellm.utils import ProviderConfigManager, client + + +class AnthropicMessagesHandler: + + @staticmethod + async def _handle_anthropic_streaming( + response: httpx.Response, + request_body: dict, + litellm_logging_obj: LiteLLMLoggingObj, + ) -> AsyncIterator: + """Helper function to handle Anthropic streaming responses using the existing logging handlers""" + from datetime import datetime + + from litellm.proxy.pass_through_endpoints.streaming_handler import ( + PassThroughStreamingHandler, + ) + from litellm.proxy.pass_through_endpoints.success_handler import ( + PassThroughEndpointLogging, + ) + from litellm.proxy.pass_through_endpoints.types import EndpointType + + # Create success handler object + passthrough_success_handler_obj = PassThroughEndpointLogging() + + # Use the existing streaming handler for Anthropic + start_time = datetime.now() + return PassThroughStreamingHandler.chunk_processor( + response=response, + request_body=request_body, + litellm_logging_obj=litellm_logging_obj, + endpoint_type=EndpointType.ANTHROPIC, + start_time=start_time, + passthrough_success_handler_obj=passthrough_success_handler_obj, + url_route="/v1/messages", + ) + + +@client +async def anthropic_messages( + api_key: str, + model: str, + stream: bool = False, + api_base: Optional[str] = None, + client: 
Optional[AsyncHTTPHandler] = None, + custom_llm_provider: Optional[str] = None, + **kwargs, +) -> Union[Dict[str, Any], AsyncIterator]: + """ + Makes Anthropic `/v1/messages` API calls In the Anthropic API Spec + """ + # Use provided client or create a new one + optional_params = GenericLiteLLMParams(**kwargs) + model, _custom_llm_provider, dynamic_api_key, dynamic_api_base = ( + litellm.get_llm_provider( + model=model, + custom_llm_provider=custom_llm_provider, + api_base=optional_params.api_base, + api_key=optional_params.api_key, + ) + ) + anthropic_messages_provider_config: Optional[BaseAnthropicMessagesConfig] = ( + ProviderConfigManager.get_provider_anthropic_messages_config( + model=model, + provider=litellm.LlmProviders(_custom_llm_provider), + ) + ) + if anthropic_messages_provider_config is None: + raise ValueError( + f"Anthropic messages provider config not found for model: {model}" + ) + if client is None or not isinstance(client, AsyncHTTPHandler): + async_httpx_client = get_async_httpx_client( + llm_provider=litellm.LlmProviders.ANTHROPIC + ) + else: + async_httpx_client = client + + litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj", None) + + # Prepare headers + provider_specific_header = cast( + Optional[ProviderSpecificHeader], kwargs.get("provider_specific_header", None) + ) + extra_headers = ( + provider_specific_header.get("extra_headers", {}) + if provider_specific_header + else {} + ) + headers = anthropic_messages_provider_config.validate_environment( + headers=extra_headers or {}, + model=model, + api_key=api_key, + ) + + litellm_logging_obj.update_environment_variables( + model=model, + optional_params=dict(optional_params), + litellm_params={ + "metadata": kwargs.get("metadata", {}), + "preset_cache_key": None, + "stream_response": {}, + **optional_params.model_dump(exclude_unset=True), + }, + custom_llm_provider=_custom_llm_provider, + ) + litellm_logging_obj.model_call_details.update(kwargs) + + # Prepare request 
body + request_body = kwargs.copy() + request_body = { + k: v + for k, v in request_body.items() + if k + in anthropic_messages_provider_config.get_supported_anthropic_messages_params( + model=model + ) + } + request_body["stream"] = stream + request_body["model"] = model + litellm_logging_obj.stream = stream + + # Make the request + request_url = anthropic_messages_provider_config.get_complete_url( + api_base=api_base, model=model + ) + + litellm_logging_obj.pre_call( + input=[{"role": "user", "content": json.dumps(request_body)}], + api_key="", + additional_args={ + "complete_input_dict": request_body, + "api_base": str(request_url), + "headers": headers, + }, + ) + + response = await async_httpx_client.post( + url=request_url, + headers=headers, + data=json.dumps(request_body), + stream=stream, + ) + response.raise_for_status() + + # used for logging + cost tracking + litellm_logging_obj.model_call_details["httpx_response"] = response + + if stream: + return await AnthropicMessagesHandler._handle_anthropic_streaming( + response=response, + request_body=request_body, + litellm_logging_obj=litellm_logging_obj, + ) + else: + return response.json() diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py b/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py new file mode 100644 index 0000000000..e9b598f18d --- /dev/null +++ b/litellm/llms/anthropic/experimental_pass_through/messages/transformation.py @@ -0,0 +1,47 @@ +from typing import Optional + +from litellm.llms.base_llm.anthropic_messages.transformation import ( + BaseAnthropicMessagesConfig, +) + +DEFAULT_ANTHROPIC_API_BASE = "https://api.anthropic.com" +DEFAULT_ANTHROPIC_API_VERSION = "2023-06-01" + + +class AnthropicMessagesConfig(BaseAnthropicMessagesConfig): + def get_supported_anthropic_messages_params(self, model: str) -> list: + return [ + "messages", + "model", + "system", + "max_tokens", + "stop_sequences", + "temperature", + "top_p", + "top_k", 
+ "tools", + "tool_choice", + "thinking", + # TODO: Add Anthropic `metadata` support + # "metadata", + ] + + def get_complete_url(self, api_base: Optional[str], model: str) -> str: + api_base = api_base or DEFAULT_ANTHROPIC_API_BASE + if not api_base.endswith("/v1/messages"): + api_base = f"{api_base}/v1/messages" + return api_base + + def validate_environment( + self, + headers: dict, + model: str, + api_key: Optional[str] = None, + ) -> dict: + if "x-api-key" not in headers: + headers["x-api-key"] = api_key + if "anthropic-version" not in headers: + headers["anthropic-version"] = DEFAULT_ANTHROPIC_API_VERSION + if "content-type" not in headers: + headers["content-type"] = "application/json" + return headers diff --git a/litellm/llms/anthropic/experimental_pass_through/transformation.py b/litellm/llms/anthropic/experimental_pass_through/transformation.py deleted file mode 100644 index b24cf47ad4..0000000000 --- a/litellm/llms/anthropic/experimental_pass_through/transformation.py +++ /dev/null @@ -1,412 +0,0 @@ -import json -from typing import List, Literal, Optional, Tuple, Union - -from openai.types.chat.chat_completion_chunk import Choice as OpenAIStreamingChoice - -from litellm.types.llms.anthropic import ( - AllAnthropicToolsValues, - AnthopicMessagesAssistantMessageParam, - AnthropicFinishReason, - AnthropicMessagesRequest, - AnthropicMessagesToolChoice, - AnthropicMessagesUserMessageParam, - AnthropicResponse, - AnthropicResponseContentBlockText, - AnthropicResponseContentBlockToolUse, - AnthropicResponseUsageBlock, - ContentBlockDelta, - ContentJsonBlockDelta, - ContentTextBlockDelta, - MessageBlockDelta, - MessageDelta, - UsageDelta, -) -from litellm.types.llms.openai import ( - AllMessageValues, - ChatCompletionAssistantMessage, - ChatCompletionAssistantToolCall, - ChatCompletionImageObject, - ChatCompletionImageUrlObject, - ChatCompletionRequest, - ChatCompletionSystemMessage, - ChatCompletionTextObject, - ChatCompletionToolCallFunctionChunk, - 
ChatCompletionToolChoiceFunctionParam, - ChatCompletionToolChoiceObjectParam, - ChatCompletionToolChoiceValues, - ChatCompletionToolMessage, - ChatCompletionToolParam, - ChatCompletionToolParamFunctionChunk, - ChatCompletionUserMessage, -) -from litellm.types.utils import Choices, ModelResponse, Usage - - -class AnthropicExperimentalPassThroughConfig: - def __init__(self): - pass - - ### FOR [BETA] `/v1/messages` endpoint support - - def translatable_anthropic_params(self) -> List: - """ - Which anthropic params, we need to translate to the openai format. - """ - return ["messages", "metadata", "system", "tool_choice", "tools"] - - def translate_anthropic_messages_to_openai( # noqa: PLR0915 - self, - messages: List[ - Union[ - AnthropicMessagesUserMessageParam, - AnthopicMessagesAssistantMessageParam, - ] - ], - ) -> List: - new_messages: List[AllMessageValues] = [] - for m in messages: - user_message: Optional[ChatCompletionUserMessage] = None - tool_message_list: List[ChatCompletionToolMessage] = [] - new_user_content_list: List[ - Union[ChatCompletionTextObject, ChatCompletionImageObject] - ] = [] - ## USER MESSAGE ## - if m["role"] == "user": - ## translate user message - message_content = m.get("content") - if message_content and isinstance(message_content, str): - user_message = ChatCompletionUserMessage( - role="user", content=message_content - ) - elif message_content and isinstance(message_content, list): - for content in message_content: - if content["type"] == "text": - text_obj = ChatCompletionTextObject( - type="text", text=content["text"] - ) - new_user_content_list.append(text_obj) - elif content["type"] == "image": - image_url = ChatCompletionImageUrlObject( - url=f"data:{content['type']};base64,{content['source']}" - ) - image_obj = ChatCompletionImageObject( - type="image_url", image_url=image_url - ) - - new_user_content_list.append(image_obj) - elif content["type"] == "tool_result": - if "content" not in content: - tool_result = 
ChatCompletionToolMessage( - role="tool", - tool_call_id=content["tool_use_id"], - content="", - ) - tool_message_list.append(tool_result) - elif isinstance(content["content"], str): - tool_result = ChatCompletionToolMessage( - role="tool", - tool_call_id=content["tool_use_id"], - content=content["content"], - ) - tool_message_list.append(tool_result) - elif isinstance(content["content"], list): - for c in content["content"]: - if c["type"] == "text": - tool_result = ChatCompletionToolMessage( - role="tool", - tool_call_id=content["tool_use_id"], - content=c["text"], - ) - tool_message_list.append(tool_result) - elif c["type"] == "image": - image_str = ( - f"data:{c['type']};base64,{c['source']}" - ) - tool_result = ChatCompletionToolMessage( - role="tool", - tool_call_id=content["tool_use_id"], - content=image_str, - ) - tool_message_list.append(tool_result) - - if user_message is not None: - new_messages.append(user_message) - - if len(new_user_content_list) > 0: - new_messages.append({"role": "user", "content": new_user_content_list}) # type: ignore - - if len(tool_message_list) > 0: - new_messages.extend(tool_message_list) - - ## ASSISTANT MESSAGE ## - assistant_message_str: Optional[str] = None - tool_calls: List[ChatCompletionAssistantToolCall] = [] - if m["role"] == "assistant": - if isinstance(m["content"], str): - assistant_message_str = m["content"] - elif isinstance(m["content"], list): - for content in m["content"]: - if content["type"] == "text": - if assistant_message_str is None: - assistant_message_str = content["text"] - else: - assistant_message_str += content["text"] - elif content["type"] == "tool_use": - function_chunk = ChatCompletionToolCallFunctionChunk( - name=content["name"], - arguments=json.dumps(content["input"]), - ) - - tool_calls.append( - ChatCompletionAssistantToolCall( - id=content["id"], - type="function", - function=function_chunk, - ) - ) - - if assistant_message_str is not None or len(tool_calls) > 0: - assistant_message = 
ChatCompletionAssistantMessage( - role="assistant", - content=assistant_message_str, - ) - if len(tool_calls) > 0: - assistant_message["tool_calls"] = tool_calls - new_messages.append(assistant_message) - - return new_messages - - def translate_anthropic_tool_choice_to_openai( - self, tool_choice: AnthropicMessagesToolChoice - ) -> ChatCompletionToolChoiceValues: - if tool_choice["type"] == "any": - return "required" - elif tool_choice["type"] == "auto": - return "auto" - elif tool_choice["type"] == "tool": - tc_function_param = ChatCompletionToolChoiceFunctionParam( - name=tool_choice.get("name", "") - ) - return ChatCompletionToolChoiceObjectParam( - type="function", function=tc_function_param - ) - else: - raise ValueError( - "Incompatible tool choice param submitted - {}".format(tool_choice) - ) - - def translate_anthropic_tools_to_openai( - self, tools: List[AllAnthropicToolsValues] - ) -> List[ChatCompletionToolParam]: - new_tools: List[ChatCompletionToolParam] = [] - mapped_tool_params = ["name", "input_schema", "description"] - for tool in tools: - function_chunk = ChatCompletionToolParamFunctionChunk( - name=tool["name"], - ) - if "input_schema" in tool: - function_chunk["parameters"] = tool["input_schema"] # type: ignore - if "description" in tool: - function_chunk["description"] = tool["description"] # type: ignore - - for k, v in tool.items(): - if k not in mapped_tool_params: # pass additional computer kwargs - function_chunk.setdefault("parameters", {}).update({k: v}) - new_tools.append( - ChatCompletionToolParam(type="function", function=function_chunk) - ) - - return new_tools - - def translate_anthropic_to_openai( - self, anthropic_message_request: AnthropicMessagesRequest - ) -> ChatCompletionRequest: - """ - This is used by the beta Anthropic Adapter, for translating anthropic `/v1/messages` requests to the openai format. 
- """ - new_messages: List[AllMessageValues] = [] - - ## CONVERT ANTHROPIC MESSAGES TO OPENAI - new_messages = self.translate_anthropic_messages_to_openai( - messages=anthropic_message_request["messages"] - ) - ## ADD SYSTEM MESSAGE TO MESSAGES - if "system" in anthropic_message_request: - new_messages.insert( - 0, - ChatCompletionSystemMessage( - role="system", content=anthropic_message_request["system"] - ), - ) - - new_kwargs: ChatCompletionRequest = { - "model": anthropic_message_request["model"], - "messages": new_messages, - } - ## CONVERT METADATA (user_id) - if "metadata" in anthropic_message_request: - if "user_id" in anthropic_message_request["metadata"]: - new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"] - - # Pass litellm proxy specific metadata - if "litellm_metadata" in anthropic_message_request: - # metadata will be passed to litellm.acompletion(), it's a litellm_param - new_kwargs["metadata"] = anthropic_message_request.pop("litellm_metadata") - - ## CONVERT TOOL CHOICE - if "tool_choice" in anthropic_message_request: - new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai( - tool_choice=anthropic_message_request["tool_choice"] - ) - ## CONVERT TOOLS - if "tools" in anthropic_message_request: - new_kwargs["tools"] = self.translate_anthropic_tools_to_openai( - tools=anthropic_message_request["tools"] - ) - - translatable_params = self.translatable_anthropic_params() - for k, v in anthropic_message_request.items(): - if k not in translatable_params: # pass remaining params as is - new_kwargs[k] = v # type: ignore - - return new_kwargs - - def _translate_openai_content_to_anthropic( - self, choices: List[Choices] - ) -> List[ - Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse] - ]: - new_content: List[ - Union[ - AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse - ] - ] = [] - for choice in choices: - if ( - choice.message.tool_calls is not None - and 
len(choice.message.tool_calls) > 0 - ): - for tool_call in choice.message.tool_calls: - new_content.append( - AnthropicResponseContentBlockToolUse( - type="tool_use", - id=tool_call.id, - name=tool_call.function.name or "", - input=json.loads(tool_call.function.arguments), - ) - ) - elif choice.message.content is not None: - new_content.append( - AnthropicResponseContentBlockText( - type="text", text=choice.message.content - ) - ) - - return new_content - - def _translate_openai_finish_reason_to_anthropic( - self, openai_finish_reason: str - ) -> AnthropicFinishReason: - if openai_finish_reason == "stop": - return "end_turn" - elif openai_finish_reason == "length": - return "max_tokens" - elif openai_finish_reason == "tool_calls": - return "tool_use" - return "end_turn" - - def translate_openai_response_to_anthropic( - self, response: ModelResponse - ) -> AnthropicResponse: - ## translate content block - anthropic_content = self._translate_openai_content_to_anthropic(choices=response.choices) # type: ignore - ## extract finish reason - anthropic_finish_reason = self._translate_openai_finish_reason_to_anthropic( - openai_finish_reason=response.choices[0].finish_reason # type: ignore - ) - # extract usage - usage: Usage = getattr(response, "usage") - anthropic_usage = AnthropicResponseUsageBlock( - input_tokens=usage.prompt_tokens or 0, - output_tokens=usage.completion_tokens or 0, - ) - translated_obj = AnthropicResponse( - id=response.id, - type="message", - role="assistant", - model=response.model or "unknown-model", - stop_sequence=None, - usage=anthropic_usage, - content=anthropic_content, - stop_reason=anthropic_finish_reason, - ) - - return translated_obj - - def _translate_streaming_openai_chunk_to_anthropic( - self, choices: List[OpenAIStreamingChoice] - ) -> Tuple[ - Literal["text_delta", "input_json_delta"], - Union[ContentTextBlockDelta, ContentJsonBlockDelta], - ]: - text: str = "" - partial_json: Optional[str] = None - for choice in choices: - if 
choice.delta.content is not None: - text += choice.delta.content - elif choice.delta.tool_calls is not None: - partial_json = "" - for tool in choice.delta.tool_calls: - if ( - tool.function is not None - and tool.function.arguments is not None - ): - partial_json += tool.function.arguments - - if partial_json is not None: - return "input_json_delta", ContentJsonBlockDelta( - type="input_json_delta", partial_json=partial_json - ) - else: - return "text_delta", ContentTextBlockDelta(type="text_delta", text=text) - - def translate_streaming_openai_response_to_anthropic( - self, response: ModelResponse - ) -> Union[ContentBlockDelta, MessageBlockDelta]: - ## base case - final chunk w/ finish reason - if response.choices[0].finish_reason is not None: - delta = MessageDelta( - stop_reason=self._translate_openai_finish_reason_to_anthropic( - response.choices[0].finish_reason - ), - ) - if getattr(response, "usage", None) is not None: - litellm_usage_chunk: Optional[Usage] = response.usage # type: ignore - elif ( - hasattr(response, "_hidden_params") - and "usage" in response._hidden_params - ): - litellm_usage_chunk = response._hidden_params["usage"] - else: - litellm_usage_chunk = None - if litellm_usage_chunk is not None: - usage_delta = UsageDelta( - input_tokens=litellm_usage_chunk.prompt_tokens or 0, - output_tokens=litellm_usage_chunk.completion_tokens or 0, - ) - else: - usage_delta = UsageDelta(input_tokens=0, output_tokens=0) - return MessageBlockDelta( - type="message_delta", delta=delta, usage=usage_delta - ) - ( - type_of_content, - content_block_delta, - ) = self._translate_streaming_openai_chunk_to_anthropic( - choices=response.choices # type: ignore - ) - return ContentBlockDelta( - type="content_block_delta", - index=response.choices[0].index, - delta=content_block_delta, - ) diff --git a/litellm/llms/azure/batches/handler.py b/litellm/llms/azure/batches/handler.py index 5fae527670..d36ae648ab 100644 --- a/litellm/llms/azure/batches/handler.py +++ 
b/litellm/llms/azure/batches/handler.py @@ -2,7 +2,7 @@ Azure Batches API Handler """ -from typing import Any, Coroutine, Optional, Union +from typing import Any, Coroutine, Optional, Union, cast import httpx @@ -14,6 +14,7 @@ from litellm.types.llms.openai import ( CreateBatchRequest, RetrieveBatchRequest, ) +from litellm.types.utils import LiteLLMBatch class AzureBatchesAPI: @@ -64,9 +65,9 @@ class AzureBatchesAPI: self, create_batch_data: CreateBatchRequest, azure_client: AsyncAzureOpenAI, - ) -> Batch: + ) -> LiteLLMBatch: response = await azure_client.batches.create(**create_batch_data) - return response + return LiteLLMBatch(**response.model_dump()) def create_batch( self, @@ -78,7 +79,7 @@ class AzureBatchesAPI: timeout: Union[float, httpx.Timeout], max_retries: Optional[int], client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = None, - ) -> Union[Batch, Coroutine[Any, Any, Batch]]: + ) -> Union[LiteLLMBatch, Coroutine[Any, Any, LiteLLMBatch]]: azure_client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = ( self.get_azure_openai_client( api_key=api_key, @@ -103,16 +104,16 @@ class AzureBatchesAPI: return self.acreate_batch( # type: ignore create_batch_data=create_batch_data, azure_client=azure_client ) - response = azure_client.batches.create(**create_batch_data) - return response + response = cast(AzureOpenAI, azure_client).batches.create(**create_batch_data) + return LiteLLMBatch(**response.model_dump()) async def aretrieve_batch( self, retrieve_batch_data: RetrieveBatchRequest, client: AsyncAzureOpenAI, - ) -> Batch: + ) -> LiteLLMBatch: response = await client.batches.retrieve(**retrieve_batch_data) - return response + return LiteLLMBatch(**response.model_dump()) def retrieve_batch( self, @@ -149,8 +150,10 @@ class AzureBatchesAPI: return self.aretrieve_batch( # type: ignore retrieve_batch_data=retrieve_batch_data, client=azure_client ) - response = azure_client.batches.retrieve(**retrieve_batch_data) - return response + response = cast(AzureOpenAI, 
azure_client).batches.retrieve( + **retrieve_batch_data + ) + return LiteLLMBatch(**response.model_dump()) async def acancel_batch( self, diff --git a/litellm/llms/azure_ai/chat/transformation.py b/litellm/llms/azure_ai/chat/transformation.py index afedc95001..46a1a6bf9c 100644 --- a/litellm/llms/azure_ai/chat/transformation.py +++ b/litellm/llms/azure_ai/chat/transformation.py @@ -1,4 +1,5 @@ from typing import Any, List, Optional, Tuple, cast +from urllib.parse import urlparse import httpx from httpx import Response @@ -28,16 +29,29 @@ class AzureAIStudioConfig(OpenAIConfig): api_key: Optional[str] = None, api_base: Optional[str] = None, ) -> dict: - if api_base and "services.ai.azure.com" in api_base: + if api_base and self._should_use_api_key_header(api_base): headers["api-key"] = api_key else: headers["Authorization"] = f"Bearer {api_key}" return headers + def _should_use_api_key_header(self, api_base: str) -> bool: + """ + Returns True if the request should use `api-key` header for authentication. + """ + parsed_url = urlparse(api_base) + host = parsed_url.hostname + if host and ( + host.endswith(".services.ai.azure.com") + or host.endswith(".openai.azure.com") + ): + return True + return False + def get_complete_url( self, - api_base: str, + api_base: Optional[str], model: str, optional_params: dict, stream: Optional[bool] = None, @@ -58,6 +72,10 @@ class AzureAIStudioConfig(OpenAIConfig): - A complete URL string, e.g., "https://litellm8397336933.services.ai.azure.com/models/chat/completions?api-version=2024-05-01-preview" """ + if api_base is None: + raise ValueError( + f"api_base is required for Azure AI Studio. Please set the api_base parameter. 
Passed `api_base={api_base}`" + ) original_url = httpx.URL(api_base) # Extract api_version or use default diff --git a/litellm/llms/base_llm/anthropic_messages/transformation.py b/litellm/llms/base_llm/anthropic_messages/transformation.py new file mode 100644 index 0000000000..7619ffbbf6 --- /dev/null +++ b/litellm/llms/base_llm/anthropic_messages/transformation.py @@ -0,0 +1,35 @@ +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Any, Optional + +if TYPE_CHECKING: + from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj + + LiteLLMLoggingObj = _LiteLLMLoggingObj +else: + LiteLLMLoggingObj = Any + + +class BaseAnthropicMessagesConfig(ABC): + @abstractmethod + def validate_environment( + self, + headers: dict, + model: str, + api_key: Optional[str] = None, + ) -> dict: + pass + + @abstractmethod + def get_complete_url(self, api_base: Optional[str], model: str) -> str: + """ + OPTIONAL + + Get the complete url for the request + + Some providers need `model` in `api_base` + """ + return api_base or "" + + @abstractmethod + def get_supported_anthropic_messages_params(self, model: str) -> list: + pass diff --git a/litellm/llms/base_llm/chat/transformation.py b/litellm/llms/base_llm/chat/transformation.py index ac82476a0a..d05b79dd7f 100644 --- a/litellm/llms/base_llm/chat/transformation.py +++ b/litellm/llms/base_llm/chat/transformation.py @@ -111,6 +111,19 @@ class BaseConfig(ABC): """ return False + def _add_tools_to_optional_params(self, optional_params: dict, tools: List) -> dict: + """ + Helper util to add tools to optional_params. 
+ """ + if "tools" not in optional_params: + optional_params["tools"] = tools + else: + optional_params["tools"] = [ + *optional_params["tools"], + *tools, + ] + return optional_params + def translate_developer_role_to_system_role( self, messages: List[AllMessageValues], @@ -158,6 +171,7 @@ class BaseConfig(ABC): optional_params: dict, value: dict, is_response_format_supported: bool, + enforce_tool_choice: bool = True, ) -> dict: """ Follow similar approach to anthropic - translate to a single tool call. @@ -195,9 +209,11 @@ class BaseConfig(ABC): optional_params.setdefault("tools", []) optional_params["tools"].append(_tool) - optional_params["tool_choice"] = _tool_choice + if enforce_tool_choice: + optional_params["tool_choice"] = _tool_choice + optional_params["json_mode"] = True - else: + elif is_response_format_supported: optional_params["response_format"] = value return optional_params @@ -249,7 +265,7 @@ class BaseConfig(ABC): def get_complete_url( self, - api_base: str, + api_base: Optional[str], model: str, optional_params: dict, stream: Optional[bool] = None, @@ -261,6 +277,8 @@ class BaseConfig(ABC): Some providers need `model` in `api_base` """ + if api_base is None: + raise ValueError("api_base is required") return api_base @abstractmethod @@ -315,6 +333,7 @@ class BaseConfig(ABC): data: dict, messages: list, client: Optional[AsyncHTTPHandler] = None, + json_mode: Optional[bool] = None, ) -> CustomStreamWrapper: raise NotImplementedError @@ -328,6 +347,7 @@ class BaseConfig(ABC): data: dict, messages: list, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, + json_mode: Optional[bool] = None, ) -> CustomStreamWrapper: raise NotImplementedError diff --git a/litellm/llms/bedrock/base_aws_llm.py b/litellm/llms/bedrock/base_aws_llm.py index 8158ceab8f..86b47675d4 100644 --- a/litellm/llms/bedrock/base_aws_llm.py +++ b/litellm/llms/bedrock/base_aws_llm.py @@ -2,13 +2,14 @@ import hashlib import json import os from datetime import datetime -from 
typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, cast, get_args import httpx from pydantic import BaseModel from litellm._logging import verbose_logger from litellm.caching.caching import DualCache +from litellm.constants import BEDROCK_INVOKE_PROVIDERS_LITERAL from litellm.litellm_core_utils.dd_tracing import tracer from litellm.secret_managers.main import get_secret @@ -223,6 +224,60 @@ class BaseAWSLLM: # Catch any unexpected errors and return None return None + @staticmethod + def _get_provider_from_model_path( + model_path: str, + ) -> Optional[BEDROCK_INVOKE_PROVIDERS_LITERAL]: + """ + Helper function to get the provider from a model path with format: provider/model-name + + Args: + model_path (str): The model path (e.g., 'llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n' or 'anthropic/model-name') + + Returns: + Optional[str]: The provider name, or None if no valid provider found + """ + parts = model_path.split("/") + if len(parts) >= 1: + provider = parts[0] + if provider in get_args(BEDROCK_INVOKE_PROVIDERS_LITERAL): + return cast(BEDROCK_INVOKE_PROVIDERS_LITERAL, provider) + return None + + @staticmethod + def get_bedrock_invoke_provider( + model: str, + ) -> Optional[BEDROCK_INVOKE_PROVIDERS_LITERAL]: + """ + Helper function to get the bedrock provider from the model + + handles 3 scenarions: + 1. model=invoke/anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic` + 2. model=anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic` + 3. model=llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n -> Returns `llama` + 4. 
model=us.amazon.nova-pro-v1:0 -> Returns `nova` + """ + if model.startswith("invoke/"): + model = model.replace("invoke/", "", 1) + + _split_model = model.split(".")[0] + if _split_model in get_args(BEDROCK_INVOKE_PROVIDERS_LITERAL): + return cast(BEDROCK_INVOKE_PROVIDERS_LITERAL, _split_model) + + # If not a known provider, check for pattern with two slashes + provider = BaseAWSLLM._get_provider_from_model_path(model) + if provider is not None: + return provider + + # check if provider == "nova" + if "nova" in model: + return "nova" + else: + for provider in get_args(BEDROCK_INVOKE_PROVIDERS_LITERAL): + if provider in model: + return provider + return None + def _get_aws_region_name( self, optional_params: dict, model: Optional[str] = None ) -> str: @@ -499,6 +554,7 @@ class BaseAWSLLM: aws_access_key_id = optional_params.pop("aws_access_key_id", None) aws_session_token = optional_params.pop("aws_session_token", None) aws_region_name = self._get_aws_region_name(optional_params, model) + optional_params.pop("aws_region_name", None) aws_role_name = optional_params.pop("aws_role_name", None) aws_session_name = optional_params.pop("aws_session_name", None) aws_profile_name = optional_params.pop("aws_profile_name", None) diff --git a/litellm/llms/bedrock/chat/converse_transformation.py b/litellm/llms/bedrock/chat/converse_transformation.py index 68ae3af478..0b0d55f23d 100644 --- a/litellm/llms/bedrock/chat/converse_transformation.py +++ b/litellm/llms/bedrock/chat/converse_transformation.py @@ -23,6 +23,7 @@ from litellm.types.llms.openai import ( AllMessageValues, ChatCompletionResponseMessage, ChatCompletionSystemMessage, + ChatCompletionThinkingBlock, ChatCompletionToolCallChunk, ChatCompletionToolCallFunctionChunk, ChatCompletionToolParam, @@ -116,6 +117,10 @@ class AmazonConverseConfig(BaseConfig): # only anthropic and mistral support tool choice config. otherwise (E.g. 
cohere) will fail the call - https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ToolChoice.html supported_params.append("tool_choice") + if ( + "claude-3-7" in model + ): # [TODO]: move to a 'supports_reasoning_content' param from model cost map + supported_params.append("thinking") return supported_params def map_tool_choice_values( @@ -162,6 +167,7 @@ class AmazonConverseConfig(BaseConfig): self, json_schema: Optional[dict] = None, schema_name: str = "json_tool_call", + description: Optional[str] = None, ) -> ChatCompletionToolParam: """ Handles creating a tool call for getting responses in JSON format. @@ -184,11 +190,15 @@ class AmazonConverseConfig(BaseConfig): else: _input_schema = json_schema + tool_param_function_chunk = ChatCompletionToolParamFunctionChunk( + name=schema_name, parameters=_input_schema + ) + if description: + tool_param_function_chunk["description"] = description + _tool = ChatCompletionToolParam( type="function", - function=ChatCompletionToolParamFunctionChunk( - name=schema_name, parameters=_input_schema - ), + function=tool_param_function_chunk, ) return _tool @@ -201,15 +211,26 @@ class AmazonConverseConfig(BaseConfig): messages: Optional[List[AllMessageValues]] = None, ) -> dict: for param, value in non_default_params.items(): - if param == "response_format": + if param == "response_format" and isinstance(value, dict): + + ignore_response_format_types = ["text"] + if value["type"] in ignore_response_format_types: # value is a no-op + continue + json_schema: Optional[dict] = None schema_name: str = "" + description: Optional[str] = None if "response_schema" in value: json_schema = value["response_schema"] schema_name = "json_tool_call" elif "json_schema" in value: json_schema = value["json_schema"]["schema"] schema_name = value["json_schema"]["name"] + description = value["json_schema"].get("description") + + if "type" in value and value["type"] == "text": + continue + """ Follow similar approach to anthropic - 
translate to a single tool call. @@ -218,12 +239,14 @@ class AmazonConverseConfig(BaseConfig): - You should set tool_choice (see Forcing tool use) to instruct the model to explicitly use that tool - Remember that the model will pass the input to the tool, so the name of the tool and description should be from the model’s perspective. """ - _tool_choice = {"name": schema_name, "type": "tool"} _tool = self._create_json_tool_call_for_response_format( json_schema=json_schema, schema_name=schema_name if schema_name != "" else "json_tool_call", + description=description, + ) + optional_params = self._add_tools_to_optional_params( + optional_params=optional_params, tools=[_tool] ) - optional_params["tools"] = [_tool] if litellm.utils.supports_tool_choice( model=model, custom_llm_provider=self.custom_llm_provider ): @@ -249,15 +272,18 @@ class AmazonConverseConfig(BaseConfig): optional_params["temperature"] = value if param == "top_p": optional_params["topP"] = value - if param == "tools": - optional_params["tools"] = value + if param == "tools" and isinstance(value, list): + optional_params = self._add_tools_to_optional_params( + optional_params=optional_params, tools=value + ) if param == "tool_choice": _tool_choice_value = self.map_tool_choice_values( model=model, tool_choice=value, drop_params=drop_params # type: ignore ) if _tool_choice_value is not None: optional_params["tool_choice"] = _tool_choice_value - + if param == "thinking": + optional_params["thinking"] = value return optional_params @overload @@ -545,6 +571,37 @@ class AmazonConverseConfig(BaseConfig): encoding=encoding, ) + def _transform_reasoning_content( + self, reasoning_content_blocks: List[BedrockConverseReasoningContentBlock] + ) -> str: + """ + Extract the reasoning text from the reasoning content blocks + + Ensures deepseek reasoning content compatible output. 
+ """ + reasoning_content_str = "" + for block in reasoning_content_blocks: + if "reasoningText" in block: + reasoning_content_str += block["reasoningText"]["text"] + return reasoning_content_str + + def _transform_thinking_blocks( + self, thinking_blocks: List[BedrockConverseReasoningContentBlock] + ) -> List[ChatCompletionThinkingBlock]: + """Return a consistent format for thinking blocks between Anthropic and Bedrock.""" + thinking_blocks_list: List[ChatCompletionThinkingBlock] = [] + for block in thinking_blocks: + if "reasoningText" in block: + _thinking_block = ChatCompletionThinkingBlock(type="thinking") + _text = block["reasoningText"].get("text") + _signature = block["reasoningText"].get("signature") + if _text is not None: + _thinking_block["thinking"] = _text + if _signature is not None: + _thinking_block["signature"] = _signature + thinking_blocks_list.append(_thinking_block) + return thinking_blocks_list + def _transform_response( self, model: str, @@ -618,6 +675,10 @@ class AmazonConverseConfig(BaseConfig): chat_completion_message: ChatCompletionResponseMessage = {"role": "assistant"} content_str = "" tools: List[ChatCompletionToolCallChunk] = [] + reasoningContentBlocks: Optional[List[BedrockConverseReasoningContentBlock]] = ( + None + ) + if message is not None: for idx, content in enumerate(message["content"]): """ @@ -644,8 +705,22 @@ class AmazonConverseConfig(BaseConfig): index=idx, ) tools.append(_tool_response_chunk) - chat_completion_message["content"] = content_str + if "reasoningContent" in content: + if reasoningContentBlocks is None: + reasoningContentBlocks = [] + reasoningContentBlocks.append(content["reasoningContent"]) + if reasoningContentBlocks is not None: + chat_completion_message["provider_specific_fields"] = { + "reasoningContentBlocks": reasoningContentBlocks, + } + chat_completion_message["reasoning_content"] = ( + self._transform_reasoning_content(reasoningContentBlocks) + ) + chat_completion_message["thinking_blocks"] = ( + 
self._transform_thinking_blocks(reasoningContentBlocks) + ) + chat_completion_message["content"] = content_str if json_mode is True and tools is not None and len(tools) == 1: # to support 'json_schema' logic on bedrock models json_mode_content_str: Optional[str] = tools[0]["function"].get("arguments") diff --git a/litellm/llms/bedrock/chat/invoke_handler.py b/litellm/llms/bedrock/chat/invoke_handler.py index 35173f3c56..27289164f7 100644 --- a/litellm/llms/bedrock/chat/invoke_handler.py +++ b/litellm/llms/bedrock/chat/invoke_handler.py @@ -26,7 +26,6 @@ import httpx # type: ignore import litellm from litellm import verbose_logger -from litellm._logging import print_verbose from litellm.caching.caching import InMemoryCache from litellm.litellm_core_utils.core_helpers import map_finish_reason from litellm.litellm_core_utils.litellm_logging import Logging @@ -51,13 +50,19 @@ from litellm.llms.custom_httpx.http_handler import ( ) from litellm.types.llms.bedrock import * from litellm.types.llms.openai import ( + ChatCompletionThinkingBlock, ChatCompletionToolCallChunk, ChatCompletionToolCallFunctionChunk, ChatCompletionUsageBlock, ) -from litellm.types.utils import ChatCompletionMessageToolCall, Choices +from litellm.types.utils import ChatCompletionMessageToolCall, Choices, Delta from litellm.types.utils import GenericStreamingChunk as GChunk -from litellm.types.utils import ModelResponse, ModelResponseStream, Usage +from litellm.types.utils import ( + ModelResponse, + ModelResponseStream, + StreamingChoices, + Usage, +) from litellm.utils import CustomStreamWrapper, get_secret from ..base_aws_llm import BaseAWSLLM @@ -212,7 +217,6 @@ async def make_call( api_key="", data=data, messages=messages, - print_verbose=print_verbose, encoding=litellm.encoding, ) # type: ignore completion_stream: Any = MockResponseIterator( @@ -222,6 +226,7 @@ async def make_call( decoder: AWSEventStreamDecoder = AmazonAnthropicClaudeStreamDecoder( model=model, sync_stream=False, + 
json_mode=json_mode, ) completion_stream = decoder.aiter_bytes( response.aiter_bytes(chunk_size=1024) @@ -298,7 +303,6 @@ def make_sync_call( api_key="", data=data, messages=messages, - print_verbose=print_verbose, encoding=litellm.encoding, ) # type: ignore completion_stream: Any = MockResponseIterator( @@ -308,6 +312,7 @@ def make_sync_call( decoder: AWSEventStreamDecoder = AmazonAnthropicClaudeStreamDecoder( model=model, sync_stream=True, + json_mode=json_mode, ) completion_stream = decoder.iter_bytes(response.iter_bytes(chunk_size=1024)) elif bedrock_invoke_provider == "deepseek_r1": @@ -525,7 +530,7 @@ class BedrockLLM(BaseAWSLLM): ].message.tool_calls: _tool_call = {**tool_call.dict(), "index": 0} _tool_calls.append(_tool_call) - delta_obj = litellm.utils.Delta( + delta_obj = Delta( content=getattr( model_response.choices[0].message, "content", None ), @@ -1146,27 +1151,6 @@ class BedrockLLM(BaseAWSLLM): ) return streaming_response - @staticmethod - def get_bedrock_invoke_provider( - model: str, - ) -> Optional[litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL]: - """ - Helper function to get the bedrock provider from the model - - handles 2 scenarions: - 1. model=anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic` - 2. 
model=llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n -> Returns `llama` - """ - _split_model = model.split(".")[0] - if _split_model in get_args(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL): - return cast(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL, _split_model) - - # If not a known provider, check for pattern with two slashes - provider = BedrockLLM._get_provider_from_model_path(model) - if provider is not None: - return provider - return None - @staticmethod def _get_provider_from_model_path( model_path: str, @@ -1258,14 +1242,40 @@ class AWSEventStreamDecoder: return True return False - def converse_chunk_parser(self, chunk_data: dict) -> GChunk: + def extract_reasoning_content_str( + self, reasoning_content_block: BedrockConverseReasoningContentBlockDelta + ) -> Optional[str]: + if "text" in reasoning_content_block: + return reasoning_content_block["text"] + return None + + def translate_thinking_blocks( + self, thinking_block: BedrockConverseReasoningContentBlockDelta + ) -> Optional[List[ChatCompletionThinkingBlock]]: + """ + Translate the thinking blocks to a string + """ + + thinking_blocks_list: List[ChatCompletionThinkingBlock] = [] + _thinking_block = ChatCompletionThinkingBlock(type="thinking") + if "text" in thinking_block: + _thinking_block["thinking"] = thinking_block["text"] + elif "signature" in thinking_block: + _thinking_block["signature"] = thinking_block["signature"] + _thinking_block["thinking"] = "" # consistent with anthropic response + thinking_blocks_list.append(_thinking_block) + return thinking_blocks_list + + def converse_chunk_parser(self, chunk_data: dict) -> ModelResponseStream: try: verbose_logger.debug("\n\nRaw Chunk: {}\n\n".format(chunk_data)) text = "" tool_use: Optional[ChatCompletionToolCallChunk] = None - is_finished = False finish_reason = "" usage: Optional[ChatCompletionUsageBlock] = None + provider_specific_fields: dict = {} + reasoning_content: Optional[str] = None + thinking_blocks: 
Optional[List[ChatCompletionThinkingBlock]] = None index = int(chunk_data.get("contentBlockIndex", 0)) if "start" in chunk_data: @@ -1305,6 +1315,22 @@ class AWSEventStreamDecoder: }, "index": index, } + elif "reasoningContent" in delta_obj: + provider_specific_fields = { + "reasoningContent": delta_obj["reasoningContent"], + } + reasoning_content = self.extract_reasoning_content_str( + delta_obj["reasoningContent"] + ) + thinking_blocks = self.translate_thinking_blocks( + delta_obj["reasoningContent"] + ) + if ( + thinking_blocks + and len(thinking_blocks) > 0 + and reasoning_content is None + ): + reasoning_content = "" # set to non-empty string to ensure consistency with Anthropic elif ( "contentBlockIndex" in chunk_data ): # stop block, no 'start' or 'delta' object @@ -1321,7 +1347,6 @@ class AWSEventStreamDecoder: } elif "stopReason" in chunk_data: finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop")) - is_finished = True elif "usage" in chunk_data: usage = ChatCompletionUsageBlock( prompt_tokens=chunk_data.get("inputTokens", 0), @@ -1329,18 +1354,33 @@ class AWSEventStreamDecoder: total_tokens=chunk_data.get("totalTokens", 0), ) - response = GChunk( - text=text, - tool_use=tool_use, - is_finished=is_finished, - finish_reason=finish_reason, - usage=usage, - index=index, - ) - + model_response_provider_specific_fields = {} if "trace" in chunk_data: trace = chunk_data.get("trace") - response["provider_specific_fields"] = {"trace": trace} + model_response_provider_specific_fields["trace"] = trace + response = ModelResponseStream( + choices=[ + StreamingChoices( + finish_reason=finish_reason, + index=index, + delta=Delta( + content=text, + role="assistant", + tool_calls=[tool_use] if tool_use else None, + provider_specific_fields=( + provider_specific_fields + if provider_specific_fields + else None + ), + thinking_blocks=thinking_blocks, + reasoning_content=reasoning_content, + ), + ) + ], + usage=usage, + 
provider_specific_fields=model_response_provider_specific_fields, + ) + return response except Exception as e: raise Exception("Received streaming error - {}".format(str(e))) @@ -1474,6 +1514,7 @@ class AmazonAnthropicClaudeStreamDecoder(AWSEventStreamDecoder): self, model: str, sync_stream: bool, + json_mode: Optional[bool] = None, ) -> None: """ Child class of AWSEventStreamDecoder that handles the streaming response from the Anthropic family of models @@ -1484,9 +1525,10 @@ class AmazonAnthropicClaudeStreamDecoder(AWSEventStreamDecoder): self.anthropic_model_response_iterator = AnthropicModelResponseIterator( streaming_response=None, sync_stream=sync_stream, + json_mode=json_mode, ) - def _chunk_parser(self, chunk_data: dict) -> GChunk: + def _chunk_parser(self, chunk_data: dict) -> ModelResponseStream: return self.anthropic_model_response_iterator.chunk_parser(chunk=chunk_data) diff --git a/litellm/llms/bedrock/chat/invoke_transformations/anthropic_claude2_transformation.py b/litellm/llms/bedrock/chat/invoke_transformations/anthropic_claude2_transformation.py index 085cf0b9ca..d0d06ef2b2 100644 --- a/litellm/llms/bedrock/chat/invoke_transformations/anthropic_claude2_transformation.py +++ b/litellm/llms/bedrock/chat/invoke_transformations/anthropic_claude2_transformation.py @@ -3,8 +3,10 @@ from typing import Optional import litellm +from .base_invoke_transformation import AmazonInvokeConfig -class AmazonAnthropicConfig: + +class AmazonAnthropicConfig(AmazonInvokeConfig): """ Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude @@ -57,9 +59,7 @@ class AmazonAnthropicConfig: and v is not None } - def get_supported_openai_params( - self, - ): + def get_supported_openai_params(self, model: str): return [ "max_tokens", "max_completion_tokens", @@ -69,7 +69,13 @@ class AmazonAnthropicConfig: "stream", ] - def map_openai_params(self, non_default_params: dict, optional_params: dict): + def map_openai_params( + self, 
+ non_default_params: dict, + optional_params: dict, + model: str, + drop_params: bool, + ): for param, value in non_default_params.items(): if param == "max_tokens" or param == "max_completion_tokens": optional_params["max_tokens_to_sample"] = value diff --git a/litellm/llms/bedrock/chat/invoke_transformations/anthropic_claude3_transformation.py b/litellm/llms/bedrock/chat/invoke_transformations/anthropic_claude3_transformation.py index 09842aef01..0cac339a3c 100644 --- a/litellm/llms/bedrock/chat/invoke_transformations/anthropic_claude3_transformation.py +++ b/litellm/llms/bedrock/chat/invoke_transformations/anthropic_claude3_transformation.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, Any, List, Optional import httpx -import litellm +from litellm.llms.anthropic.chat.transformation import AnthropicConfig from litellm.llms.bedrock.chat.invoke_transformations.base_invoke_transformation import ( AmazonInvokeConfig, ) @@ -17,7 +17,7 @@ else: LiteLLMLoggingObj = Any -class AmazonAnthropicClaude3Config(AmazonInvokeConfig): +class AmazonAnthropicClaude3Config(AmazonInvokeConfig, AnthropicConfig): """ Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude @@ -28,18 +28,8 @@ class AmazonAnthropicClaude3Config(AmazonInvokeConfig): anthropic_version: str = "bedrock-2023-05-31" - def get_supported_openai_params(self, model: str): - return [ - "max_tokens", - "max_completion_tokens", - "tools", - "tool_choice", - "stream", - "stop", - "temperature", - "top_p", - "extra_headers", - ] + def get_supported_openai_params(self, model: str) -> List[str]: + return AnthropicConfig.get_supported_openai_params(self, model) def map_openai_params( self, @@ -47,21 +37,14 @@ class AmazonAnthropicClaude3Config(AmazonInvokeConfig): optional_params: dict, model: str, drop_params: bool, - ): - for param, value in non_default_params.items(): - if param == "max_tokens" or param == "max_completion_tokens": - optional_params["max_tokens"] = 
value - if param == "tools": - optional_params["tools"] = value - if param == "stream": - optional_params["stream"] = value - if param == "stop": - optional_params["stop_sequences"] = value - if param == "temperature": - optional_params["temperature"] = value - if param == "top_p": - optional_params["top_p"] = value - return optional_params + ) -> dict: + return AnthropicConfig.map_openai_params( + self, + non_default_params, + optional_params, + model, + drop_params, + ) def transform_request( self, @@ -71,7 +54,8 @@ class AmazonAnthropicClaude3Config(AmazonInvokeConfig): litellm_params: dict, headers: dict, ) -> dict: - _anthropic_request = litellm.AnthropicConfig().transform_request( + _anthropic_request = AnthropicConfig.transform_request( + self, model=model, messages=messages, optional_params=optional_params, @@ -80,6 +64,7 @@ class AmazonAnthropicClaude3Config(AmazonInvokeConfig): ) _anthropic_request.pop("model", None) + _anthropic_request.pop("stream", None) if "anthropic_version" not in _anthropic_request: _anthropic_request["anthropic_version"] = self.anthropic_version @@ -99,7 +84,8 @@ class AmazonAnthropicClaude3Config(AmazonInvokeConfig): api_key: Optional[str] = None, json_mode: Optional[bool] = None, ) -> ModelResponse: - return litellm.AnthropicConfig().transform_response( + return AnthropicConfig.transform_response( + self, model=model, raw_response=raw_response, model_response=model_response, diff --git a/litellm/llms/bedrock/chat/invoke_transformations/base_invoke_transformation.py b/litellm/llms/bedrock/chat/invoke_transformations/base_invoke_transformation.py index b7d4f0ae6d..a316eb7ea6 100644 --- a/litellm/llms/bedrock/chat/invoke_transformations/base_invoke_transformation.py +++ b/litellm/llms/bedrock/chat/invoke_transformations/base_invoke_transformation.py @@ -73,7 +73,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM): def get_complete_url( self, - api_base: str, + api_base: Optional[str], model: str, optional_params: dict, stream: 
Optional[bool] = None, @@ -461,6 +461,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM): data: dict, messages: list, client: Optional[AsyncHTTPHandler] = None, + json_mode: Optional[bool] = None, ) -> CustomStreamWrapper: streaming_response = CustomStreamWrapper( completion_stream=None, @@ -475,6 +476,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM): logging_obj=logging_obj, fake_stream=True if "ai21" in api_base else False, bedrock_invoke_provider=self.get_bedrock_invoke_provider(model), + json_mode=json_mode, ), model=model, custom_llm_provider="bedrock", @@ -493,6 +495,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM): data: dict, messages: list, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, + json_mode: Optional[bool] = None, ) -> CustomStreamWrapper: if client is None or isinstance(client, AsyncHTTPHandler): client = _get_httpx_client(params={}) @@ -509,6 +512,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM): logging_obj=logging_obj, fake_stream=True if "ai21" in api_base else False, bedrock_invoke_provider=self.get_bedrock_invoke_provider(model), + json_mode=json_mode, ), model=model, custom_llm_provider="bedrock", @@ -534,7 +538,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM): """ Helper function to get the bedrock provider from the model - handles 3 scenarions: + handles 4 scenarios: 1. model=invoke/anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic` 2. model=anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic` 3. 
model=llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n -> Returns `llama` @@ -555,6 +559,10 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM): # check if provider == "nova" if "nova" in model: return "nova" + + for provider in get_args(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL): + if provider in model: + return provider return None @staticmethod diff --git a/litellm/llms/bedrock/image/image_handler.py b/litellm/llms/bedrock/image/image_handler.py index 4bd63fd21b..59a80b2222 100644 --- a/litellm/llms/bedrock/image/image_handler.py +++ b/litellm/llms/bedrock/image/image_handler.py @@ -10,6 +10,8 @@ import litellm from litellm._logging import verbose_logger from litellm.litellm_core_utils.litellm_logging import Logging as LitellmLogging from litellm.llms.custom_httpx.http_handler import ( + AsyncHTTPHandler, + HTTPHandler, _get_httpx_client, get_async_httpx_client, ) @@ -51,6 +53,7 @@ class BedrockImageGeneration(BaseAWSLLM): aimg_generation: bool = False, api_base: Optional[str] = None, extra_headers: Optional[dict] = None, + client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, ): prepared_request = self._prepare_request( model=model, @@ -69,9 +72,15 @@ class BedrockImageGeneration(BaseAWSLLM): logging_obj=logging_obj, prompt=prompt, model_response=model_response, + client=( + client + if client is not None and isinstance(client, AsyncHTTPHandler) + else None + ), ) - client = _get_httpx_client() + if client is None or not isinstance(client, HTTPHandler): + client = _get_httpx_client() try: response = client.post(url=prepared_request.endpoint_url, headers=prepared_request.prepped.headers, data=prepared_request.body) # type: ignore response.raise_for_status() @@ -99,13 +108,14 @@ class BedrockImageGeneration(BaseAWSLLM): logging_obj: LitellmLogging, prompt: str, model_response: ImageResponse, + client: Optional[AsyncHTTPHandler] = None, ) -> ImageResponse: """ Asynchronous handler for bedrock image generation Awaits the response 
from the bedrock image generation endpoint """ - async_client = get_async_httpx_client( + async_client = client or get_async_httpx_client( llm_provider=litellm.LlmProviders.BEDROCK, params={"timeout": timeout}, ) diff --git a/litellm/llms/cloudflare/chat/transformation.py b/litellm/llms/cloudflare/chat/transformation.py index 1ef6da5a4b..555e3c21f4 100644 --- a/litellm/llms/cloudflare/chat/transformation.py +++ b/litellm/llms/cloudflare/chat/transformation.py @@ -11,6 +11,7 @@ from litellm.llms.base_llm.chat.transformation import ( BaseLLMException, LiteLLMLoggingObj, ) +from litellm.secret_managers.main import get_secret_str from litellm.types.llms.openai import AllMessageValues from litellm.types.utils import ( ChatCompletionToolCallChunk, @@ -75,11 +76,16 @@ class CloudflareChatConfig(BaseConfig): def get_complete_url( self, - api_base: str, + api_base: Optional[str], model: str, optional_params: dict, stream: Optional[bool] = None, ) -> str: + if api_base is None: + account_id = get_secret_str("CLOUDFLARE_ACCOUNT_ID") + api_base = ( + f"https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run/" + ) return api_base + model def get_supported_openai_params(self, model: str) -> List[str]: diff --git a/litellm/llms/codestral/completion/transformation.py b/litellm/llms/codestral/completion/transformation.py index 84551cd553..5955e91deb 100644 --- a/litellm/llms/codestral/completion/transformation.py +++ b/litellm/llms/codestral/completion/transformation.py @@ -84,7 +84,9 @@ class CodestralTextCompletionConfig(OpenAITextCompletionConfig): finish_reason = None logprobs = None - chunk_data = chunk_data.replace("data:", "") + chunk_data = ( + litellm.CustomStreamWrapper._strip_sse_data_from_chunk(chunk_data) or "" + ) chunk_data = chunk_data.strip() if len(chunk_data) == 0 or chunk_data == "[DONE]": return { diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py index ebe5308c1c..0459854c4e 100644 --- 
a/litellm/llms/custom_httpx/llm_http_handler.py +++ b/litellm/llms/custom_httpx/llm_http_handler.py @@ -159,6 +159,7 @@ class BaseLLMHTTPHandler: encoding: Any, api_key: Optional[str] = None, client: Optional[AsyncHTTPHandler] = None, + json_mode: bool = False, ): if client is None: async_httpx_client = get_async_httpx_client( @@ -190,6 +191,7 @@ class BaseLLMHTTPHandler: optional_params=optional_params, litellm_params=litellm_params, encoding=encoding, + json_mode=json_mode, ) def completion( @@ -211,6 +213,7 @@ class BaseLLMHTTPHandler: headers: Optional[dict] = {}, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, ): + json_mode: bool = optional_params.pop("json_mode", False) provider_config = ProviderConfigManager.get_provider_chat_config( model=model, provider=litellm.LlmProviders(custom_llm_provider) @@ -286,6 +289,7 @@ class BaseLLMHTTPHandler: else None ), litellm_params=litellm_params, + json_mode=json_mode, ) else: @@ -309,6 +313,7 @@ class BaseLLMHTTPHandler: if client is not None and isinstance(client, AsyncHTTPHandler) else None ), + json_mode=json_mode, ) if stream is True: @@ -327,6 +332,7 @@ class BaseLLMHTTPHandler: data=data, messages=messages, client=client, + json_mode=json_mode, ) completion_stream, headers = self.make_sync_call( provider_config=provider_config, @@ -380,6 +386,7 @@ class BaseLLMHTTPHandler: optional_params=optional_params, litellm_params=litellm_params, encoding=encoding, + json_mode=json_mode, ) def make_sync_call( @@ -453,6 +460,7 @@ class BaseLLMHTTPHandler: litellm_params: dict, fake_stream: bool = False, client: Optional[AsyncHTTPHandler] = None, + json_mode: Optional[bool] = None, ): if provider_config.has_custom_stream_wrapper is True: return provider_config.get_async_custom_stream_wrapper( @@ -464,6 +472,7 @@ class BaseLLMHTTPHandler: data=data, messages=messages, client=client, + json_mode=json_mode, ) completion_stream, _response_headers = await self.make_async_call_stream_helper( @@ -720,7 +729,7 @@ 
class BaseLLMHTTPHandler: api_base: Optional[str] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, ) -> RerankResponse: - + # get config from model, custom llm provider headers = provider_config.validate_environment( api_key=api_key, @@ -864,7 +873,7 @@ class BaseLLMHTTPHandler: elif isinstance(audio_file, bytes): # Assume it's already binary data binary_data = audio_file - elif isinstance(audio_file, io.BufferedReader): + elif isinstance(audio_file, io.BufferedReader) or isinstance(audio_file, io.BytesIO): # Handle file-like objects binary_data = audio_file.read() diff --git a/litellm/llms/databricks/streaming_utils.py b/litellm/llms/databricks/streaming_utils.py index 0deaa06988..2db53df908 100644 --- a/litellm/llms/databricks/streaming_utils.py +++ b/litellm/llms/databricks/streaming_utils.py @@ -89,7 +89,7 @@ class ModelResponseIterator: raise RuntimeError(f"Error receiving chunk from stream: {e}") try: - chunk = chunk.replace("data:", "") + chunk = litellm.CustomStreamWrapper._strip_sse_data_from_chunk(chunk) or "" chunk = chunk.strip() if len(chunk) > 0: json_chunk = json.loads(chunk) @@ -134,7 +134,7 @@ class ModelResponseIterator: raise RuntimeError(f"Error receiving chunk from stream: {e}") try: - chunk = chunk.replace("data:", "") + chunk = litellm.CustomStreamWrapper._strip_sse_data_from_chunk(chunk) or "" chunk = chunk.strip() if chunk == "[DONE]": raise StopAsyncIteration diff --git a/litellm/llms/deepseek/chat/transformation.py b/litellm/llms/deepseek/chat/transformation.py index e6704de1a1..747129ddd8 100644 --- a/litellm/llms/deepseek/chat/transformation.py +++ b/litellm/llms/deepseek/chat/transformation.py @@ -34,3 +34,21 @@ class DeepSeekChatConfig(OpenAIGPTConfig): ) # type: ignore dynamic_api_key = api_key or get_secret_str("DEEPSEEK_API_KEY") return api_base, dynamic_api_key + + def get_complete_url( + self, + api_base: Optional[str], + model: str, + optional_params: dict, + stream: Optional[bool] = None, + ) -> str: + """ 
+ If api_base is not provided, use the default DeepSeek /chat/completions endpoint. + """ + if not api_base: + api_base = "https://api.deepseek.com/beta" + + if not api_base.endswith("/chat/completions"): + api_base = f"{api_base}/chat/completions" + + return api_base diff --git a/litellm/llms/fireworks_ai/chat/transformation.py b/litellm/llms/fireworks_ai/chat/transformation.py index d64d7b6d29..1c82f24ac0 100644 --- a/litellm/llms/fireworks_ai/chat/transformation.py +++ b/litellm/llms/fireworks_ai/chat/transformation.py @@ -90,6 +90,11 @@ class FireworksAIConfig(OpenAIGPTConfig): ) -> dict: supported_openai_params = self.get_supported_openai_params(model=model) + is_tools_set = any( + param == "tools" and value is not None + for param, value in non_default_params.items() + ) + for param, value in non_default_params.items(): if param == "tool_choice": if value == "required": @@ -98,18 +103,30 @@ class FireworksAIConfig(OpenAIGPTConfig): else: # pass through the value of tool choice optional_params["tool_choice"] = value - elif ( - param == "response_format" and value.get("type", None) == "json_schema" - ): - optional_params["response_format"] = { - "type": "json_object", - "schema": value["json_schema"]["schema"], - } + elif param == "response_format": + + if ( + is_tools_set + ): # fireworks ai doesn't support tools and response_format together + optional_params = self._add_response_format_to_tools( + optional_params=optional_params, + value=value, + is_response_format_supported=False, + enforce_tool_choice=False, # tools and response_format are both set, don't enforce tool_choice + ) + elif "json_schema" in value: + optional_params["response_format"] = { + "type": "json_object", + "schema": value["json_schema"]["schema"], + } + else: + optional_params["response_format"] = value elif param == "max_completion_tokens": optional_params["max_tokens"] = value elif param in supported_openai_params: if value is not None: optional_params[param] = value + return 
optional_params def _add_transform_inline_image_block( diff --git a/litellm/llms/gemini/chat/transformation.py b/litellm/llms/gemini/chat/transformation.py index 6aa4cf5b52..fbc1916dcc 100644 --- a/litellm/llms/gemini/chat/transformation.py +++ b/litellm/llms/gemini/chat/transformation.py @@ -114,12 +114,16 @@ class GoogleAIStudioGeminiConfig(VertexGeminiConfig): if element.get("type") == "image_url": img_element = element _image_url: Optional[str] = None + format: Optional[str] = None if isinstance(img_element.get("image_url"), dict): _image_url = img_element["image_url"].get("url") # type: ignore + format = img_element["image_url"].get("format") # type: ignore else: _image_url = img_element.get("image_url") # type: ignore if _image_url and "https://" in _image_url: - image_obj = convert_to_anthropic_image_obj(_image_url) + image_obj = convert_to_anthropic_image_obj( + _image_url, format=format + ) img_element["image_url"] = ( # type: ignore convert_generic_image_chunk_to_openai_image_obj( image_obj diff --git a/litellm/llms/ollama/completion/transformation.py b/litellm/llms/ollama/completion/transformation.py index da981b6afb..283b2a2437 100644 --- a/litellm/llms/ollama/completion/transformation.py +++ b/litellm/llms/ollama/completion/transformation.py @@ -353,7 +353,7 @@ class OllamaConfig(BaseConfig): def get_complete_url( self, - api_base: str, + api_base: Optional[str], model: str, optional_params: dict, stream: Optional[bool] = None, @@ -365,6 +365,8 @@ class OllamaConfig(BaseConfig): Some providers need `model` in `api_base` """ + if api_base is None: + api_base = "http://localhost:11434" if api_base.endswith("/api/generate"): url = api_base else: diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py index 1047012c2e..6f421680b4 100644 --- a/litellm/llms/ollama_chat.py +++ b/litellm/llms/ollama_chat.py @@ -1,7 +1,7 @@ import json import time import uuid -from typing import Any, List, Optional +from typing import Any, List, Optional, Union 
import aiohttp import httpx @@ -9,7 +9,11 @@ from pydantic import BaseModel import litellm from litellm import verbose_logger -from litellm.llms.custom_httpx.http_handler import get_async_httpx_client +from litellm.llms.custom_httpx.http_handler import ( + AsyncHTTPHandler, + HTTPHandler, + get_async_httpx_client, +) from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig from litellm.types.llms.ollama import OllamaToolCall, OllamaToolCallFunction from litellm.types.llms.openai import ChatCompletionAssistantToolCall @@ -205,6 +209,7 @@ def get_ollama_response( # noqa: PLR0915 api_key: Optional[str] = None, acompletion: bool = False, encoding=None, + client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, ): if api_base.endswith("/api/chat"): url = api_base @@ -301,7 +306,11 @@ def get_ollama_response( # noqa: PLR0915 headers: Optional[dict] = None if api_key is not None: headers = {"Authorization": "Bearer {}".format(api_key)} - response = litellm.module_level_client.post( + + sync_client = litellm.module_level_client + if client is not None and isinstance(client, HTTPHandler): + sync_client = client + response = sync_client.post( url=url, json=data, headers=headers, @@ -508,6 +517,7 @@ async def ollama_async_streaming( verbose_logger.exception( "LiteLLM.ollama(): Exception occured - {}".format(str(e)) ) + raise e async def ollama_acompletion( diff --git a/litellm/llms/openai/chat/gpt_transformation.py b/litellm/llms/openai/chat/gpt_transformation.py index 84a57bbaa6..1f34d63681 100644 --- a/litellm/llms/openai/chat/gpt_transformation.py +++ b/litellm/llms/openai/chat/gpt_transformation.py @@ -20,7 +20,11 @@ from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator from litellm.llms.base_llm.base_utils import BaseLLMModelInfo from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException from litellm.secret_managers.main import get_secret_str -from litellm.types.llms.openai import AllMessageValues 
+from litellm.types.llms.openai import ( + AllMessageValues, + ChatCompletionImageObject, + ChatCompletionImageUrlObject, +) from litellm.types.utils import ModelResponse, ModelResponseStream from litellm.utils import convert_to_model_response_object @@ -178,6 +182,27 @@ class OpenAIGPTConfig(BaseLLMModelInfo, BaseConfig): def _transform_messages( self, messages: List[AllMessageValues], model: str ) -> List[AllMessageValues]: + """OpenAI no longer supports image_url as a string, so we need to convert it to a dict""" + for message in messages: + message_content = message.get("content") + if message_content and isinstance(message_content, list): + for content_item in message_content: + if content_item.get("type") == "image_url": + content_item = cast(ChatCompletionImageObject, content_item) + if isinstance(content_item["image_url"], str): + content_item["image_url"] = { + "url": content_item["image_url"], + } + elif isinstance(content_item["image_url"], dict): + litellm_specific_params = {"format"} + new_image_url_obj = ChatCompletionImageUrlObject( + **{ # type: ignore + k: v + for k, v in content_item["image_url"].items() + if k not in litellm_specific_params + } + ) + content_item["image_url"] = new_image_url_obj return messages def transform_request( @@ -263,7 +288,7 @@ class OpenAIGPTConfig(BaseLLMModelInfo, BaseConfig): def get_complete_url( self, - api_base: str, + api_base: Optional[str], model: str, optional_params: dict, stream: Optional[bool] = None, @@ -274,6 +299,8 @@ class OpenAIGPTConfig(BaseLLMModelInfo, BaseConfig): Returns: str: The complete URL for the API call. 
""" + if api_base is None: + api_base = "https://api.openai.com" endpoint = "chat/completions" # Remove trailing slash from api_base if present diff --git a/litellm/llms/openai/chat/o_series_transformation.py b/litellm/llms/openai/chat/o_series_transformation.py index 9e68fca46f..b2ffda6e7d 100644 --- a/litellm/llms/openai/chat/o_series_transformation.py +++ b/litellm/llms/openai/chat/o_series_transformation.py @@ -19,6 +19,7 @@ from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider from litellm.types.llms.openai import AllMessageValues, ChatCompletionUserMessage from litellm.utils import ( supports_function_calling, + supports_parallel_function_calling, supports_response_schema, supports_system_messages, ) @@ -76,14 +77,19 @@ class OpenAIOSeriesConfig(OpenAIGPTConfig): model, custom_llm_provider ) _supports_response_schema = supports_response_schema(model, custom_llm_provider) + _supports_parallel_tool_calls = supports_parallel_function_calling( + model, custom_llm_provider + ) if not _supports_function_calling: non_supported_params.append("tools") non_supported_params.append("tool_choice") - non_supported_params.append("parallel_tool_calls") non_supported_params.append("function_call") non_supported_params.append("functions") + if not _supports_parallel_tool_calls: + non_supported_params.append("parallel_tool_calls") + if not _supports_response_schema: non_supported_params.append("response_format") @@ -146,4 +152,5 @@ class OpenAIOSeriesConfig(OpenAIGPTConfig): ) messages[i] = new_message # Replace the old message with the new one + messages = super()._transform_messages(messages, model) return messages diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py index 5465a24945..3fddca53e7 100644 --- a/litellm/llms/openai/openai.py +++ b/litellm/llms/openai/openai.py @@ -37,6 +37,7 @@ from litellm.llms.custom_httpx.http_handler import _DEFAULT_TTL_FOR_HTTPX_CLIENT from litellm.types.utils import ( EmbeddingResponse, 
ImageResponse, + LiteLLMBatch, ModelResponse, ModelResponseStream, ) @@ -1755,9 +1756,9 @@ class OpenAIBatchesAPI(BaseLLM): self, create_batch_data: CreateBatchRequest, openai_client: AsyncOpenAI, - ) -> Batch: + ) -> LiteLLMBatch: response = await openai_client.batches.create(**create_batch_data) - return response + return LiteLLMBatch(**response.model_dump()) def create_batch( self, @@ -1769,7 +1770,7 @@ class OpenAIBatchesAPI(BaseLLM): max_retries: Optional[int], organization: Optional[str], client: Optional[Union[OpenAI, AsyncOpenAI]] = None, - ) -> Union[Batch, Coroutine[Any, Any, Batch]]: + ) -> Union[LiteLLMBatch, Coroutine[Any, Any, LiteLLMBatch]]: openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client( api_key=api_key, api_base=api_base, @@ -1792,17 +1793,18 @@ class OpenAIBatchesAPI(BaseLLM): return self.acreate_batch( # type: ignore create_batch_data=create_batch_data, openai_client=openai_client ) - response = openai_client.batches.create(**create_batch_data) - return response + response = cast(OpenAI, openai_client).batches.create(**create_batch_data) + + return LiteLLMBatch(**response.model_dump()) async def aretrieve_batch( self, retrieve_batch_data: RetrieveBatchRequest, openai_client: AsyncOpenAI, - ) -> Batch: + ) -> LiteLLMBatch: verbose_logger.debug("retrieving batch, args= %s", retrieve_batch_data) response = await openai_client.batches.retrieve(**retrieve_batch_data) - return response + return LiteLLMBatch(**response.model_dump()) def retrieve_batch( self, @@ -1837,8 +1839,8 @@ class OpenAIBatchesAPI(BaseLLM): return self.aretrieve_batch( # type: ignore retrieve_batch_data=retrieve_batch_data, openai_client=openai_client ) - response = openai_client.batches.retrieve(**retrieve_batch_data) - return response + response = cast(OpenAI, openai_client).batches.retrieve(**retrieve_batch_data) + return LiteLLMBatch(**response.model_dump()) async def acancel_batch( self, diff --git a/litellm/llms/openrouter/chat/transformation.py 
b/litellm/llms/openrouter/chat/transformation.py index 5a4c2ff209..4b95ec87cf 100644 --- a/litellm/llms/openrouter/chat/transformation.py +++ b/litellm/llms/openrouter/chat/transformation.py @@ -6,7 +6,16 @@ Calls done in OpenAI/openai.py as OpenRouter is openai-compatible. Docs: https://openrouter.ai/docs/parameters """ +from typing import Any, AsyncIterator, Iterator, Optional, Union + +import httpx + +from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator +from litellm.llms.base_llm.chat.transformation import BaseLLMException +from litellm.types.utils import ModelResponse, ModelResponseStream + from ...openai.chat.gpt_transformation import OpenAIGPTConfig +from ..common_utils import OpenRouterException class OpenrouterConfig(OpenAIGPTConfig): @@ -37,3 +46,43 @@ class OpenrouterConfig(OpenAIGPTConfig): extra_body # openai client supports `extra_body` param ) return mapped_openai_params + + def get_error_class( + self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] + ) -> BaseLLMException: + return OpenRouterException( + message=error_message, + status_code=status_code, + headers=headers, + ) + + def get_model_response_iterator( + self, + streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse], + sync_stream: bool, + json_mode: Optional[bool] = False, + ) -> Any: + return OpenRouterChatCompletionStreamingHandler( + streaming_response=streaming_response, + sync_stream=sync_stream, + json_mode=json_mode, + ) + + +class OpenRouterChatCompletionStreamingHandler(BaseModelResponseIterator): + + def chunk_parser(self, chunk: dict) -> ModelResponseStream: + try: + new_choices = [] + for choice in chunk["choices"]: + choice["delta"]["reasoning_content"] = choice["delta"].get("reasoning") + new_choices.append(choice) + return ModelResponseStream( + id=chunk["id"], + object="chat.completion.chunk", + created=chunk["created"], + model=chunk["model"], + choices=new_choices, + ) + except Exception as e: + 
raise e diff --git a/litellm/llms/openrouter/common_utils.py b/litellm/llms/openrouter/common_utils.py new file mode 100644 index 0000000000..96e53a5aae --- /dev/null +++ b/litellm/llms/openrouter/common_utils.py @@ -0,0 +1,5 @@ +from litellm.llms.base_llm.chat.transformation import BaseLLMException + + +class OpenRouterException(BaseLLMException): + pass diff --git a/litellm/llms/replicate/chat/transformation.py b/litellm/llms/replicate/chat/transformation.py index e9934dada8..39aaad6808 100644 --- a/litellm/llms/replicate/chat/transformation.py +++ b/litellm/llms/replicate/chat/transformation.py @@ -138,7 +138,7 @@ class ReplicateConfig(BaseConfig): def get_complete_url( self, - api_base: str, + api_base: Optional[str], model: str, optional_params: dict, stream: Optional[bool] = None, diff --git a/litellm/llms/sagemaker/common_utils.py b/litellm/llms/sagemaker/common_utils.py index 49e4989ff1..9884f420c3 100644 --- a/litellm/llms/sagemaker/common_utils.py +++ b/litellm/llms/sagemaker/common_utils.py @@ -3,6 +3,7 @@ from typing import AsyncIterator, Iterator, List, Optional, Union import httpx +import litellm from litellm import verbose_logger from litellm.llms.base_llm.chat.transformation import BaseLLMException from litellm.types.utils import GenericStreamingChunk as GChunk @@ -78,7 +79,11 @@ class AWSEventStreamDecoder: message = self._parse_message_from_event(event) if message: # remove data: prefix and "\n\n" at the end - message = message.replace("data:", "").replace("\n\n", "") + message = ( + litellm.CustomStreamWrapper._strip_sse_data_from_chunk(message) + or "" + ) + message = message.replace("\n\n", "") # Accumulate JSON data accumulated_json += message @@ -127,7 +132,11 @@ class AWSEventStreamDecoder: if message: verbose_logger.debug("sagemaker parsed chunk bytes %s", message) # remove data: prefix and "\n\n" at the end - message = message.replace("data:", "").replace("\n\n", "") + message = ( + 
litellm.CustomStreamWrapper._strip_sse_data_from_chunk(message) + or "" + ) + message = message.replace("\n\n", "") # Accumulate JSON data accumulated_json += message diff --git a/litellm/llms/sagemaker/completion/handler.py b/litellm/llms/sagemaker/completion/handler.py index 0a403dc484..4aff5f5d71 100644 --- a/litellm/llms/sagemaker/completion/handler.py +++ b/litellm/llms/sagemaker/completion/handler.py @@ -433,6 +433,10 @@ class SagemakerLLM(BaseAWSLLM): "messages": messages, } prepared_request = await asyncified_prepare_request(**prepared_request_args) + if model_id is not None: # Fixes https://github.com/BerriAI/litellm/issues/8889 + prepared_request.headers.update( + {"X-Amzn-SageMaker-Inference-Component": model_id} + ) completion_stream = await self.make_async_call( api_base=prepared_request.url, headers=prepared_request.headers, # type: ignore @@ -511,7 +515,7 @@ class SagemakerLLM(BaseAWSLLM): # Add model_id as InferenceComponentName header # boto3 doc: https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html prepared_request.headers.update( - {"X-Amzn-SageMaker-Inference-Componen": model_id} + {"X-Amzn-SageMaker-Inference-Component": model_id} ) # make async httpx post request here try: diff --git a/litellm/llms/sambanova/chat.py b/litellm/llms/sambanova/chat.py index 4eea1914ce..abf55d44fb 100644 --- a/litellm/llms/sambanova/chat.py +++ b/litellm/llms/sambanova/chat.py @@ -11,7 +11,7 @@ from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig class SambanovaConfig(OpenAIGPTConfig): """ - Reference: https://community.sambanova.ai/t/create-chat-completion-api/ + Reference: https://docs.sambanova.ai/cloud/api-reference/ Below are the parameters: """ diff --git a/litellm/llms/vertex_ai/batches/handler.py b/litellm/llms/vertex_ai/batches/handler.py index 0274cd5b05..b82268bef6 100644 --- a/litellm/llms/vertex_ai/batches/handler.py +++ b/litellm/llms/vertex_ai/batches/handler.py @@ -9,8 +9,12 @@ from 
litellm.llms.custom_httpx.http_handler import ( get_async_httpx_client, ) from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexLLM -from litellm.types.llms.openai import Batch, CreateBatchRequest -from litellm.types.llms.vertex_ai import VertexAIBatchPredictionJob +from litellm.types.llms.openai import CreateBatchRequest +from litellm.types.llms.vertex_ai import ( + VERTEX_CREDENTIALS_TYPES, + VertexAIBatchPredictionJob, +) +from litellm.types.utils import LiteLLMBatch from .transformation import VertexAIBatchTransformation @@ -25,12 +29,12 @@ class VertexAIBatchPrediction(VertexLLM): _is_async: bool, create_batch_data: CreateBatchRequest, api_base: Optional[str], - vertex_credentials: Optional[str], + vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES], vertex_project: Optional[str], vertex_location: Optional[str], timeout: Union[float, httpx.Timeout], max_retries: Optional[int], - ) -> Union[Batch, Coroutine[Any, Any, Batch]]: + ) -> Union[LiteLLMBatch, Coroutine[Any, Any, LiteLLMBatch]]: sync_handler = _get_httpx_client() @@ -98,7 +102,7 @@ class VertexAIBatchPrediction(VertexLLM): vertex_batch_request: VertexAIBatchPredictionJob, api_base: str, headers: Dict[str, str], - ) -> Batch: + ) -> LiteLLMBatch: client = get_async_httpx_client( llm_provider=litellm.LlmProviders.VERTEX_AI, ) @@ -130,12 +134,12 @@ class VertexAIBatchPrediction(VertexLLM): _is_async: bool, batch_id: str, api_base: Optional[str], - vertex_credentials: Optional[str], + vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES], vertex_project: Optional[str], vertex_location: Optional[str], timeout: Union[float, httpx.Timeout], max_retries: Optional[int], - ) -> Union[Batch, Coroutine[Any, Any, Batch]]: + ) -> Union[LiteLLMBatch, Coroutine[Any, Any, LiteLLMBatch]]: sync_handler = _get_httpx_client() access_token, project_id = self._ensure_access_token( @@ -196,7 +200,7 @@ class VertexAIBatchPrediction(VertexLLM): self, api_base: str, headers: Dict[str, str], - ) 
-> Batch: + ) -> LiteLLMBatch: client = get_async_httpx_client( llm_provider=litellm.LlmProviders.VERTEX_AI, ) diff --git a/litellm/llms/vertex_ai/batches/transformation.py b/litellm/llms/vertex_ai/batches/transformation.py index 32cabdcf56..a97f312d48 100644 --- a/litellm/llms/vertex_ai/batches/transformation.py +++ b/litellm/llms/vertex_ai/batches/transformation.py @@ -4,8 +4,9 @@ from typing import Dict from litellm.llms.vertex_ai.common_utils import ( _convert_vertex_datetime_to_openai_datetime, ) -from litellm.types.llms.openai import Batch, BatchJobStatus, CreateBatchRequest +from litellm.types.llms.openai import BatchJobStatus, CreateBatchRequest from litellm.types.llms.vertex_ai import * +from litellm.types.utils import LiteLLMBatch class VertexAIBatchTransformation: @@ -47,8 +48,8 @@ class VertexAIBatchTransformation: @classmethod def transform_vertex_ai_batch_response_to_openai_batch_response( cls, response: VertexBatchPredictionResponse - ) -> Batch: - return Batch( + ) -> LiteLLMBatch: + return LiteLLMBatch( id=cls._get_batch_id_from_vertex_ai_batch_response(response), completion_window="24hrs", created_at=_convert_vertex_datetime_to_openai_datetime( diff --git a/litellm/llms/vertex_ai/common_utils.py b/litellm/llms/vertex_ai/common_utils.py index a412a1f0db..f7149c349a 100644 --- a/litellm/llms/vertex_ai/common_utils.py +++ b/litellm/llms/vertex_ai/common_utils.py @@ -170,6 +170,9 @@ def _build_vertex_schema(parameters: dict): strip_field( parameters, field_name="$schema" ) # 5. Remove $schema - json schema value, not supported by OpenAPI - causes vertex errors. + strip_field( + parameters, field_name="$id" + ) # 6. Remove id - json schema value, not supported by OpenAPI - causes vertex errors. 
return parameters diff --git a/litellm/llms/vertex_ai/files/handler.py b/litellm/llms/vertex_ai/files/handler.py index 4bae106045..266169cdfb 100644 --- a/litellm/llms/vertex_ai/files/handler.py +++ b/litellm/llms/vertex_ai/files/handler.py @@ -9,6 +9,7 @@ from litellm.integrations.gcs_bucket.gcs_bucket_base import ( ) from litellm.llms.custom_httpx.http_handler import get_async_httpx_client from litellm.types.llms.openai import CreateFileRequest, FileObject +from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES from .transformation import VertexAIFilesTransformation @@ -34,7 +35,7 @@ class VertexAIFilesHandler(GCSBucketBase): self, create_file_data: CreateFileRequest, api_base: Optional[str], - vertex_credentials: Optional[str], + vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES], vertex_project: Optional[str], vertex_location: Optional[str], timeout: Union[float, httpx.Timeout], @@ -70,7 +71,7 @@ class VertexAIFilesHandler(GCSBucketBase): _is_async: bool, create_file_data: CreateFileRequest, api_base: Optional[str], - vertex_credentials: Optional[str], + vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES], vertex_project: Optional[str], vertex_location: Optional[str], timeout: Union[float, httpx.Timeout], diff --git a/litellm/llms/vertex_ai/fine_tuning/handler.py b/litellm/llms/vertex_ai/fine_tuning/handler.py index 8564b8cb69..3cf409c78e 100644 --- a/litellm/llms/vertex_ai/fine_tuning/handler.py +++ b/litellm/llms/vertex_ai/fine_tuning/handler.py @@ -13,6 +13,7 @@ from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import Ver from litellm.types.fine_tuning import OpenAIFineTuningHyperparameters from litellm.types.llms.openai import FineTuningJobCreate from litellm.types.llms.vertex_ai import ( + VERTEX_CREDENTIALS_TYPES, FineTuneHyperparameters, FineTuneJobCreate, FineTunesupervisedTuningSpec, @@ -222,7 +223,7 @@ class VertexFineTuningAPI(VertexLLM): create_fine_tuning_job_data: FineTuningJobCreate, vertex_project: 
Optional[str], vertex_location: Optional[str], - vertex_credentials: Optional[str], + vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES], api_base: Optional[str], timeout: Union[float, httpx.Timeout], kwargs: Optional[dict] = None, diff --git a/litellm/llms/vertex_ai/gemini/transformation.py b/litellm/llms/vertex_ai/gemini/transformation.py index 8109c8bf61..d6bafc7c60 100644 --- a/litellm/llms/vertex_ai/gemini/transformation.py +++ b/litellm/llms/vertex_ai/gemini/transformation.py @@ -55,10 +55,11 @@ else: LiteLLMLoggingObj = Any -def _process_gemini_image(image_url: str) -> PartType: +def _process_gemini_image(image_url: str, format: Optional[str] = None) -> PartType: """ Given an image URL, return the appropriate PartType for Gemini """ + try: # GCS URIs if "gs://" in image_url: @@ -66,25 +67,30 @@ def _process_gemini_image(image_url: str) -> PartType: extension_with_dot = os.path.splitext(image_url)[-1] # Ex: ".png" extension = extension_with_dot[1:] # Ex: "png" - file_type = get_file_type_from_extension(extension) + if not format: + file_type = get_file_type_from_extension(extension) - # Validate the file type is supported by Gemini - if not is_gemini_1_5_accepted_file_type(file_type): - raise Exception(f"File type not supported by gemini - {file_type}") + # Validate the file type is supported by Gemini + if not is_gemini_1_5_accepted_file_type(file_type): + raise Exception(f"File type not supported by gemini - {file_type}") - mime_type = get_file_mime_type_for_file_type(file_type) + mime_type = get_file_mime_type_for_file_type(file_type) + else: + mime_type = format file_data = FileDataType(mime_type=mime_type, file_uri=image_url) return PartType(file_data=file_data) elif ( "https://" in image_url - and (image_type := _get_image_mime_type_from_url(image_url)) is not None + and (image_type := format or _get_image_mime_type_from_url(image_url)) + is not None ): + file_data = FileDataType(file_uri=image_url, mime_type=image_type) return 
PartType(file_data=file_data) elif "http://" in image_url or "https://" in image_url or "base64" in image_url: # https links for unsupported mime types and base64 images - image = convert_to_anthropic_image_obj(image_url) + image = convert_to_anthropic_image_obj(image_url, format=format) _blob = BlobType(data=image["data"], mime_type=image["media_type"]) return PartType(inline_data=_blob) raise Exception("Invalid image received - {}".format(image_url)) @@ -159,11 +165,15 @@ def _gemini_convert_messages_with_history( # noqa: PLR0915 elif element["type"] == "image_url": element = cast(ChatCompletionImageObject, element) img_element = element + format: Optional[str] = None if isinstance(img_element["image_url"], dict): image_url = img_element["image_url"]["url"] + format = img_element["image_url"].get("format") else: image_url = img_element["image_url"] - _part = _process_gemini_image(image_url=image_url) + _part = _process_gemini_image( + image_url=image_url, format=format + ) _parts.append(_part) user_content.extend(_parts) elif ( diff --git a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py index dff63ce148..294939a3c5 100644 --- a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py +++ b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py @@ -40,6 +40,7 @@ from litellm.types.llms.openai import ( ChatCompletionUsageBlock, ) from litellm.types.llms.vertex_ai import ( + VERTEX_CREDENTIALS_TYPES, Candidates, ContentType, FunctionCallingConfig, @@ -930,7 +931,7 @@ class VertexLLM(VertexBase): client: Optional[AsyncHTTPHandler] = None, vertex_project: Optional[str] = None, vertex_location: Optional[str] = None, - vertex_credentials: Optional[str] = None, + vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None, gemini_api_key: Optional[str] = None, extra_headers: Optional[dict] = None, ) -> CustomStreamWrapper: @@ -1018,11 +1019,10 @@ class 
VertexLLM(VertexBase): client: Optional[AsyncHTTPHandler] = None, vertex_project: Optional[str] = None, vertex_location: Optional[str] = None, - vertex_credentials: Optional[str] = None, + vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None, gemini_api_key: Optional[str] = None, extra_headers: Optional[dict] = None, ) -> Union[ModelResponse, CustomStreamWrapper]: - should_use_v1beta1_features = self.is_using_v1beta1_features( optional_params=optional_params ) @@ -1123,7 +1123,7 @@ class VertexLLM(VertexBase): timeout: Optional[Union[float, httpx.Timeout]], vertex_project: Optional[str], vertex_location: Optional[str], - vertex_credentials: Optional[str], + vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES], gemini_api_key: Optional[str], litellm_params: dict, logger_fn=None, @@ -1408,7 +1408,8 @@ class ModelResponseIterator: return self.chunk_parser(chunk=json_chunk) def handle_accumulated_json_chunk(self, chunk: str) -> GenericStreamingChunk: - message = chunk.replace("data:", "").replace("\n\n", "") + chunk = litellm.CustomStreamWrapper._strip_sse_data_from_chunk(chunk) or "" + message = chunk.replace("\n\n", "") # Accumulate JSON data self.accumulated_json += message @@ -1431,7 +1432,7 @@ class ModelResponseIterator: def _common_chunk_parsing_logic(self, chunk: str) -> GenericStreamingChunk: try: - chunk = chunk.replace("data:", "") + chunk = litellm.CustomStreamWrapper._strip_sse_data_from_chunk(chunk) or "" if len(chunk) > 0: """ Check if initial chunk valid json diff --git a/litellm/llms/vertex_ai/image_generation/image_generation_handler.py b/litellm/llms/vertex_ai/image_generation/image_generation_handler.py index bb39fcb1ad..1d5322c08d 100644 --- a/litellm/llms/vertex_ai/image_generation/image_generation_handler.py +++ b/litellm/llms/vertex_ai/image_generation/image_generation_handler.py @@ -11,6 +11,7 @@ from litellm.llms.custom_httpx.http_handler import ( get_async_httpx_client, ) from 
litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexLLM +from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES from litellm.types.utils import ImageResponse @@ -44,7 +45,7 @@ class VertexImageGeneration(VertexLLM): prompt: str, vertex_project: Optional[str], vertex_location: Optional[str], - vertex_credentials: Optional[str], + vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES], model_response: ImageResponse, logging_obj: Any, model: Optional[ @@ -139,7 +140,7 @@ class VertexImageGeneration(VertexLLM): prompt: str, vertex_project: Optional[str], vertex_location: Optional[str], - vertex_credentials: Optional[str], + vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES], model_response: litellm.ImageResponse, logging_obj: Any, model: Optional[ diff --git a/litellm/llms/vertex_ai/text_to_speech/text_to_speech_handler.py b/litellm/llms/vertex_ai/text_to_speech/text_to_speech_handler.py index 10c73e815c..18bc72db46 100644 --- a/litellm/llms/vertex_ai/text_to_speech/text_to_speech_handler.py +++ b/litellm/llms/vertex_ai/text_to_speech/text_to_speech_handler.py @@ -9,6 +9,7 @@ from litellm.llms.custom_httpx.http_handler import ( ) from litellm.llms.openai.openai import HttpxBinaryResponseContent from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexLLM +from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES class VertexInput(TypedDict, total=False): @@ -45,7 +46,7 @@ class VertexTextToSpeechAPI(VertexLLM): logging_obj, vertex_project: Optional[str], vertex_location: Optional[str], - vertex_credentials: Optional[str], + vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES], api_base: Optional[str], timeout: Union[float, httpx.Timeout], model: str, diff --git a/litellm/llms/vertex_ai/vertex_ai_partner_models/main.py b/litellm/llms/vertex_ai/vertex_ai_partner_models/main.py index ad52472130..fb2393631b 100644 --- a/litellm/llms/vertex_ai/vertex_ai_partner_models/main.py +++ 
b/litellm/llms/vertex_ai/vertex_ai_partner_models/main.py @@ -160,7 +160,8 @@ class VertexAIPartnerModels(VertexBase): url=default_api_base, ) - model = model.split("@")[0] + if "codestral" in model or "mistral" in model: + model = model.split("@")[0] if "codestral" in model and litellm_params.get("text_completion") is True: optional_params["model"] = model diff --git a/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py b/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py index 0f73db30a0..3ef40703e8 100644 --- a/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py +++ b/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py @@ -41,7 +41,7 @@ class VertexEmbedding(VertexBase): client: Optional[Union[AsyncHTTPHandler, HTTPHandler]] = None, vertex_project: Optional[str] = None, vertex_location: Optional[str] = None, - vertex_credentials: Optional[str] = None, + vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None, gemini_api_key: Optional[str] = None, extra_headers: Optional[dict] = None, ) -> EmbeddingResponse: @@ -148,7 +148,7 @@ class VertexEmbedding(VertexBase): client: Optional[AsyncHTTPHandler] = None, vertex_project: Optional[str] = None, vertex_location: Optional[str] = None, - vertex_credentials: Optional[str] = None, + vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None, gemini_api_key: Optional[str] = None, extra_headers: Optional[dict] = None, encoding=None, diff --git a/litellm/llms/vertex_ai/vertex_llm_base.py b/litellm/llms/vertex_ai/vertex_llm_base.py index 71346a2e01..8286cb515f 100644 --- a/litellm/llms/vertex_ai/vertex_llm_base.py +++ b/litellm/llms/vertex_ai/vertex_llm_base.py @@ -12,6 +12,7 @@ from litellm._logging import verbose_logger from litellm.litellm_core_utils.asyncify import asyncify from litellm.llms.base import BaseLLM from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler +from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES from .common_utils import 
_get_gemini_url, _get_vertex_url, all_gemini_url_modes @@ -34,7 +35,7 @@ class VertexBase(BaseLLM): return vertex_region or "us-central1" def load_auth( - self, credentials: Optional[str], project_id: Optional[str] + self, credentials: Optional[VERTEX_CREDENTIALS_TYPES], project_id: Optional[str] ) -> Tuple[Any, str]: import google.auth as google_auth from google.auth import identity_pool @@ -42,29 +43,36 @@ class VertexBase(BaseLLM): Request, # type: ignore[import-untyped] ) - if credentials is not None and isinstance(credentials, str): + if credentials is not None: import google.oauth2.service_account - verbose_logger.debug( - "Vertex: Loading vertex credentials from %s", credentials - ) - verbose_logger.debug( - "Vertex: checking if credentials is a valid path, os.path.exists(%s)=%s, current dir %s", - credentials, - os.path.exists(credentials), - os.getcwd(), - ) + if isinstance(credentials, str): + verbose_logger.debug( + "Vertex: Loading vertex credentials from %s", credentials + ) + verbose_logger.debug( + "Vertex: checking if credentials is a valid path, os.path.exists(%s)=%s, current dir %s", + credentials, + os.path.exists(credentials), + os.getcwd(), + ) - try: - if os.path.exists(credentials): - json_obj = json.load(open(credentials)) - else: - json_obj = json.loads(credentials) - except Exception: - raise Exception( - "Unable to load vertex credentials from environment. Got={}".format( - credentials + try: + if os.path.exists(credentials): + json_obj = json.load(open(credentials)) + else: + json_obj = json.loads(credentials) + except Exception: + raise Exception( + "Unable to load vertex credentials from environment. 
Got={}".format( + credentials + ) ) + elif isinstance(credentials, dict): + json_obj = credentials + else: + raise ValueError( + "Invalid credentials type: {}".format(type(credentials)) ) # Check if the JSON object contains Workload Identity Federation configuration @@ -109,7 +117,7 @@ class VertexBase(BaseLLM): def _ensure_access_token( self, - credentials: Optional[str], + credentials: Optional[VERTEX_CREDENTIALS_TYPES], project_id: Optional[str], custom_llm_provider: Literal[ "vertex_ai", "vertex_ai_beta", "gemini" @@ -202,7 +210,7 @@ class VertexBase(BaseLLM): gemini_api_key: Optional[str], vertex_project: Optional[str], vertex_location: Optional[str], - vertex_credentials: Optional[str], + vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES], stream: Optional[bool], custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"], api_base: Optional[str], @@ -253,7 +261,7 @@ class VertexBase(BaseLLM): async def _ensure_access_token_async( self, - credentials: Optional[str], + credentials: Optional[VERTEX_CREDENTIALS_TYPES], project_id: Optional[str], custom_llm_provider: Literal[ "vertex_ai", "vertex_ai_beta", "gemini" diff --git a/litellm/llms/watsonx/chat/transformation.py b/litellm/llms/watsonx/chat/transformation.py index 208da82ef5..d5e0ed6544 100644 --- a/litellm/llms/watsonx/chat/transformation.py +++ b/litellm/llms/watsonx/chat/transformation.py @@ -80,7 +80,7 @@ class IBMWatsonXChatConfig(IBMWatsonXMixin, OpenAIGPTConfig): def get_complete_url( self, - api_base: str, + api_base: Optional[str], model: str, optional_params: dict, stream: Optional[bool] = None, diff --git a/litellm/llms/watsonx/completion/transformation.py b/litellm/llms/watsonx/completion/transformation.py index ebebbde021..7a4df23944 100644 --- a/litellm/llms/watsonx/completion/transformation.py +++ b/litellm/llms/watsonx/completion/transformation.py @@ -315,7 +315,7 @@ class IBMWatsonXAIConfig(IBMWatsonXMixin, BaseConfig): def get_complete_url( self, - api_base: str, + 
api_base: Optional[str], model: str, optional_params: dict, stream: Optional[bool] = None, diff --git a/litellm/main.py b/litellm/main.py index ece484f1f2..846a908a8e 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -94,7 +94,7 @@ from litellm.utils import ( read_config_args, supports_httpx_timeout, token_counter, - validate_chat_completion_messages, + validate_and_fix_openai_messages, validate_chat_completion_tool_choice, ) @@ -166,6 +166,7 @@ from .llms.vertex_ai.vertex_model_garden.main import VertexAIModelGardenModels from .llms.vllm.completion import handler as vllm_handler from .llms.watsonx.chat.handler import WatsonXChatHandler from .llms.watsonx.common_utils import IBMWatsonXMixin +from .types.llms.anthropic import AnthropicThinkingParam from .types.llms.openai import ( ChatCompletionAssistantMessage, ChatCompletionAudioParam, @@ -341,6 +342,7 @@ async def acompletion( model_list: Optional[list] = None, # pass in a list of api_base,keys, etc. extra_headers: Optional[dict] = None, # Optional liteLLM function params + thinking: Optional[AnthropicThinkingParam] = None, **kwargs, ) -> Union[ModelResponse, CustomStreamWrapper]: """ @@ -431,6 +433,7 @@ async def acompletion( "reasoning_effort": reasoning_effort, "extra_headers": extra_headers, "acompletion": True, # assuming this is a required parameter + "thinking": thinking, } if custom_llm_provider is None: _, custom_llm_provider, _, _ = get_llm_provider( @@ -800,6 +803,7 @@ def completion( # type: ignore # noqa: PLR0915 api_key: Optional[str] = None, model_list: Optional[list] = None, # pass in a list of api_base,keys, etc. 
# Optional liteLLM function params + thinking: Optional[AnthropicThinkingParam] = None, **kwargs, ) -> Union[ModelResponse, CustomStreamWrapper]: """ @@ -851,7 +855,7 @@ def completion( # type: ignore # noqa: PLR0915 if model is None: raise ValueError("model param not passed in.") # validate messages - messages = validate_chat_completion_messages(messages=messages) + messages = validate_and_fix_openai_messages(messages=messages) # validate tool_choice tool_choice = validate_chat_completion_tool_choice(tool_choice=tool_choice) ######### unpacking kwargs ##################### @@ -1106,6 +1110,7 @@ def completion( # type: ignore # noqa: PLR0915 parallel_tool_calls=parallel_tool_calls, messages=messages, reasoning_effort=reasoning_effort, + thinking=thinking, **non_default_params, ) @@ -1154,6 +1159,9 @@ def completion( # type: ignore # noqa: PLR0915 prompt_id=prompt_id, prompt_variables=prompt_variables, ssl_verify=ssl_verify, + merge_reasoning_content_in_choices=kwargs.get( + "merge_reasoning_content_in_choices", None + ), ) logging.update_environment_variables( model=model, @@ -2266,23 +2274,22 @@ def completion( # type: ignore # noqa: PLR0915 data = {"model": model, "messages": messages, **optional_params} ## COMPLETION CALL - response = openai_like_chat_completion.completion( + response = base_llm_http_handler.completion( model=model, + stream=stream, messages=messages, - headers=headers, - api_key=api_key, + acompletion=acompletion, api_base=api_base, model_response=model_response, - print_verbose=print_verbose, optional_params=optional_params, litellm_params=litellm_params, - logger_fn=logger_fn, - logging_obj=logging, - acompletion=acompletion, - timeout=timeout, # type: ignore custom_llm_provider="openrouter", - custom_prompt_dict=custom_prompt_dict, + timeout=timeout, + headers=headers, encoding=encoding, + api_key=api_key, + logging_obj=logging, # model call logging done inside the class as we make need to modify I/O to fit aleph alpha's requirements + 
client=client, ) ## LOGGING logging.post_call( @@ -2848,6 +2855,7 @@ def completion( # type: ignore # noqa: PLR0915 acompletion=acompletion, model_response=model_response, encoding=encoding, + client=client, ) if acompletion is True or optional_params.get("stream", False) is True: return generator @@ -4516,6 +4524,7 @@ def image_generation( # noqa: PLR0915 non_default_params = { k: v for k, v in kwargs.items() if k not in default_params } # model-specific params - pass them straight to the model/provider + optional_params = get_optional_params_image_gen( model=model, n=n, @@ -4527,6 +4536,7 @@ def image_generation( # noqa: PLR0915 custom_llm_provider=custom_llm_provider, **non_default_params, ) + logging: Logging = litellm_logging_obj logging.update_environment_variables( model=model, @@ -4625,6 +4635,7 @@ def image_generation( # noqa: PLR0915 optional_params=optional_params, model_response=model_response, aimg_generation=aimg_generation, + client=client, ) elif custom_llm_provider == "vertex_ai": vertex_ai_project = ( diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 0255a8b890..cb2322752b 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -76,6 +76,44 @@ "supports_system_messages": true, "supports_tool_choice": true }, + "gpt-4.5-preview": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.000075, + "output_cost_per_token": 0.00015, + "input_cost_per_token_batches": 0.0000375, + "output_cost_per_token_batches": 0.000075, + "cache_read_input_token_cost": 0.0000375, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": 
true + }, + "gpt-4.5-preview-2025-02-27": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.000075, + "output_cost_per_token": 0.00015, + "input_cost_per_token_batches": 0.0000375, + "output_cost_per_token_batches": 0.000075, + "cache_read_input_token_cost": 0.0000375, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, "gpt-4o-audio-preview": { "max_tokens": 16384, "max_input_tokens": 128000, @@ -1030,9 +1068,9 @@ "max_tokens": 65536, "max_input_tokens": 128000, "max_output_tokens": 65536, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000012, - "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.00000121, + "output_cost_per_token": 0.00000484, + "cache_read_input_token_cost": 0.000000605, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -1044,9 +1082,9 @@ "max_tokens": 65536, "max_input_tokens": 128000, "max_output_tokens": 65536, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000012, - "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.00000121, + "output_cost_per_token": 0.00000484, + "cache_read_input_token_cost": 0.000000605, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -1409,7 +1447,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "deprecation_date": "2025-03-31", + "deprecation_date": "2025-05-31", "supports_tool_choice": true }, "azure/gpt-3.5-turbo-0125": { @@ -2747,15 +2785,17 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000001, - "output_cost_per_token": 0.000005, - 
"cache_creation_input_token_cost": 0.00000125, - "cache_read_input_token_cost": 0.0000001, + "input_cost_per_token": 0.0000008, + "output_cost_per_token": 0.000004, + "cache_creation_input_token_cost": 0.000001, + "cache_read_input_token_cost": 0.0000008, "litellm_provider": "anthropic", "mode": "chat", "supports_function_calling": true, + "supports_vision": true, "tool_use_system_prompt_tokens": 264, "supports_assistant_prefill": true, + "supports_pdf_input": true, "supports_prompt_caching": true, "supports_response_schema": true, "deprecation_date": "2025-10-01", @@ -2772,8 +2812,10 @@ "litellm_provider": "anthropic", "mode": "chat", "supports_function_calling": true, + "supports_vision": true, "tool_use_system_prompt_tokens": 264, "supports_assistant_prefill": true, + "supports_pdf_input": true, "supports_prompt_caching": true, "supports_response_schema": true, "deprecation_date": "2025-10-01", @@ -2848,6 +2890,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159, "supports_assistant_prefill": true, + "supports_pdf_input": true, "supports_prompt_caching": true, "supports_response_schema": true, "deprecation_date": "2025-06-01", @@ -2867,15 +2910,16 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159, "supports_assistant_prefill": true, + "supports_pdf_input": true, "supports_prompt_caching": true, "supports_response_schema": true, "deprecation_date": "2025-06-01", "supports_tool_choice": true }, "claude-3-7-sonnet-latest": { - "max_tokens": 8192, + "max_tokens": 128000, "max_input_tokens": 200000, - "max_output_tokens": 8192, + "max_output_tokens": 128000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015, "cache_creation_input_token_cost": 0.00000375, @@ -2886,15 +2930,16 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159, "supports_assistant_prefill": true, + "supports_pdf_input": true, "supports_prompt_caching": true, "supports_response_schema": true, "deprecation_date": "2025-06-01", 
"supports_tool_choice": true }, "claude-3-7-sonnet-20250219": { - "max_tokens": 8192, + "max_tokens": 128000, "max_input_tokens": 200000, - "max_output_tokens": 8192, + "max_output_tokens": 128000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015, "cache_creation_input_token_cost": 0.00000375, @@ -2905,9 +2950,10 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159, "supports_assistant_prefill": true, + "supports_pdf_input": true, "supports_prompt_caching": true, "supports_response_schema": true, - "deprecation_date": "2025-06-01", + "deprecation_date": "2026-02-01", "supports_tool_choice": true }, "claude-3-5-sonnet-20241022": { @@ -4119,6 +4165,7 @@ "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, + "supports_pdf_input": true, "supports_vision": true, "supports_assistant_prefill": true, "supports_tool_choice": true @@ -4132,6 +4179,7 @@ "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, + "supports_pdf_input": true, "supports_vision": true, "supports_assistant_prefill": true, "supports_tool_choice": true @@ -4145,6 +4193,7 @@ "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, + "supports_pdf_input": true, "supports_vision": true, "supports_assistant_prefill": true, "supports_tool_choice": true @@ -4158,6 +4207,7 @@ "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, + "supports_pdf_input": true, "supports_vision": true, "supports_assistant_prefill": true, "supports_tool_choice": true @@ -4173,6 +4223,7 @@ "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, + "supports_pdf_input": true, "supports_vision": true, "tool_use_system_prompt_tokens": 159, "supports_assistant_prefill": true, @@ -4216,6 +4267,7 @@ "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", 
"supports_function_calling": true, + "supports_pdf_input": true, "supports_assistant_prefill": true, "supports_tool_choice": true }, @@ -4228,6 +4280,7 @@ "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, + "supports_pdf_input": true, "supports_assistant_prefill": true, "supports_tool_choice": true }, @@ -6392,6 +6445,18 @@ "supports_prompt_caching": true, "supports_response_schema": true }, + "eu.amazon.nova-micro-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 300000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000000046, + "output_cost_per_token": 0.000000184, + "litellm_provider": "bedrock_converse", + "mode": "chat", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, "amazon.nova-lite-v1:0": { "max_tokens": 4096, "max_input_tokens": 128000, @@ -6420,6 +6485,20 @@ "supports_prompt_caching": true, "supports_response_schema": true }, + "eu.amazon.nova-lite-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000000078, + "output_cost_per_token": 0.000000312, + "litellm_provider": "bedrock_converse", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, "amazon.nova-pro-v1:0": { "max_tokens": 4096, "max_input_tokens": 300000, @@ -6448,6 +6527,21 @@ "supports_prompt_caching": true, "supports_response_schema": true }, + "eu.amazon.nova-pro-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 300000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000105, + "output_cost_per_token": 0.0000042, + "litellm_provider": "bedrock_converse", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "source": 
"https://aws.amazon.com/bedrock/pricing/" + }, "anthropic.claude-3-sonnet-20240229-v1:0": { "max_tokens": 4096, "max_input_tokens": 200000, @@ -6459,8 +6553,25 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_vision": true, + "supports_pdf_input": true, "supports_tool_choice": true }, + "bedrock/invoke/anthropic.claude-3-5-sonnet-20240620-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_tool_choice": true, + "metadata": { + "notes": "Anthropic via Invoke route does not currently support pdf input." + } + }, "anthropic.claude-3-5-sonnet-20240620-v1:0": { "max_tokens": 4096, "max_input_tokens": 200000, @@ -6472,6 +6583,7 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_vision": true, + "supports_pdf_input": true, "supports_tool_choice": true }, "anthropic.claude-3-7-sonnet-20250219-v1:0": { @@ -6499,6 +6611,7 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, + "supports_pdf_input": true, "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_response_schema": true, @@ -6515,6 +6628,7 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_vision": true, + "supports_pdf_input": true, "supports_tool_choice": true }, "anthropic.claude-3-5-haiku-20241022-v1:0": { @@ -6526,6 +6640,7 @@ "litellm_provider": "bedrock", "mode": "chat", "supports_assistant_prefill": true, + "supports_pdf_input": true, "supports_function_calling": true, "supports_response_schema": true, "supports_prompt_caching": true, @@ -6555,6 +6670,7 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_vision": true, + "supports_pdf_input": true, 
"supports_tool_choice": true }, "us.anthropic.claude-3-5-sonnet-20240620-v1:0": { @@ -6568,6 +6684,7 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_vision": true, + "supports_pdf_input": true, "supports_tool_choice": true }, "us.anthropic.claude-3-5-sonnet-20241022-v2:0": { @@ -6580,6 +6697,7 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, + "supports_pdf_input": true, "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_response_schema": true, @@ -6611,6 +6729,7 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_vision": true, + "supports_pdf_input": true, "supports_tool_choice": true }, "us.anthropic.claude-3-5-haiku-20241022-v1:0": { @@ -6622,6 +6741,7 @@ "litellm_provider": "bedrock", "mode": "chat", "supports_assistant_prefill": true, + "supports_pdf_input": true, "supports_function_calling": true, "supports_prompt_caching": true, "supports_response_schema": true, @@ -6651,6 +6771,7 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_vision": true, + "supports_pdf_input": true, "supports_tool_choice": true }, "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": { @@ -6664,6 +6785,7 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_vision": true, + "supports_pdf_input": true, "supports_tool_choice": true }, "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": { @@ -6676,6 +6798,7 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, + "supports_pdf_input": true, "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_response_schema": true, @@ -6692,6 +6815,7 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_vision": true, + "supports_pdf_input": true, "supports_tool_choice": true }, "eu.anthropic.claude-3-5-haiku-20241022-v1:0": { @@ -6704,6 +6828,7 @@ "mode": "chat", "supports_function_calling": true, 
"supports_assistant_prefill": true, + "supports_pdf_input": true, "supports_prompt_caching": true, "supports_response_schema": true, "supports_tool_choice": true diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/250-51513f2f6dabf571.js b/litellm/proxy/_experimental/out/_next/static/chunks/250-51513f2f6dabf571.js new file mode 100644 index 0000000000..20672a1112 --- /dev/null +++ b/litellm/proxy/_experimental/out/_next/static/chunks/250-51513f2f6dabf571.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[250],{19250:function(e,t,o){o.d(t,{$I:function(){return D},AZ:function(){return V},Au:function(){return ew},BL:function(){return eS},Br:function(){return C},E9:function(){return eO},EG:function(){return eA},EY:function(){return eR},Eb:function(){return j},FC:function(){return eo},Gh:function(){return ef},H1:function(){return P},H2:function(){return a},I1:function(){return E},It:function(){return S},J$:function(){return Y},K8:function(){return i},K_:function(){return eJ},LY:function(){return eb},Lp:function(){return eT},N8:function(){return X},NV:function(){return m},Nc:function(){return ey},O3:function(){return eF},OD:function(){return eh},OU:function(){return en},Of:function(){return N},Og:function(){return u},Ov:function(){return T},PT:function(){return L},RQ:function(){return k},Rg:function(){return q},Sb:function(){return ej},So:function(){return K},Tj:function(){return eI},VA:function(){return v},Vt:function(){return eP},W_:function(){return A},X:function(){return Q},XO:function(){return g},Xd:function(){return ep},Xm:function(){return F},YU:function(){return ex},Zr:function(){return y},a6:function(){return x},ao:function(){return eG},b1:function(){return ea},cq:function(){return G},cu:function(){return ek},eH:function(){return Z},fP:function(){return H},g:function(){return ez},h3:function(){return et},hT:function(){return em},hy:function(){return p},j2:function(){return W},jA:function(){return 
ev},jE:function(){return eC},kK:function(){return h},kn:function(){return U},lP:function(){return d},lg:function(){return eu},mR:function(){return $},m_:function(){return J},mp:function(){return eB},n$:function(){return ei},o6:function(){return M},pf:function(){return eN},qI:function(){return f},qk:function(){return eU},qm:function(){return w},r6:function(){return B},rs:function(){return b},s0:function(){return R},sN:function(){return e_},t$:function(){return O},t3:function(){return eV},tN:function(){return er},u5:function(){return ee},um:function(){return eg},v9:function(){return ed},vh:function(){return eE},wX:function(){return _},wd:function(){return ec},xA:function(){return el},zg:function(){return es}});var r=o(41021);let a=null;console.log=function(){};let n=0,c=e=>new Promise(t=>setTimeout(t,e)),s=async e=>{let t=Date.now();t-n>6e4?(e.includes("Authentication Error - Expired Key")&&(r.ZP.info("UI Session Expired. Logging out."),n=t,await c(3e3),document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=/;",window.location.href="/"),n=t):console.log("Error suppressed to prevent spam:",e)},l="Authorization";function i(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:"Authorization";console.log("setGlobalLitellmHeaderName: ".concat(e)),l=e}let d=async()=>{let e=a?"".concat(a,"/openapi.json"):"/openapi.json",t=await fetch(e);return await t.json()},w=async e=>{try{let t=a?"".concat(a,"/get/litellm_model_cost_map"):"/get/litellm_model_cost_map",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}}),r=await o.json();return console.log("received litellm model cost data: ".concat(r)),r}catch(e){throw console.error("Failed to get model cost map:",e),e}},h=async(e,t)=>{try{let c=a?"".concat(a,"/model/new"):"/model/new",s=await fetch(c,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!s.ok){var o,n;let e=await 
s.json(),t=(null===(n=e.error)||void 0===n?void 0:null===(o=n.message)||void 0===o?void 0:o.error)||"Network response was not ok";throw r.ZP.error(t),Error(t)}let i=await s.json();return console.log("API Response:",i),r.ZP.destroy(),r.ZP.success("Model ".concat(t.model_name," created successfully"),2),i}catch(e){throw console.error("Failed to create key:",e),e}},p=async e=>{try{let t=a?"".concat(a,"/model/settings"):"/model/settings",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},u=async(e,t)=>{console.log("model_id in model delete call: ".concat(t));try{let o=a?"".concat(a,"/model/delete"):"/model/delete",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({id:t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("API Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},m=async(e,t)=>{if(console.log("budget_id in budget delete call: ".concat(t)),null!=e)try{let o=a?"".concat(a,"/budget/delete"):"/budget/delete",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({id:t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("API Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},y=async(e,t)=>{try{console.log("Form Values in budgetCreateCall:",t),console.log("Form Values after check:",t);let o=a?"".concat(a,"/budget/new"):"/budget/new",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer 
".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("API Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},f=async(e,t)=>{try{console.log("Form Values in budgetUpdateCall:",t),console.log("Form Values after check:",t);let o=a?"".concat(a,"/budget/update"):"/budget/update",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("API Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},g=async(e,t)=>{try{let o=a?"".concat(a,"/invitation/new"):"/invitation/new",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_id:t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("API Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},k=async e=>{try{let t=a?"".concat(a,"/alerting/settings"):"/alerting/settings",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},_=async(e,t,o)=>{try{if(console.log("Form Values in keyCreateCall:",o),o.description&&(o.metadata||(o.metadata={}),o.metadata.description=o.description,delete 
o.description,o.metadata=JSON.stringify(o.metadata)),o.metadata){console.log("formValues.metadata:",o.metadata);try{o.metadata=JSON.parse(o.metadata)}catch(e){throw Error("Failed to parse metadata: "+e)}}console.log("Form Values after check:",o);let r=a?"".concat(a,"/key/generate"):"/key/generate",n=await fetch(r,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_id:t,...o})});if(!n.ok){let e=await n.text();throw s(e),console.error("Error response from the server:",e),Error(e)}let c=await n.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},T=async(e,t,o)=>{try{if(console.log("Form Values in keyCreateCall:",o),o.description&&(o.metadata||(o.metadata={}),o.metadata.description=o.description,delete o.description,o.metadata=JSON.stringify(o.metadata)),o.metadata){console.log("formValues.metadata:",o.metadata);try{o.metadata=JSON.parse(o.metadata)}catch(e){throw Error("Failed to parse metadata: "+e)}}console.log("Form Values after check:",o);let r=a?"".concat(a,"/user/new"):"/user/new",n=await fetch(r,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_id:t,...o})});if(!n.ok){let e=await n.text();throw s(e),console.error("Error response from the server:",e),Error(e)}let c=await n.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},E=async(e,t)=>{try{let o=a?"".concat(a,"/key/delete"):"/key/delete";console.log("in keyDeleteCall:",t);let r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({keys:[t]})});if(!r.ok){let e=await r.text();throw s(e),Error("Network response was not ok")}let n=await r.json();return console.log(n),n}catch(e){throw console.error("Failed to create key:",e),e}},j=async(e,t)=>{try{let o=a?"".concat(a,"/user/delete"):"/user/delete";console.log("in 
userDeleteCall:",t);let r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_ids:t})});if(!r.ok){let e=await r.text();throw s(e),Error("Network response was not ok")}let n=await r.json();return console.log(n),n}catch(e){throw console.error("Failed to delete user(s):",e),e}},b=async(e,t)=>{try{let o=a?"".concat(a,"/team/delete"):"/team/delete";console.log("in teamDeleteCall:",t);let r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_ids:[t]})});if(!r.ok){let e=await r.text();throw s(e),Error("Network response was not ok")}let n=await r.json();return console.log(n),n}catch(e){throw console.error("Failed to delete key:",e),e}},N=async function(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:null,o=arguments.length>2&&void 0!==arguments[2]?arguments[2]:null,r=arguments.length>3&&void 0!==arguments[3]?arguments[3]:null;try{let n=a?"".concat(a,"/user/list"):"/user/list";console.log("in userListCall");let c=new URLSearchParams;if(t&&t.length>0){let e=t.join(",");c.append("user_ids",e)}o&&c.append("page",o.toString()),r&&c.append("page_size",r.toString());let i=c.toString();i&&(n+="?".concat(i));let d=await fetch(n,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!d.ok){let e=await d.text();throw s(e),Error("Network response was not ok")}let w=await d.json();return console.log("/user/list API Response:",w),w}catch(e){throw console.error("Failed to create key:",e),e}},C=async function(e,t,o){let r=arguments.length>3&&void 0!==arguments[3]&&arguments[3],n=arguments.length>4?arguments[4]:void 0,c=arguments.length>5?arguments[5]:void 0;try{let i;if(r){i=a?"".concat(a,"/user/list"):"/user/list";let e=new URLSearchParams;null!=n&&e.append("page",n.toString()),null!=c&&e.append("page_size",c.toString()),i+="?".concat(e.toString())}else 
i=a?"".concat(a,"/user/info"):"/user/info","Admin"===o||"Admin Viewer"===o||t&&(i+="?user_id=".concat(t));console.log("Requesting user data from:",i);let d=await fetch(i,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!d.ok){let e=await d.text();throw s(e),Error("Network response was not ok")}let w=await d.json();return console.log("API Response:",w),w}catch(e){throw console.error("Failed to fetch user data:",e),e}},F=async(e,t)=>{try{let o=a?"".concat(a,"/team/info"):"/team/info";t&&(o="".concat(o,"?team_id=").concat(t)),console.log("in teamInfoCall");let r=await fetch(o,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw s(e),Error("Network response was not ok")}let n=await r.json();return console.log("API Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},S=async function(e,t){let o=arguments.length>2&&void 0!==arguments[2]?arguments[2]:null;try{let r=a?"".concat(a,"/team/list"):"/team/list";console.log("in teamInfoCall");let n=new URLSearchParams;o&&n.append("user_id",o.toString()),t&&n.append("organization_id",t.toString());let c=n.toString();c&&(r+="?".concat(c));let i=await fetch(r,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!i.ok){let e=await i.text();throw s(e),Error("Network response was not ok")}let d=await i.json();return console.log("/team/list API Response:",d),d}catch(e){throw console.error("Failed to create key:",e),e}},x=async e=>{try{let t=a?"".concat(a,"/team/available"):"/team/available";console.log("in availableTeamListCall");let o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}let r=await o.json();return console.log("/team/available_teams API Response:",r),r}catch(e){throw e}},B=async e=>{try{let 
t=a?"".concat(a,"/organization/list"):"/organization/list",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},O=async(e,t)=>{try{let o=a?"".concat(a,"/organization/info"):"/organization/info";t&&(o="".concat(o,"?organization_id=").concat(t)),console.log("in teamInfoCall");let r=await fetch(o,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw s(e),Error("Network response was not ok")}let n=await r.json();return console.log("API Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},P=async(e,t)=>{try{if(console.log("Form Values in organizationCreateCall:",t),t.metadata){console.log("formValues.metadata:",t.metadata);try{t.metadata=JSON.parse(t.metadata)}catch(e){throw console.error("Failed to parse metadata:",e),Error("Failed to parse metadata: "+e)}}let o=a?"".concat(a,"/organization/new"):"/organization/new",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("API Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},v=async(e,t)=>{try{console.log("Form Values in organizationUpdateCall:",t);let o=a?"".concat(a,"/organization/update"):"/organization/update",r=await fetch(o,{method:"PATCH",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("Update Team Response:",n),n}catch(e){throw 
console.error("Failed to create key:",e),e}},G=async(e,t)=>{try{let o=a?"".concat(a,"/organization/delete"):"/organization/delete",r=await fetch(o,{method:"DELETE",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({organization_ids:[t]})});if(!r.ok){let e=await r.text();throw s(e),Error("Error deleting organization: ".concat(e))}return await r.json()}catch(e){throw console.error("Failed to delete organization:",e),e}},A=async e=>{try{let t=a?"".concat(a,"/onboarding/get_token"):"/onboarding/get_token";t+="?invite_link=".concat(e);let o=await fetch(t,{method:"GET",headers:{"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},J=async(e,t,o,r)=>{let n=a?"".concat(a,"/onboarding/claim_token"):"/onboarding/claim_token";try{let a=await fetch(n,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({invitation_link:t,user_id:o,password:r})});if(!a.ok){let e=await a.text();throw s(e),Error("Network response was not ok")}let c=await a.json();return console.log(c),c}catch(e){throw console.error("Failed to delete key:",e),e}},R=async(e,t,o)=>{try{let r=a?"".concat(a,"/key/").concat(t,"/regenerate"):"/key/".concat(t,"/regenerate"),n=await fetch(r,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify(o)});if(!n.ok){let e=await n.text();throw s(e),Error("Network response was not ok")}let c=await n.json();return console.log("Regenerate key Response:",c),c}catch(e){throw console.error("Failed to regenerate key:",e),e}},I=!1,z=null,V=async(e,t,o)=>{try{let t=a?"".concat(a,"/v2/model/info"):"/v2/model/info",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw e+="error shown=".concat(I),I||(e.includes("No model list 
passed")&&(e="No Models Exist. Click Add Model to get started."),r.ZP.info(e,10),I=!0,z&&clearTimeout(z),z=setTimeout(()=>{I=!1},1e4)),Error("Network response was not ok")}let n=await o.json();return console.log("modelInfoCall:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},U=async e=>{try{let t=a?"".concat(a,"/model_group/info"):"/model_group/info",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok)throw await o.text(),Error("Network response was not ok");let r=await o.json();return console.log("modelHubCall:",r),r}catch(e){throw console.error("Failed to create key:",e),e}},L=async e=>{try{let t=a?"".concat(a,"/get/allowed_ips"):"/get/allowed_ips",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw Error("Network response was not ok: ".concat(e))}let r=await o.json();return console.log("getAllowedIPs:",r),r.data}catch(e){throw console.error("Failed to get allowed IPs:",e),e}},Z=async(e,t)=>{try{let o=a?"".concat(a,"/add/allowed_ip"):"/add/allowed_ip",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({ip:t})});if(!r.ok){let e=await r.text();throw Error("Network response was not ok: ".concat(e))}let n=await r.json();return console.log("addAllowedIP:",n),n}catch(e){throw console.error("Failed to add allowed IP:",e),e}},D=async(e,t)=>{try{let o=a?"".concat(a,"/delete/allowed_ip"):"/delete/allowed_ip",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({ip:t})});if(!r.ok){let e=await r.text();throw Error("Network response was not ok: ".concat(e))}let n=await r.json();return console.log("deleteAllowedIP:",n),n}catch(e){throw console.error("Failed to delete allowed IP:",e),e}},M=async(e,t,o,r,n,c,i,d)=>{try{let 
t=a?"".concat(a,"/model/metrics"):"/model/metrics";r&&(t="".concat(t,"?_selected_model_group=").concat(r,"&startTime=").concat(n,"&endTime=").concat(c,"&api_key=").concat(i,"&customer=").concat(d));let o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},q=async(e,t,o,r)=>{try{let n=a?"".concat(a,"/model/streaming_metrics"):"/model/streaming_metrics";t&&(n="".concat(n,"?_selected_model_group=").concat(t,"&startTime=").concat(o,"&endTime=").concat(r));let c=await fetch(n,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!c.ok){let e=await c.text();throw s(e),Error("Network response was not ok")}return await c.json()}catch(e){throw console.error("Failed to create key:",e),e}},H=async(e,t,o,r,n,c,i,d)=>{try{let t=a?"".concat(a,"/model/metrics/slow_responses"):"/model/metrics/slow_responses";r&&(t="".concat(t,"?_selected_model_group=").concat(r,"&startTime=").concat(n,"&endTime=").concat(c,"&api_key=").concat(i,"&customer=").concat(d));let o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},X=async(e,t,o,r,n,c,i,d)=>{try{let t=a?"".concat(a,"/model/metrics/exceptions"):"/model/metrics/exceptions";r&&(t="".concat(t,"?_selected_model_group=").concat(r,"&startTime=").concat(n,"&endTime=").concat(c,"&api_key=").concat(i,"&customer=").concat(d));let o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},K=async 
function(e,t,o){let r=arguments.length>3&&void 0!==arguments[3]&&arguments[3];console.log("in /models calls, globalLitellmHeaderName",l);try{let t=a?"".concat(a,"/models"):"/models";!0===r&&(t+="?return_wildcard_routes=True");let o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},$=async e=>{try{let t=a?"".concat(a,"/global/spend/teams"):"/global/spend/teams";console.log("in teamSpendLogsCall:",t);let o=await fetch("".concat(t),{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},Y=async(e,t,o,r)=>{try{let n=a?"".concat(a,"/global/spend/tags"):"/global/spend/tags";t&&o&&(n="".concat(n,"?start_date=").concat(t,"&end_date=").concat(o)),r&&(n+="".concat(n,"&tags=").concat(r.join(","))),console.log("in tagsSpendLogsCall:",n);let c=await fetch("".concat(n),{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!c.ok)throw await c.text(),Error("Network response was not ok");let s=await c.json();return console.log(s),s}catch(e){throw console.error("Failed to create key:",e),e}},Q=async e=>{try{let t=a?"".concat(a,"/global/spend/all_tag_names"):"/global/spend/all_tag_names";console.log("in global/spend/all_tag_names call",t);let o=await fetch("".concat(t),{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok)throw await o.text(),Error("Network response was not ok");let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},W=async e=>{try{let t=a?"".concat(a,"/global/all_end_users"):"/global/all_end_users";console.log("in 
global/all_end_users call",t);let o=await fetch("".concat(t),{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok)throw await o.text(),Error("Network response was not ok");let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},ee=async(e,t)=>{try{let o=a?"".concat(a,"/user/filter/ui"):"/user/filter/ui";t.get("user_email")&&(o+="?user_email=".concat(t.get("user_email"))),t.get("user_id")&&(o+="?user_id=".concat(t.get("user_id")));let r=await fetch(o,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw s(e),Error("Network response was not ok")}return await r.json()}catch(e){throw console.error("Failed to create key:",e),e}},et=async(e,t,o,r,n,c,i,d,w)=>{try{let h=a?"".concat(a,"/spend/logs/ui"):"/spend/logs/ui",p=new URLSearchParams;t&&p.append("api_key",t),o&&p.append("team_id",o),r&&p.append("request_id",r),n&&p.append("start_date",n),c&&p.append("end_date",c),i&&p.append("page",i.toString()),d&&p.append("page_size",d.toString()),w&&p.append("user_id",w);let u=p.toString();u&&(h+="?".concat(u));let m=await fetch(h,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!m.ok){let e=await m.text();throw s(e),Error("Network response was not ok")}let y=await m.json();return console.log("Spend Logs Response:",y),y}catch(e){throw console.error("Failed to fetch spend logs:",e),e}},eo=async e=>{try{let t=a?"".concat(a,"/global/spend/logs"):"/global/spend/logs",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},er=async e=>{try{let t=a?"".concat(a,"/global/spend/keys?limit=5"):"/global/spend/keys?limit=5",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer 
".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},ea=async(e,t,o,r)=>{try{let n=a?"".concat(a,"/global/spend/end_users"):"/global/spend/end_users",c="";c=t?JSON.stringify({api_key:t,startTime:o,endTime:r}):JSON.stringify({startTime:o,endTime:r});let i={method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:c},d=await fetch(n,i);if(!d.ok){let e=await d.text();throw s(e),Error("Network response was not ok")}let w=await d.json();return console.log(w),w}catch(e){throw console.error("Failed to create key:",e),e}},en=async(e,t,o,r)=>{try{let n=a?"".concat(a,"/global/spend/provider"):"/global/spend/provider";o&&r&&(n+="?start_date=".concat(o,"&end_date=").concat(r)),t&&(n+="&api_key=".concat(t));let c={method:"GET",headers:{[l]:"Bearer ".concat(e)}},i=await fetch(n,c);if(!i.ok){let e=await i.text();throw s(e),Error("Network response was not ok")}let d=await i.json();return console.log(d),d}catch(e){throw console.error("Failed to fetch spend data:",e),e}},ec=async(e,t,o)=>{try{let r=a?"".concat(a,"/global/activity"):"/global/activity";t&&o&&(r+="?start_date=".concat(t,"&end_date=").concat(o));let n={method:"GET",headers:{[l]:"Bearer ".concat(e)}},c=await fetch(r,n);if(!c.ok)throw await c.text(),Error("Network response was not ok");let s=await c.json();return console.log(s),s}catch(e){throw console.error("Failed to fetch spend data:",e),e}},es=async(e,t,o)=>{try{let r=a?"".concat(a,"/global/activity/cache_hits"):"/global/activity/cache_hits";t&&o&&(r+="?start_date=".concat(t,"&end_date=").concat(o));let n={method:"GET",headers:{[l]:"Bearer ".concat(e)}},c=await fetch(r,n);if(!c.ok)throw await c.text(),Error("Network response was not ok");let s=await c.json();return console.log(s),s}catch(e){throw console.error("Failed to fetch spend 
data:",e),e}},el=async(e,t,o)=>{try{let r=a?"".concat(a,"/global/activity/model"):"/global/activity/model";t&&o&&(r+="?start_date=".concat(t,"&end_date=").concat(o));let n={method:"GET",headers:{[l]:"Bearer ".concat(e)}},c=await fetch(r,n);if(!c.ok)throw await c.text(),Error("Network response was not ok");let s=await c.json();return console.log(s),s}catch(e){throw console.error("Failed to fetch spend data:",e),e}},ei=async(e,t,o,r)=>{try{let n=a?"".concat(a,"/global/activity/exceptions"):"/global/activity/exceptions";t&&o&&(n+="?start_date=".concat(t,"&end_date=").concat(o)),r&&(n+="&model_group=".concat(r));let c={method:"GET",headers:{[l]:"Bearer ".concat(e)}},s=await fetch(n,c);if(!s.ok)throw await s.text(),Error("Network response was not ok");let i=await s.json();return console.log(i),i}catch(e){throw console.error("Failed to fetch spend data:",e),e}},ed=async(e,t,o,r)=>{try{let n=a?"".concat(a,"/global/activity/exceptions/deployment"):"/global/activity/exceptions/deployment";t&&o&&(n+="?start_date=".concat(t,"&end_date=").concat(o)),r&&(n+="&model_group=".concat(r));let c={method:"GET",headers:{[l]:"Bearer ".concat(e)}},s=await fetch(n,c);if(!s.ok)throw await s.text(),Error("Network response was not ok");let i=await s.json();return console.log(i),i}catch(e){throw console.error("Failed to fetch spend data:",e),e}},ew=async e=>{try{let t=a?"".concat(a,"/global/spend/models?limit=5"):"/global/spend/models?limit=5",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},eh=async(e,t,o,r,n)=>{try{let c=a?"".concat(a,"/key/list"):"/key/list";console.log("in keyListCall");let i=new 
URLSearchParams;o&&i.append("team_id",o.toString()),t&&i.append("organization_id",t.toString()),r&&i.append("page",r.toString()),n&&i.append("size",n.toString()),i.append("return_full_object","true"),i.append("include_team_keys","true");let d=i.toString();d&&(c+="?".concat(d));let w=await fetch(c,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!w.ok){let e=await w.text();throw s(e),Error("Network response was not ok")}let h=await w.json();return console.log("/team/list API Response:",h),h}catch(e){throw console.error("Failed to create key:",e),e}},ep=async(e,t)=>{try{let o=a?"".concat(a,"/user/get_users?role=").concat(t):"/user/get_users?role=".concat(t);console.log("in userGetAllUsersCall:",o);let r=await fetch(o,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw s(e),Error("Network response was not ok")}let n=await r.json();return console.log(n),n}catch(e){throw console.error("Failed to get requested models:",e),e}},eu=async e=>{try{let t=a?"".concat(a,"/user/available_roles"):"/user/available_roles",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok)throw await o.text(),Error("Network response was not ok");let r=await o.json();return console.log("response from user/available_role",r),r}catch(e){throw e}},em=async(e,t)=>{try{if(console.log("Form Values in teamCreateCall:",t),t.metadata){console.log("formValues.metadata:",t.metadata);try{t.metadata=JSON.parse(t.metadata)}catch(e){throw Error("Failed to parse metadata: "+e)}}let o=a?"".concat(a,"/team/new"):"/team/new",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("API Response:",n),n}catch(e){throw 
console.error("Failed to create key:",e),e}},ey=async(e,t)=>{try{if(console.log("Form Values in keyUpdateCall:",t),t.model_tpm_limit){console.log("formValues.model_tpm_limit:",t.model_tpm_limit);try{t.model_tpm_limit=JSON.parse(t.model_tpm_limit)}catch(e){throw Error("Failed to parse model_tpm_limit: "+e)}}if(t.model_rpm_limit){console.log("formValues.model_rpm_limit:",t.model_rpm_limit);try{t.model_rpm_limit=JSON.parse(t.model_rpm_limit)}catch(e){throw Error("Failed to parse model_rpm_limit: "+e)}}let o=a?"".concat(a,"/key/update"):"/key/update",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("Update key Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},ef=async(e,t)=>{try{console.log("Form Values in teamUpateCall:",t);let o=a?"".concat(a,"/team/update"):"/team/update",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("Update Team Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},eg=async(e,t)=>{try{console.log("Form Values in modelUpateCall:",t);let o=a?"".concat(a,"/model/update"):"/model/update",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error update from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("Update model Response:",n),n}catch(e){throw console.error("Failed to update model:",e),e}},ek=async(e,t,o)=>{try{console.log("Form 
Values in teamMemberAddCall:",o);let r=a?"".concat(a,"/team/member_add"):"/team/member_add",n=await fetch(r,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_id:t,member:o})});if(!n.ok){let e=await n.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await n.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},e_=async(e,t,o)=>{try{console.log("Form Values in teamMemberAddCall:",o);let r=a?"".concat(a,"/team/member_update"):"/team/member_update",n=await fetch(r,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_id:t,role:o.role,user_id:o.user_id})});if(!n.ok){let e=await n.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await n.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},eT=async(e,t,o)=>{try{console.log("Form Values in teamMemberAddCall:",o);let r=a?"".concat(a,"/team/member_delete"):"/team/member_delete",n=await fetch(r,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_id:t,...void 0!==o.user_email&&{user_email:o.user_email},...void 0!==o.user_id&&{user_id:o.user_id}})});if(!n.ok){let e=await n.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await n.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},eE=async(e,t,o)=>{try{console.log("Form Values in teamMemberAddCall:",o);let r=a?"".concat(a,"/organization/member_add"):"/organization/member_add",n=await fetch(r,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({organization_id:t,member:o})});if(!n.ok){let e=await n.text();throw 
s(e),console.error("Error response from the server:",e),Error(e)}let c=await n.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create organization member:",e),e}},ej=async(e,t,o)=>{try{console.log("Form Values in organizationMemberDeleteCall:",o);let r=a?"".concat(a,"/organization/member_delete"):"/organization/member_delete",n=await fetch(r,{method:"DELETE",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({organization_id:t,user_id:o})});if(!n.ok){let e=await n.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await n.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to delete organization member:",e),e}},eb=async(e,t,o)=>{try{console.log("Form Values in organizationMemberUpdateCall:",o);let r=a?"".concat(a,"/organization/member_update"):"/organization/member_update",n=await fetch(r,{method:"PATCH",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({organization_id:t,...o})});if(!n.ok){let e=await n.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await n.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to update organization member:",e),e}},eN=async(e,t,o)=>{try{console.log("Form Values in userUpdateUserCall:",t);let r=a?"".concat(a,"/user/update"):"/user/update",n={...t};null!==o&&(n.user_role=o),n=JSON.stringify(n);let c=await fetch(r,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:n});if(!c.ok){let e=await c.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let i=await c.json();return console.log("API Response:",i),i}catch(e){throw console.error("Failed to create key:",e),e}},eC=async(e,t)=>{try{let 
o=a?"".concat(a,"/health/services?service=").concat(t):"/health/services?service=".concat(t);console.log("Checking Slack Budget Alerts service health");let n=await fetch(o,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!n.ok){let e=await n.text();throw s(e),Error(e)}let c=await n.json();return r.ZP.success("Test request to ".concat(t," made - check logs/alerts on ").concat(t," to verify")),c}catch(e){throw console.error("Failed to perform health check:",e),e}},eF=async e=>{try{let t=a?"".concat(a,"/budget/list"):"/budget/list",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},eS=async(e,t,o)=>{try{let t=a?"".concat(a,"/get/config/callbacks"):"/get/config/callbacks",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},ex=async e=>{try{let t=a?"".concat(a,"/config/list?config_type=general_settings"):"/config/list?config_type=general_settings",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},eB=async e=>{try{let t=a?"".concat(a,"/config/pass_through_endpoint"):"/config/pass_through_endpoint",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},eO=async(e,t)=>{try{let 
o=a?"".concat(a,"/config/field/info?field_name=").concat(t):"/config/field/info?field_name=".concat(t),r=await fetch(o,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok)throw await r.text(),Error("Network response was not ok");return await r.json()}catch(e){throw console.error("Failed to set callbacks:",e),e}},eP=async(e,t)=>{try{let o=a?"".concat(a,"/config/pass_through_endpoint"):"/config/pass_through_endpoint",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw s(e),Error("Network response was not ok")}return await r.json()}catch(e){throw console.error("Failed to set callbacks:",e),e}},ev=async(e,t,o)=>{try{let n=a?"".concat(a,"/config/field/update"):"/config/field/update",c=await fetch(n,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({field_name:t,field_value:o,config_type:"general_settings"})});if(!c.ok){let e=await c.text();throw s(e),Error("Network response was not ok")}let i=await c.json();return r.ZP.success("Successfully updated value!"),i}catch(e){throw console.error("Failed to set callbacks:",e),e}},eG=async(e,t)=>{try{let o=a?"".concat(a,"/config/field/delete"):"/config/field/delete",n=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({field_name:t,config_type:"general_settings"})});if(!n.ok){let e=await n.text();throw s(e),Error("Network response was not ok")}let c=await n.json();return r.ZP.success("Field reset on proxy"),c}catch(e){throw console.error("Failed to get callbacks:",e),e}},eA=async(e,t)=>{try{let o=a?"".concat(a,"/config/pass_through_endpoint?endpoint_id=").concat(t):"/config/pass_through_endpoint".concat(t),r=await fetch(o,{method:"DELETE",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw 
s(e),Error("Network response was not ok")}return await r.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},eJ=async(e,t)=>{try{let o=a?"".concat(a,"/config/update"):"/config/update",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw s(e),Error("Network response was not ok")}return await r.json()}catch(e){throw console.error("Failed to set callbacks:",e),e}},eR=async e=>{try{let t=a?"".concat(a,"/health"):"/health",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to call /health:",e),e}},eI=async e=>{try{let t=a?"".concat(a,"/cache/ping"):"/cache/ping",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error(e)}return await o.json()}catch(e){throw console.error("Failed to call /cache/ping:",e),e}},ez=async e=>{try{let t=a?"".concat(a,"/sso/get/ui_settings"):"/sso/get/ui_settings",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok)throw await o.text(),Error("Network response was not ok");return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},eV=async e=>{try{let t=a?"".concat(a,"/guardrails/list"):"/guardrails/list",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}let r=await o.json();return console.log("Guardrails list response:",r),r}catch(e){throw console.error("Failed to fetch guardrails list:",e),e}},eU=async(e,t,o)=>{try{let 
r=a?"".concat(a,"/spend/logs/ui/").concat(t,"?start_date=").concat(encodeURIComponent(o)):"/spend/logs/ui/".concat(t,"?start_date=").concat(encodeURIComponent(o));console.log("Fetching log details from:",r);let n=await fetch(r,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!n.ok){let e=await n.text();throw s(e),Error("Network response was not ok")}let c=await n.json();return console.log("Fetched log details:",c),c}catch(e){throw console.error("Failed to fetch log details:",e),e}}}}]); \ No newline at end of file diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/250-fd088aaa064b7d46.js b/litellm/proxy/_experimental/out/_next/static/chunks/250-fd088aaa064b7d46.js deleted file mode 100644 index ad163b3464..0000000000 --- a/litellm/proxy/_experimental/out/_next/static/chunks/250-fd088aaa064b7d46.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[250],{19250:function(e,t,o){o.d(t,{$I:function(){return Z},AZ:function(){return z},Au:function(){return ed},BL:function(){return eF},Br:function(){return N},E9:function(){return eB},EG:function(){return eG},EY:function(){return eJ},Eb:function(){return j},FC:function(){return et},Gh:function(){return ey},H1:function(){return O},H2:function(){return a},I1:function(){return E},It:function(){return F},J$:function(){return $},K8:function(){return i},K_:function(){return eA},LY:function(){return ej},Lp:function(){return e_},N8:function(){return H},NV:function(){return m},Nc:function(){return em},O3:function(){return eC},OD:function(){return ew},OU:function(){return ea},Og:function(){return u},Ov:function(){return T},PT:function(){return U},RQ:function(){return k},Rg:function(){return M},Sb:function(){return eE},So:function(){return X},Tj:function(){return eI},VA:function(){return P},Vt:function(){return eO},W_:function(){return G},X:function(){return Y},XO:function(){return g},Xd:function(){return eh},Xm:function(){return 
C},YU:function(){return eS},Zr:function(){return y},a6:function(){return S},ao:function(){return ev},b1:function(){return er},cq:function(){return v},cu:function(){return eg},eH:function(){return L},fP:function(){return q},g:function(){return eR},h3:function(){return ee},hT:function(){return eu},hy:function(){return p},j2:function(){return Q},jA:function(){return eP},jE:function(){return eN},kK:function(){return h},kn:function(){return V},lP:function(){return d},lg:function(){return ep},mR:function(){return K},m_:function(){return A},mp:function(){return ex},n$:function(){return el},o6:function(){return D},pf:function(){return eb},qI:function(){return f},qk:function(){return eV},qm:function(){return w},r6:function(){return x},rs:function(){return b},s0:function(){return J},sN:function(){return ek},t$:function(){return B},t3:function(){return ez},tN:function(){return eo},u5:function(){return W},um:function(){return ef},v9:function(){return ei},vh:function(){return eT},wX:function(){return _},wd:function(){return en},xA:function(){return es},zg:function(){return ec}});var r=o(41021);let a=null;console.log=function(){};let n=0,c=e=>new Promise(t=>setTimeout(t,e)),s=async e=>{let t=Date.now();t-n>6e4?(e.includes("Authentication Error - Expired Key")&&(r.ZP.info("UI Session Expired. 
Logging out."),n=t,await c(3e3),document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=/;",window.location.href="/"),n=t):console.log("Error suppressed to prevent spam:",e)},l="Authorization";function i(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:"Authorization";console.log("setGlobalLitellmHeaderName: ".concat(e)),l=e}let d=async()=>{let e=a?"".concat(a,"/openapi.json"):"/openapi.json",t=await fetch(e);return await t.json()},w=async e=>{try{let t=a?"".concat(a,"/get/litellm_model_cost_map"):"/get/litellm_model_cost_map",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}}),r=await o.json();return console.log("received litellm model cost data: ".concat(r)),r}catch(e){throw console.error("Failed to get model cost map:",e),e}},h=async(e,t)=>{try{let c=a?"".concat(a,"/model/new"):"/model/new",s=await fetch(c,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!s.ok){var o,n;let e=await s.json(),t=(null===(n=e.error)||void 0===n?void 0:null===(o=n.message)||void 0===o?void 0:o.error)||"Network response was not ok";throw r.ZP.error(t),Error(t)}let i=await s.json();return console.log("API Response:",i),r.ZP.destroy(),r.ZP.success("Model ".concat(t.model_name," created successfully"),2),i}catch(e){throw console.error("Failed to create key:",e),e}},p=async e=>{try{let t=a?"".concat(a,"/model/settings"):"/model/settings",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},u=async(e,t)=>{console.log("model_id in model delete call: ".concat(t));try{let o=a?"".concat(a,"/model/delete"):"/model/delete",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer 
".concat(e),"Content-Type":"application/json"},body:JSON.stringify({id:t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("API Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},m=async(e,t)=>{if(console.log("budget_id in budget delete call: ".concat(t)),null!=e)try{let o=a?"".concat(a,"/budget/delete"):"/budget/delete",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({id:t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("API Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},y=async(e,t)=>{try{console.log("Form Values in budgetCreateCall:",t),console.log("Form Values after check:",t);let o=a?"".concat(a,"/budget/new"):"/budget/new",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("API Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},f=async(e,t)=>{try{console.log("Form Values in budgetUpdateCall:",t),console.log("Form Values after check:",t);let o=a?"".concat(a,"/budget/update"):"/budget/update",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("API Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},g=async(e,t)=>{try{let 
o=a?"".concat(a,"/invitation/new"):"/invitation/new",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_id:t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("API Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},k=async e=>{try{let t=a?"".concat(a,"/alerting/settings"):"/alerting/settings",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},_=async(e,t,o)=>{try{if(console.log("Form Values in keyCreateCall:",o),o.description&&(o.metadata||(o.metadata={}),o.metadata.description=o.description,delete o.description,o.metadata=JSON.stringify(o.metadata)),o.metadata){console.log("formValues.metadata:",o.metadata);try{o.metadata=JSON.parse(o.metadata)}catch(e){throw Error("Failed to parse metadata: "+e)}}console.log("Form Values after check:",o);let r=a?"".concat(a,"/key/generate"):"/key/generate",n=await fetch(r,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_id:t,...o})});if(!n.ok){let e=await n.text();throw s(e),console.error("Error response from the server:",e),Error(e)}let c=await n.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},T=async(e,t,o)=>{try{if(console.log("Form Values in keyCreateCall:",o),o.description&&(o.metadata||(o.metadata={}),o.metadata.description=o.description,delete o.description,o.metadata=JSON.stringify(o.metadata)),o.metadata){console.log("formValues.metadata:",o.metadata);try{o.metadata=JSON.parse(o.metadata)}catch(e){throw Error("Failed to parse metadata: "+e)}}console.log("Form Values after 
check:",o);let r=a?"".concat(a,"/user/new"):"/user/new",n=await fetch(r,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_id:t,...o})});if(!n.ok){let e=await n.text();throw s(e),console.error("Error response from the server:",e),Error(e)}let c=await n.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},E=async(e,t)=>{try{let o=a?"".concat(a,"/key/delete"):"/key/delete";console.log("in keyDeleteCall:",t);let r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({keys:[t]})});if(!r.ok){let e=await r.text();throw s(e),Error("Network response was not ok")}let n=await r.json();return console.log(n),n}catch(e){throw console.error("Failed to create key:",e),e}},j=async(e,t)=>{try{let o=a?"".concat(a,"/user/delete"):"/user/delete";console.log("in userDeleteCall:",t);let r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({user_ids:t})});if(!r.ok){let e=await r.text();throw s(e),Error("Network response was not ok")}let n=await r.json();return console.log(n),n}catch(e){throw console.error("Failed to delete user(s):",e),e}},b=async(e,t)=>{try{let o=a?"".concat(a,"/team/delete"):"/team/delete";console.log("in teamDeleteCall:",t);let r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_ids:[t]})});if(!r.ok){let e=await r.text();throw s(e),Error("Network response was not ok")}let n=await r.json();return console.log(n),n}catch(e){throw console.error("Failed to delete key:",e),e}},N=async function(e,t,o){let r=arguments.length>3&&void 0!==arguments[3]&&arguments[3],n=arguments.length>4?arguments[4]:void 0,c=arguments.length>5?arguments[5]:void 0;try{let i;if(r){i=a?"".concat(a,"/user/list"):"/user/list";let e=new 
URLSearchParams;null!=n&&e.append("page",n.toString()),null!=c&&e.append("page_size",c.toString()),i+="?".concat(e.toString())}else i=a?"".concat(a,"/user/info"):"/user/info","Admin"===o||"Admin Viewer"===o||t&&(i+="?user_id=".concat(t));console.log("Requesting user data from:",i);let d=await fetch(i,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!d.ok){let e=await d.text();throw s(e),Error("Network response was not ok")}let w=await d.json();return console.log("API Response:",w),w}catch(e){throw console.error("Failed to fetch user data:",e),e}},C=async(e,t)=>{try{let o=a?"".concat(a,"/team/info"):"/team/info";t&&(o="".concat(o,"?team_id=").concat(t)),console.log("in teamInfoCall");let r=await fetch(o,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw s(e),Error("Network response was not ok")}let n=await r.json();return console.log("API Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},F=async function(e,t){let o=arguments.length>2&&void 0!==arguments[2]?arguments[2]:null;try{let r=a?"".concat(a,"/team/list"):"/team/list";console.log("in teamInfoCall");let n=new URLSearchParams;o&&n.append("user_id",o.toString()),t&&n.append("organization_id",t.toString());let c=n.toString();c&&(r+="?".concat(c));let i=await fetch(r,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!i.ok){let e=await i.text();throw s(e),Error("Network response was not ok")}let d=await i.json();return console.log("/team/list API Response:",d),d}catch(e){throw console.error("Failed to create key:",e),e}},S=async e=>{try{let t=a?"".concat(a,"/team/available"):"/team/available";console.log("in availableTeamListCall");let o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}let r=await o.json();return 
console.log("/team/available_teams API Response:",r),r}catch(e){throw e}},x=async e=>{try{let t=a?"".concat(a,"/organization/list"):"/organization/list",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},B=async(e,t)=>{try{let o=a?"".concat(a,"/organization/info"):"/organization/info";t&&(o="".concat(o,"?organization_id=").concat(t)),console.log("in teamInfoCall");let r=await fetch(o,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw s(e),Error("Network response was not ok")}let n=await r.json();return console.log("API Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},O=async(e,t)=>{try{if(console.log("Form Values in organizationCreateCall:",t),t.metadata){console.log("formValues.metadata:",t.metadata);try{t.metadata=JSON.parse(t.metadata)}catch(e){throw console.error("Failed to parse metadata:",e),Error("Failed to parse metadata: "+e)}}let o=a?"".concat(a,"/organization/new"):"/organization/new",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("API Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},P=async(e,t)=>{try{console.log("Form Values in organizationUpdateCall:",t);let o=a?"".concat(a,"/organization/update"):"/organization/update",r=await fetch(o,{method:"PATCH",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not 
ok")}let n=await r.json();return console.log("Update Team Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},v=async(e,t)=>{try{let o=a?"".concat(a,"/organization/delete"):"/organization/delete",r=await fetch(o,{method:"DELETE",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({organization_ids:[t]})});if(!r.ok){let e=await r.text();throw s(e),Error("Error deleting organization: ".concat(e))}return await r.json()}catch(e){throw console.error("Failed to delete organization:",e),e}},G=async e=>{try{let t=a?"".concat(a,"/onboarding/get_token"):"/onboarding/get_token";t+="?invite_link=".concat(e);let o=await fetch(t,{method:"GET",headers:{"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},A=async(e,t,o,r)=>{let n=a?"".concat(a,"/onboarding/claim_token"):"/onboarding/claim_token";try{let a=await fetch(n,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({invitation_link:t,user_id:o,password:r})});if(!a.ok){let e=await a.text();throw s(e),Error("Network response was not ok")}let c=await a.json();return console.log(c),c}catch(e){throw console.error("Failed to delete key:",e),e}},J=async(e,t,o)=>{try{let r=a?"".concat(a,"/key/").concat(t,"/regenerate"):"/key/".concat(t,"/regenerate"),n=await fetch(r,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify(o)});if(!n.ok){let e=await n.text();throw s(e),Error("Network response was not ok")}let c=await n.json();return console.log("Regenerate key Response:",c),c}catch(e){throw console.error("Failed to regenerate key:",e),e}},I=!1,R=null,z=async(e,t,o)=>{try{let t=a?"".concat(a,"/v2/model/info"):"/v2/model/info",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let 
e=await o.text();throw e+="error shown=".concat(I),I||(e.includes("No model list passed")&&(e="No Models Exist. Click Add Model to get started."),r.ZP.info(e,10),I=!0,R&&clearTimeout(R),R=setTimeout(()=>{I=!1},1e4)),Error("Network response was not ok")}let n=await o.json();return console.log("modelInfoCall:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},V=async e=>{try{let t=a?"".concat(a,"/model_group/info"):"/model_group/info",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok)throw await o.text(),Error("Network response was not ok");let r=await o.json();return console.log("modelHubCall:",r),r}catch(e){throw console.error("Failed to create key:",e),e}},U=async e=>{try{let t=a?"".concat(a,"/get/allowed_ips"):"/get/allowed_ips",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw Error("Network response was not ok: ".concat(e))}let r=await o.json();return console.log("getAllowedIPs:",r),r.data}catch(e){throw console.error("Failed to get allowed IPs:",e),e}},L=async(e,t)=>{try{let o=a?"".concat(a,"/add/allowed_ip"):"/add/allowed_ip",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({ip:t})});if(!r.ok){let e=await r.text();throw Error("Network response was not ok: ".concat(e))}let n=await r.json();return console.log("addAllowedIP:",n),n}catch(e){throw console.error("Failed to add allowed IP:",e),e}},Z=async(e,t)=>{try{let o=a?"".concat(a,"/delete/allowed_ip"):"/delete/allowed_ip",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({ip:t})});if(!r.ok){let e=await r.text();throw Error("Network response was not ok: ".concat(e))}let n=await r.json();return console.log("deleteAllowedIP:",n),n}catch(e){throw console.error("Failed to delete allowed 
IP:",e),e}},D=async(e,t,o,r,n,c,i,d)=>{try{let t=a?"".concat(a,"/model/metrics"):"/model/metrics";r&&(t="".concat(t,"?_selected_model_group=").concat(r,"&startTime=").concat(n,"&endTime=").concat(c,"&api_key=").concat(i,"&customer=").concat(d));let o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},M=async(e,t,o,r)=>{try{let n=a?"".concat(a,"/model/streaming_metrics"):"/model/streaming_metrics";t&&(n="".concat(n,"?_selected_model_group=").concat(t,"&startTime=").concat(o,"&endTime=").concat(r));let c=await fetch(n,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!c.ok){let e=await c.text();throw s(e),Error("Network response was not ok")}return await c.json()}catch(e){throw console.error("Failed to create key:",e),e}},q=async(e,t,o,r,n,c,i,d)=>{try{let t=a?"".concat(a,"/model/metrics/slow_responses"):"/model/metrics/slow_responses";r&&(t="".concat(t,"?_selected_model_group=").concat(r,"&startTime=").concat(n,"&endTime=").concat(c,"&api_key=").concat(i,"&customer=").concat(d));let o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},H=async(e,t,o,r,n,c,i,d)=>{try{let t=a?"".concat(a,"/model/metrics/exceptions"):"/model/metrics/exceptions";r&&(t="".concat(t,"?_selected_model_group=").concat(r,"&startTime=").concat(n,"&endTime=").concat(c,"&api_key=").concat(i,"&customer=").concat(d));let o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed 
to create key:",e),e}},X=async function(e,t,o){let r=arguments.length>3&&void 0!==arguments[3]&&arguments[3];console.log("in /models calls, globalLitellmHeaderName",l);try{let t=a?"".concat(a,"/models"):"/models";!0===r&&(t+="?return_wildcard_routes=True");let o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to create key:",e),e}},K=async e=>{try{let t=a?"".concat(a,"/global/spend/teams"):"/global/spend/teams";console.log("in teamSpendLogsCall:",t);let o=await fetch("".concat(t),{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},$=async(e,t,o,r)=>{try{let n=a?"".concat(a,"/global/spend/tags"):"/global/spend/tags";t&&o&&(n="".concat(n,"?start_date=").concat(t,"&end_date=").concat(o)),r&&(n+="".concat(n,"&tags=").concat(r.join(","))),console.log("in tagsSpendLogsCall:",n);let c=await fetch("".concat(n),{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!c.ok)throw await c.text(),Error("Network response was not ok");let s=await c.json();return console.log(s),s}catch(e){throw console.error("Failed to create key:",e),e}},Y=async e=>{try{let t=a?"".concat(a,"/global/spend/all_tag_names"):"/global/spend/all_tag_names";console.log("in global/spend/all_tag_names call",t);let o=await fetch("".concat(t),{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok)throw await o.text(),Error("Network response was not ok");let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},Q=async e=>{try{let 
t=a?"".concat(a,"/global/all_end_users"):"/global/all_end_users";console.log("in global/all_end_users call",t);let o=await fetch("".concat(t),{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok)throw await o.text(),Error("Network response was not ok");let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},W=async(e,t)=>{try{let o=a?"".concat(a,"/user/filter/ui"):"/user/filter/ui";t.get("user_email")&&(o+="?user_email=".concat(t.get("user_email"))),t.get("user_id")&&(o+="?user_id=".concat(t.get("user_id")));let r=await fetch(o,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw s(e),Error("Network response was not ok")}return await r.json()}catch(e){throw console.error("Failed to create key:",e),e}},ee=async(e,t,o,r,n,c,i,d,w,h)=>{try{let p=a?"".concat(a,"/spend/logs/ui"):"/spend/logs/ui",u=new URLSearchParams;t&&u.append("api_key",t),o&&u.append("team_id",o),w&&u.append("min_spend",w.toString()),h&&u.append("max_spend",h.toString()),r&&u.append("request_id",r),n&&u.append("start_date",n),c&&u.append("end_date",c),i&&u.append("page",i.toString()),d&&u.append("page_size",d.toString());let m=u.toString();m&&(p+="?".concat(m));let y=await fetch(p,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!y.ok){let e=await y.text();throw s(e),Error("Network response was not ok")}let f=await y.json();return console.log("Spend Logs Response:",f),f}catch(e){throw console.error("Failed to fetch spend logs:",e),e}},et=async e=>{try{let t=a?"".concat(a,"/global/spend/logs"):"/global/spend/logs",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},eo=async 
e=>{try{let t=a?"".concat(a,"/global/spend/keys?limit=5"):"/global/spend/keys?limit=5",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},er=async(e,t,o,r)=>{try{let n=a?"".concat(a,"/global/spend/end_users"):"/global/spend/end_users",c="";c=t?JSON.stringify({api_key:t,startTime:o,endTime:r}):JSON.stringify({startTime:o,endTime:r});let i={method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:c},d=await fetch(n,i);if(!d.ok){let e=await d.text();throw s(e),Error("Network response was not ok")}let w=await d.json();return console.log(w),w}catch(e){throw console.error("Failed to create key:",e),e}},ea=async(e,t,o,r)=>{try{let n=a?"".concat(a,"/global/spend/provider"):"/global/spend/provider";o&&r&&(n+="?start_date=".concat(o,"&end_date=").concat(r)),t&&(n+="&api_key=".concat(t));let c={method:"GET",headers:{[l]:"Bearer ".concat(e)}},i=await fetch(n,c);if(!i.ok){let e=await i.text();throw s(e),Error("Network response was not ok")}let d=await i.json();return console.log(d),d}catch(e){throw console.error("Failed to fetch spend data:",e),e}},en=async(e,t,o)=>{try{let r=a?"".concat(a,"/global/activity"):"/global/activity";t&&o&&(r+="?start_date=".concat(t,"&end_date=").concat(o));let n={method:"GET",headers:{[l]:"Bearer ".concat(e)}},c=await fetch(r,n);if(!c.ok)throw await c.text(),Error("Network response was not ok");let s=await c.json();return console.log(s),s}catch(e){throw console.error("Failed to fetch spend data:",e),e}},ec=async(e,t,o)=>{try{let r=a?"".concat(a,"/global/activity/cache_hits"):"/global/activity/cache_hits";t&&o&&(r+="?start_date=".concat(t,"&end_date=").concat(o));let n={method:"GET",headers:{[l]:"Bearer ".concat(e)}},c=await fetch(r,n);if(!c.ok)throw await c.text(),Error("Network response 
was not ok");let s=await c.json();return console.log(s),s}catch(e){throw console.error("Failed to fetch spend data:",e),e}},es=async(e,t,o)=>{try{let r=a?"".concat(a,"/global/activity/model"):"/global/activity/model";t&&o&&(r+="?start_date=".concat(t,"&end_date=").concat(o));let n={method:"GET",headers:{[l]:"Bearer ".concat(e)}},c=await fetch(r,n);if(!c.ok)throw await c.text(),Error("Network response was not ok");let s=await c.json();return console.log(s),s}catch(e){throw console.error("Failed to fetch spend data:",e),e}},el=async(e,t,o,r)=>{try{let n=a?"".concat(a,"/global/activity/exceptions"):"/global/activity/exceptions";t&&o&&(n+="?start_date=".concat(t,"&end_date=").concat(o)),r&&(n+="&model_group=".concat(r));let c={method:"GET",headers:{[l]:"Bearer ".concat(e)}},s=await fetch(n,c);if(!s.ok)throw await s.text(),Error("Network response was not ok");let i=await s.json();return console.log(i),i}catch(e){throw console.error("Failed to fetch spend data:",e),e}},ei=async(e,t,o,r)=>{try{let n=a?"".concat(a,"/global/activity/exceptions/deployment"):"/global/activity/exceptions/deployment";t&&o&&(n+="?start_date=".concat(t,"&end_date=").concat(o)),r&&(n+="&model_group=".concat(r));let c={method:"GET",headers:{[l]:"Bearer ".concat(e)}},s=await fetch(n,c);if(!s.ok)throw await s.text(),Error("Network response was not ok");let i=await s.json();return console.log(i),i}catch(e){throw console.error("Failed to fetch spend data:",e),e}},ed=async e=>{try{let t=a?"".concat(a,"/global/spend/models?limit=5"):"/global/spend/models?limit=5",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}let r=await o.json();return console.log(r),r}catch(e){throw console.error("Failed to create key:",e),e}},ew=async(e,t,o,r,n)=>{try{let c=a?"".concat(a,"/key/list"):"/key/list";console.log("in keyListCall");let i=new 
URLSearchParams;o&&i.append("team_id",o.toString()),t&&i.append("organization_id",t.toString()),r&&i.append("page",r.toString()),n&&i.append("size",n.toString()),i.append("return_full_object","true"),i.append("include_team_keys","true");let d=i.toString();d&&(c+="?".concat(d));let w=await fetch(c,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!w.ok){let e=await w.text();throw s(e),Error("Network response was not ok")}let h=await w.json();return console.log("/team/list API Response:",h),h}catch(e){throw console.error("Failed to create key:",e),e}},eh=async(e,t)=>{try{let o=a?"".concat(a,"/user/get_users?role=").concat(t):"/user/get_users?role=".concat(t);console.log("in userGetAllUsersCall:",o);let r=await fetch(o,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw s(e),Error("Network response was not ok")}let n=await r.json();return console.log(n),n}catch(e){throw console.error("Failed to get requested models:",e),e}},ep=async e=>{try{let t=a?"".concat(a,"/user/available_roles"):"/user/available_roles",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok)throw await o.text(),Error("Network response was not ok");let r=await o.json();return console.log("response from user/available_role",r),r}catch(e){throw e}},eu=async(e,t)=>{try{if(console.log("Form Values in teamCreateCall:",t),t.metadata){console.log("formValues.metadata:",t.metadata);try{t.metadata=JSON.parse(t.metadata)}catch(e){throw Error("Failed to parse metadata: "+e)}}let o=a?"".concat(a,"/team/new"):"/team/new",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("API Response:",n),n}catch(e){throw 
console.error("Failed to create key:",e),e}},em=async(e,t)=>{try{if(console.log("Form Values in keyUpdateCall:",t),t.model_tpm_limit){console.log("formValues.model_tpm_limit:",t.model_tpm_limit);try{t.model_tpm_limit=JSON.parse(t.model_tpm_limit)}catch(e){throw Error("Failed to parse model_tpm_limit: "+e)}}if(t.model_rpm_limit){console.log("formValues.model_rpm_limit:",t.model_rpm_limit);try{t.model_rpm_limit=JSON.parse(t.model_rpm_limit)}catch(e){throw Error("Failed to parse model_rpm_limit: "+e)}}let o=a?"".concat(a,"/key/update"):"/key/update",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("Update key Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},ey=async(e,t)=>{try{console.log("Form Values in teamUpateCall:",t);let o=a?"".concat(a,"/team/update"):"/team/update",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("Update Team Response:",n),n}catch(e){throw console.error("Failed to create key:",e),e}},ef=async(e,t)=>{try{console.log("Form Values in modelUpateCall:",t);let o=a?"".concat(a,"/model/update"):"/model/update",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw s(e),console.error("Error update from the server:",e),Error("Network response was not ok")}let n=await r.json();return console.log("Update model Response:",n),n}catch(e){throw console.error("Failed to update model:",e),e}},eg=async(e,t,o)=>{try{console.log("Form 
Values in teamMemberAddCall:",o);let r=a?"".concat(a,"/team/member_add"):"/team/member_add",n=await fetch(r,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_id:t,member:o})});if(!n.ok){let e=await n.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await n.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},ek=async(e,t,o)=>{try{console.log("Form Values in teamMemberAddCall:",o);let r=a?"".concat(a,"/team/member_update"):"/team/member_update",n=await fetch(r,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_id:t,role:o.role,user_id:o.user_id})});if(!n.ok){let e=await n.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await n.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},e_=async(e,t,o)=>{try{console.log("Form Values in teamMemberAddCall:",o);let r=a?"".concat(a,"/team/member_delete"):"/team/member_delete",n=await fetch(r,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({team_id:t,...void 0!==o.user_email&&{user_email:o.user_email},...void 0!==o.user_id&&{user_id:o.user_id}})});if(!n.ok){let e=await n.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await n.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create key:",e),e}},eT=async(e,t,o)=>{try{console.log("Form Values in teamMemberAddCall:",o);let r=a?"".concat(a,"/organization/member_add"):"/organization/member_add",n=await fetch(r,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({organization_id:t,member:o})});if(!n.ok){let e=await n.text();throw 
s(e),console.error("Error response from the server:",e),Error(e)}let c=await n.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to create organization member:",e),e}},eE=async(e,t,o)=>{try{console.log("Form Values in organizationMemberDeleteCall:",o);let r=a?"".concat(a,"/organization/member_delete"):"/organization/member_delete",n=await fetch(r,{method:"DELETE",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({organization_id:t,user_id:o})});if(!n.ok){let e=await n.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await n.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to delete organization member:",e),e}},ej=async(e,t,o)=>{try{console.log("Form Values in organizationMemberUpdateCall:",o);let r=a?"".concat(a,"/organization/member_update"):"/organization/member_update",n=await fetch(r,{method:"PATCH",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({organization_id:t,...o})});if(!n.ok){let e=await n.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let c=await n.json();return console.log("API Response:",c),c}catch(e){throw console.error("Failed to update organization member:",e),e}},eb=async(e,t,o)=>{try{console.log("Form Values in userUpdateUserCall:",t);let r=a?"".concat(a,"/user/update"):"/user/update",n={...t};null!==o&&(n.user_role=o),n=JSON.stringify(n);let c=await fetch(r,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:n});if(!c.ok){let e=await c.text();throw s(e),console.error("Error response from the server:",e),Error("Network response was not ok")}let i=await c.json();return console.log("API Response:",i),i}catch(e){throw console.error("Failed to create key:",e),e}},eN=async(e,t)=>{try{let 
o=a?"".concat(a,"/health/services?service=").concat(t):"/health/services?service=".concat(t);console.log("Checking Slack Budget Alerts service health");let n=await fetch(o,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!n.ok){let e=await n.text();throw s(e),Error(e)}let c=await n.json();return r.ZP.success("Test request to ".concat(t," made - check logs/alerts on ").concat(t," to verify")),c}catch(e){throw console.error("Failed to perform health check:",e),e}},eC=async e=>{try{let t=a?"".concat(a,"/budget/list"):"/budget/list",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},eF=async(e,t,o)=>{try{let t=a?"".concat(a,"/get/config/callbacks"):"/get/config/callbacks",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},eS=async e=>{try{let t=a?"".concat(a,"/config/list?config_type=general_settings"):"/config/list?config_type=general_settings",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},ex=async e=>{try{let t=a?"".concat(a,"/config/pass_through_endpoint"):"/config/pass_through_endpoint",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},eB=async(e,t)=>{try{let 
o=a?"".concat(a,"/config/field/info?field_name=").concat(t):"/config/field/info?field_name=".concat(t),r=await fetch(o,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok)throw await r.text(),Error("Network response was not ok");return await r.json()}catch(e){throw console.error("Failed to set callbacks:",e),e}},eO=async(e,t)=>{try{let o=a?"".concat(a,"/config/pass_through_endpoint"):"/config/pass_through_endpoint",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw s(e),Error("Network response was not ok")}return await r.json()}catch(e){throw console.error("Failed to set callbacks:",e),e}},eP=async(e,t,o)=>{try{let n=a?"".concat(a,"/config/field/update"):"/config/field/update",c=await fetch(n,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({field_name:t,field_value:o,config_type:"general_settings"})});if(!c.ok){let e=await c.text();throw s(e),Error("Network response was not ok")}let i=await c.json();return r.ZP.success("Successfully updated value!"),i}catch(e){throw console.error("Failed to set callbacks:",e),e}},ev=async(e,t)=>{try{let o=a?"".concat(a,"/config/field/delete"):"/config/field/delete",n=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({field_name:t,config_type:"general_settings"})});if(!n.ok){let e=await n.text();throw s(e),Error("Network response was not ok")}let c=await n.json();return r.ZP.success("Field reset on proxy"),c}catch(e){throw console.error("Failed to get callbacks:",e),e}},eG=async(e,t)=>{try{let o=a?"".concat(a,"/config/pass_through_endpoint?endpoint_id=").concat(t):"/config/pass_through_endpoint".concat(t),r=await fetch(o,{method:"DELETE",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!r.ok){let e=await r.text();throw 
s(e),Error("Network response was not ok")}return await r.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},eA=async(e,t)=>{try{let o=a?"".concat(a,"/config/update"):"/config/update",r=await fetch(o,{method:"POST",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"},body:JSON.stringify({...t})});if(!r.ok){let e=await r.text();throw s(e),Error("Network response was not ok")}return await r.json()}catch(e){throw console.error("Failed to set callbacks:",e),e}},eJ=async e=>{try{let t=a?"".concat(a,"/health"):"/health",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}return await o.json()}catch(e){throw console.error("Failed to call /health:",e),e}},eI=async e=>{try{let t=a?"".concat(a,"/cache/ping"):"/cache/ping",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error(e)}return await o.json()}catch(e){throw console.error("Failed to call /cache/ping:",e),e}},eR=async e=>{try{let t=a?"".concat(a,"/sso/get/ui_settings"):"/sso/get/ui_settings",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok)throw await o.text(),Error("Network response was not ok");return await o.json()}catch(e){throw console.error("Failed to get callbacks:",e),e}},ez=async e=>{try{let t=a?"".concat(a,"/guardrails/list"):"/guardrails/list",o=await fetch(t,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!o.ok){let e=await o.text();throw s(e),Error("Network response was not ok")}let r=await o.json();return console.log("Guardrails list response:",r),r}catch(e){throw console.error("Failed to fetch guardrails list:",e),e}},eV=async(e,t,o)=>{try{let 
r=a?"".concat(a,"/spend/logs/ui/").concat(t,"?start_date=").concat(encodeURIComponent(o)):"/spend/logs/ui/".concat(t,"?start_date=").concat(encodeURIComponent(o));console.log("Fetching log details from:",r);let n=await fetch(r,{method:"GET",headers:{[l]:"Bearer ".concat(e),"Content-Type":"application/json"}});if(!n.ok){let e=await n.text();throw s(e),Error("Network response was not ok")}let c=await n.json();return console.log("Fetched log details:",c),c}catch(e){throw console.error("Failed to fetch log details:",e),e}}}}]); \ No newline at end of file diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/261-cb27c20c4f8ec4c6.js b/litellm/proxy/_experimental/out/_next/static/chunks/261-e48c2ac6ff0b811c.js similarity index 99% rename from litellm/proxy/_experimental/out/_next/static/chunks/261-cb27c20c4f8ec4c6.js rename to litellm/proxy/_experimental/out/_next/static/chunks/261-e48c2ac6ff0b811c.js index 8522a957e2..f21f16362b 100644 --- a/litellm/proxy/_experimental/out/_next/static/chunks/261-cb27c20c4f8ec4c6.js +++ b/litellm/proxy/_experimental/out/_next/static/chunks/261-e48c2ac6ff0b811c.js @@ -1 +1 @@ -(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[261],{23639:function(e,t,n){"use strict";n.d(t,{Z:function(){return s}});var a=n(1119),r=n(2265),i={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M832 64H296c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h496v688c0 4.4 3.6 8 8 8h56c4.4 0 8-3.6 8-8V96c0-17.7-14.3-32-32-32zM704 192H192c-17.7 0-32 14.3-32 32v530.7c0 8.5 3.4 16.6 9.4 22.6l173.3 173.3c2.2 2.2 4.7 4 7.4 5.5v1.9h4.2c3.5 1.3 7.2 2 11 2H704c17.7 0 32-14.3 32-32V224c0-17.7-14.3-32-32-32zM350 856.2L263.9 770H350v86.2zM664 888H414V746c0-22.1-17.9-40-40-40H232V264h432v624z"}}]},name:"copy",theme:"outlined"},o=n(55015),s=r.forwardRef(function(e,t){return r.createElement(o.Z,(0,a.Z)({},e,{ref:t,icon:i}))})},77565:function(e,t,n){"use strict";n.d(t,{Z:function(){return s}});var 
a=n(1119),r=n(2265),i={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M765.7 486.8L314.9 134.7A7.97 7.97 0 00302 141v77.3c0 4.9 2.3 9.6 6.1 12.6l360 281.1-360 281.1c-3.9 3-6.1 7.7-6.1 12.6V883c0 6.7 7.7 10.4 12.9 6.3l450.8-352.1a31.96 31.96 0 000-50.4z"}}]},name:"right",theme:"outlined"},o=n(55015),s=r.forwardRef(function(e,t){return r.createElement(o.Z,(0,a.Z)({},e,{ref:t,icon:i}))})},12485:function(e,t,n){"use strict";n.d(t,{Z:function(){return p}});var a=n(5853),r=n(31492),i=n(26898),o=n(65954),s=n(1153),l=n(2265),c=n(35242),u=n(42698);n(64016),n(8710),n(33232);let d=(0,s.fn)("Tab"),p=l.forwardRef((e,t)=>{let{icon:n,className:p,children:g}=e,m=(0,a._T)(e,["icon","className","children"]),b=(0,l.useContext)(c.O),f=(0,l.useContext)(u.Z);return l.createElement(r.O,Object.assign({ref:t,className:(0,o.q)(d("root"),"flex whitespace-nowrap truncate max-w-xs outline-none focus:ring-0 text-tremor-default transition duration-100",f?(0,s.bM)(f,i.K.text).selectTextColor:"solid"===b?"ui-selected:text-tremor-content-emphasis dark:ui-selected:text-dark-tremor-content-emphasis":"ui-selected:text-tremor-brand dark:ui-selected:text-dark-tremor-brand",function(e,t){switch(e){case"line":return(0,o.q)("ui-selected:border-b-2 hover:border-b-2 border-transparent transition duration-100 -mb-px px-2 py-2","hover:border-tremor-content hover:text-tremor-content-emphasis text-tremor-content","dark:hover:border-dark-tremor-content-emphasis dark:hover:text-dark-tremor-content-emphasis dark:text-dark-tremor-content",t?(0,s.bM)(t,i.K.border).selectBorderColor:"ui-selected:border-tremor-brand dark:ui-selected:border-dark-tremor-brand");case"solid":return(0,o.q)("border-transparent border rounded-tremor-small px-2.5 py-1","ui-selected:border-tremor-border ui-selected:bg-tremor-background ui-selected:shadow-tremor-input hover:text-tremor-content-emphasis ui-selected:text-tremor-brand","dark:ui-selected:border-dark-tremor-border 
dark:ui-selected:bg-dark-tremor-background dark:ui-selected:shadow-dark-tremor-input dark:hover:text-dark-tremor-content-emphasis dark:ui-selected:text-dark-tremor-brand",t?(0,s.bM)(t,i.K.text).selectTextColor:"text-tremor-content dark:text-dark-tremor-content")}}(b,f),p)},m),n?l.createElement(n,{className:(0,o.q)(d("icon"),"flex-none h-5 w-5",g?"mr-2":"")}):null,g?l.createElement("span",null,g):null)});p.displayName="Tab"},18135:function(e,t,n){"use strict";n.d(t,{Z:function(){return c}});var a=n(5853),r=n(31492),i=n(65954),o=n(1153),s=n(2265);let l=(0,o.fn)("TabGroup"),c=s.forwardRef((e,t)=>{let{defaultIndex:n,index:o,onIndexChange:c,children:u,className:d}=e,p=(0,a._T)(e,["defaultIndex","index","onIndexChange","children","className"]);return s.createElement(r.O.Group,Object.assign({as:"div",ref:t,defaultIndex:n,selectedIndex:o,onChange:c,className:(0,i.q)(l("root"),"w-full",d)},p),u)});c.displayName="TabGroup"},35242:function(e,t,n){"use strict";n.d(t,{O:function(){return c},Z:function(){return d}});var a=n(5853),r=n(2265),i=n(42698);n(64016),n(8710),n(33232);var o=n(31492),s=n(65954);let l=(0,n(1153).fn)("TabList"),c=(0,r.createContext)("line"),u={line:(0,s.q)("flex border-b space-x-4","border-tremor-border","dark:border-dark-tremor-border"),solid:(0,s.q)("inline-flex p-0.5 rounded-tremor-default space-x-1.5","bg-tremor-background-subtle","dark:bg-dark-tremor-background-subtle")},d=r.forwardRef((e,t)=>{let{color:n,variant:d="line",children:p,className:g}=e,m=(0,a._T)(e,["color","variant","children","className"]);return r.createElement(o.O.List,Object.assign({ref:t,className:(0,s.q)(l("root"),"justify-start overflow-x-clip",u[d],g)},m),r.createElement(c.Provider,{value:d},r.createElement(i.Z.Provider,{value:n},p)))});d.displayName="TabList"},29706:function(e,t,n){"use strict";n.d(t,{Z:function(){return u}});var a=n(5853);n(42698);var r=n(64016);n(8710);var i=n(33232),o=n(65954),s=n(1153),l=n(2265);let 
c=(0,s.fn)("TabPanel"),u=l.forwardRef((e,t)=>{let{children:n,className:s}=e,u=(0,a._T)(e,["children","className"]),{selectedValue:d}=(0,l.useContext)(i.Z),p=d===(0,l.useContext)(r.Z);return l.createElement("div",Object.assign({ref:t,className:(0,o.q)(c("root"),"w-full mt-2",p?"":"hidden",s),"aria-selected":p?"true":"false"},u),n)});u.displayName="TabPanel"},77991:function(e,t,n){"use strict";n.d(t,{Z:function(){return d}});var a=n(5853),r=n(31492);n(42698);var i=n(64016);n(8710);var o=n(33232),s=n(65954),l=n(1153),c=n(2265);let u=(0,l.fn)("TabPanels"),d=c.forwardRef((e,t)=>{let{children:n,className:l}=e,d=(0,a._T)(e,["children","className"]);return c.createElement(r.O.Panels,Object.assign({as:"div",ref:t,className:(0,s.q)(u("root"),"w-full",l)},d),e=>{let{selectedIndex:t}=e;return c.createElement(o.Z.Provider,{value:{selectedValue:t}},c.Children.map(n,(e,t)=>c.createElement(i.Z.Provider,{value:t},e)))})});d.displayName="TabPanels"},42698:function(e,t,n){"use strict";n.d(t,{Z:function(){return i}});var a=n(2265),r=n(7084);n(65954);let i=(0,a.createContext)(r.fr.Blue)},64016:function(e,t,n){"use strict";n.d(t,{Z:function(){return a}});let a=(0,n(2265).createContext)(0)},8710:function(e,t,n){"use strict";n.d(t,{Z:function(){return a}});let a=(0,n(2265).createContext)(void 0)},33232:function(e,t,n){"use strict";n.d(t,{Z:function(){return a}});let a=(0,n(2265).createContext)({selectedValue:void 0,handleValueChange:void 0})},93942:function(e,t,n){"use strict";n.d(t,{i:function(){return s}});var a=n(2265),r=n(50506),i=n(13959),o=n(71744);function s(e){return t=>a.createElement(i.ZP,{theme:{token:{motion:!1,zIndexPopupBase:0}}},a.createElement(e,Object.assign({},t)))}t.Z=(e,t,n,i)=>s(s=>{let{prefixCls:l,style:c}=s,u=a.useRef(null),[d,p]=a.useState(0),[g,m]=a.useState(0),[b,f]=(0,r.Z)(!1,{value:s.open}),{getPrefixCls:E}=a.useContext(o.E_),h=E(t||"select",l);a.useEffect(()=>{if(f(!0),"undefined"!=typeof ResizeObserver){let e=new ResizeObserver(e=>{let 
t=e[0].target;p(t.offsetHeight+8),m(t.offsetWidth)}),t=setInterval(()=>{var a;let r=n?".".concat(n(h)):".".concat(h,"-dropdown"),i=null===(a=u.current)||void 0===a?void 0:a.querySelector(r);i&&(clearInterval(t),e.observe(i))},10);return()=>{clearInterval(t),e.disconnect()}}},[]);let S=Object.assign(Object.assign({},s),{style:Object.assign(Object.assign({},c),{margin:0}),open:b,visible:b,getPopupContainer:()=>u.current});return i&&(S=i(S)),a.createElement("div",{ref:u,style:{paddingBottom:d,position:"relative",minWidth:g}},a.createElement(e,Object.assign({},S)))})},51369:function(e,t,n){"use strict";let a;n.d(t,{Z:function(){return eY}});var r=n(83145),i=n(2265),o=n(18404),s=n(71744),l=n(13959),c=n(8900),u=n(39725),d=n(54537),p=n(55726),g=n(36760),m=n.n(g),b=n(62236),f=n(68710),E=n(55274),h=n(29961),S=n(69819),y=n(73002),T=n(51248),A=e=>{let{type:t,children:n,prefixCls:a,buttonProps:r,close:o,autoFocus:s,emitEvent:l,isSilent:c,quitOnNullishReturnValue:u,actionFn:d}=e,p=i.useRef(!1),g=i.useRef(null),[m,b]=(0,S.Z)(!1),f=function(){null==o||o.apply(void 0,arguments)};i.useEffect(()=>{let e=null;return s&&(e=setTimeout(()=>{var e;null===(e=g.current)||void 0===e||e.focus()})),()=>{e&&clearTimeout(e)}},[]);let E=e=>{e&&e.then&&(b(!0),e.then(function(){b(!1,!0),f.apply(void 0,arguments),p.current=!1},e=>{if(b(!1,!0),p.current=!1,null==c||!c())return Promise.reject(e)}))};return i.createElement(y.ZP,Object.assign({},(0,T.nx)(t),{onClick:e=>{let t;if(!p.current){if(p.current=!0,!d){f();return}if(l){var n;if(t=d(e),u&&!((n=t)&&n.then)){p.current=!1,f(e);return}}else if(d.length)t=d(o),p.current=!1;else if(!(t=d())){f();return}E(t)}},loading:m,prefixCls:a},r,{ref:g}),n)};let R=i.createContext({}),{Provider:I}=R;var N=()=>{let{autoFocusButton:e,cancelButtonProps:t,cancelTextLocale:n,isSilent:a,mergedOkCancel:r,rootPrefixCls:o,close:s,onCancel:l,onConfirm:c}=(0,i.useContext)(R);return r?i.createElement(A,{isSilent:a,actionFn:l,close:function(){null==s||s.apply(void 
0,arguments),null==c||c(!1)},autoFocus:"cancel"===e,buttonProps:t,prefixCls:"".concat(o,"-btn")},n):null},_=()=>{let{autoFocusButton:e,close:t,isSilent:n,okButtonProps:a,rootPrefixCls:r,okTextLocale:o,okType:s,onConfirm:l,onOk:c}=(0,i.useContext)(R);return i.createElement(A,{isSilent:n,type:s||"primary",actionFn:c,close:function(){null==t||t.apply(void 0,arguments),null==l||l(!0)},autoFocus:"ok"===e,buttonProps:a,prefixCls:"".concat(r,"-btn")},o)},v=n(49638),w=n(1119),k=n(26365),C=n(28036),O=i.createContext({}),x=n(31686),L=n(2161),D=n(92491),P=n(95814),M=n(18242);function F(e,t,n){var a=t;return!a&&n&&(a="".concat(e,"-").concat(n)),a}function U(e,t){var n=e["page".concat(t?"Y":"X","Offset")],a="scroll".concat(t?"Top":"Left");if("number"!=typeof n){var r=e.document;"number"!=typeof(n=r.documentElement[a])&&(n=r.body[a])}return n}var B=n(47970),G=n(28791),$=i.memo(function(e){return e.children},function(e,t){return!t.shouldUpdate}),H={width:0,height:0,overflow:"hidden",outline:"none"},z=i.forwardRef(function(e,t){var n,a,r,o=e.prefixCls,s=e.className,l=e.style,c=e.title,u=e.ariaId,d=e.footer,p=e.closable,g=e.closeIcon,b=e.onClose,f=e.children,E=e.bodyStyle,h=e.bodyProps,S=e.modalRender,y=e.onMouseDown,T=e.onMouseUp,A=e.holderRef,R=e.visible,I=e.forceRender,N=e.width,_=e.height,v=e.classNames,k=e.styles,C=i.useContext(O).panel,L=(0,G.x1)(A,C),D=(0,i.useRef)(),P=(0,i.useRef)();i.useImperativeHandle(t,function(){return{focus:function(){var e;null===(e=D.current)||void 0===e||e.focus()},changeActive:function(e){var t=document.activeElement;e&&t===P.current?D.current.focus():e||t!==D.current||P.current.focus()}}});var M={};void 0!==N&&(M.width=N),void 0!==_&&(M.height=_),d&&(n=i.createElement("div",{className:m()("".concat(o,"-footer"),null==v?void 0:v.footer),style:(0,x.Z)({},null==k?void 0:k.footer)},d)),c&&(a=i.createElement("div",{className:m()("".concat(o,"-header"),null==v?void 0:v.header),style:(0,x.Z)({},null==k?void 
0:k.header)},i.createElement("div",{className:"".concat(o,"-title"),id:u},c))),p&&(r=i.createElement("button",{type:"button",onClick:b,"aria-label":"Close",className:"".concat(o,"-close")},g||i.createElement("span",{className:"".concat(o,"-close-x")})));var F=i.createElement("div",{className:m()("".concat(o,"-content"),null==v?void 0:v.content),style:null==k?void 0:k.content},r,a,i.createElement("div",(0,w.Z)({className:m()("".concat(o,"-body"),null==v?void 0:v.body),style:(0,x.Z)((0,x.Z)({},E),null==k?void 0:k.body)},h),f),n);return i.createElement("div",{key:"dialog-element",role:"dialog","aria-labelledby":c?u:null,"aria-modal":"true",ref:L,style:(0,x.Z)((0,x.Z)({},l),M),className:m()(o,s),onMouseDown:y,onMouseUp:T},i.createElement("div",{tabIndex:0,ref:D,style:H,"aria-hidden":"true"}),i.createElement($,{shouldUpdate:R||I},S?S(F):F),i.createElement("div",{tabIndex:0,ref:P,style:H,"aria-hidden":"true"}))}),j=i.forwardRef(function(e,t){var n=e.prefixCls,a=e.title,r=e.style,o=e.className,s=e.visible,l=e.forceRender,c=e.destroyOnClose,u=e.motionName,d=e.ariaId,p=e.onVisibleChanged,g=e.mousePosition,b=(0,i.useRef)(),f=i.useState(),E=(0,k.Z)(f,2),h=E[0],S=E[1],y={};function T(){var e,t,n,a,r,i=(n={left:(t=(e=b.current).getBoundingClientRect()).left,top:t.top},r=(a=e.ownerDocument).defaultView||a.parentWindow,n.left+=U(r),n.top+=U(r,!0),n);S(g?"".concat(g.x-i.left,"px ").concat(g.y-i.top,"px"):"")}return h&&(y.transformOrigin=h),i.createElement(B.ZP,{visible:s,onVisibleChanged:p,onAppearPrepare:T,onEnterPrepare:T,forceRender:l,motionName:u,removeOnLeave:c,ref:b},function(s,l){var c=s.className,u=s.style;return i.createElement(z,(0,w.Z)({},e,{ref:t,title:a,ariaId:d,prefixCls:n,holderRef:l,style:(0,x.Z)((0,x.Z)((0,x.Z)({},u),r),y),className:m()(o,c)}))})});function V(e){var t=e.prefixCls,n=e.style,a=e.visible,r=e.maskProps,o=e.motionName,s=e.className;return 
i.createElement(B.ZP,{key:"mask",visible:a,motionName:o,leavedClassName:"".concat(t,"-mask-hidden")},function(e,a){var o=e.className,l=e.style;return i.createElement("div",(0,w.Z)({ref:a,style:(0,x.Z)((0,x.Z)({},l),n),className:m()("".concat(t,"-mask"),o,s)},r))})}function W(e){var t=e.prefixCls,n=void 0===t?"rc-dialog":t,a=e.zIndex,r=e.visible,o=void 0!==r&&r,s=e.keyboard,l=void 0===s||s,c=e.focusTriggerAfterClose,u=void 0===c||c,d=e.wrapStyle,p=e.wrapClassName,g=e.wrapProps,b=e.onClose,f=e.afterOpenChange,E=e.afterClose,h=e.transitionName,S=e.animation,y=e.closable,T=e.mask,A=void 0===T||T,R=e.maskTransitionName,I=e.maskAnimation,N=e.maskClosable,_=e.maskStyle,v=e.maskProps,C=e.rootClassName,O=e.classNames,U=e.styles,B=(0,i.useRef)(),G=(0,i.useRef)(),$=(0,i.useRef)(),H=i.useState(o),z=(0,k.Z)(H,2),W=z[0],q=z[1],Y=(0,D.Z)();function K(e){null==b||b(e)}var Z=(0,i.useRef)(!1),X=(0,i.useRef)(),Q=null;return(void 0===N||N)&&(Q=function(e){Z.current?Z.current=!1:G.current===e.target&&K(e)}),(0,i.useEffect)(function(){o&&(q(!0),(0,L.Z)(G.current,document.activeElement)||(B.current=document.activeElement))},[o]),(0,i.useEffect)(function(){return function(){clearTimeout(X.current)}},[]),i.createElement("div",(0,w.Z)({className:m()("".concat(n,"-root"),C)},(0,M.Z)(e,{data:!0})),i.createElement(V,{prefixCls:n,visible:A&&o,motionName:F(n,R,I),style:(0,x.Z)((0,x.Z)({zIndex:a},_),null==U?void 0:U.mask),maskProps:v,className:null==O?void 0:O.mask}),i.createElement("div",(0,w.Z)({tabIndex:-1,onKeyDown:function(e){if(l&&e.keyCode===P.Z.ESC){e.stopPropagation(),K(e);return}o&&e.keyCode===P.Z.TAB&&$.current.changeActive(!e.shiftKey)},className:m()("".concat(n,"-wrap"),p,null==O?void 0:O.wrapper),ref:G,onClick:Q,style:(0,x.Z)((0,x.Z)((0,x.Z)({zIndex:a},d),null==U?void 
0:U.wrapper),{},{display:W?null:"none"})},g),i.createElement(j,(0,w.Z)({},e,{onMouseDown:function(){clearTimeout(X.current),Z.current=!0},onMouseUp:function(){X.current=setTimeout(function(){Z.current=!1})},ref:$,closable:void 0===y||y,ariaId:Y,prefixCls:n,visible:o&&W,onClose:K,onVisibleChanged:function(e){if(e)!function(){if(!(0,L.Z)(G.current,document.activeElement)){var e;null===(e=$.current)||void 0===e||e.focus()}}();else{if(q(!1),A&&B.current&&u){try{B.current.focus({preventScroll:!0})}catch(e){}B.current=null}W&&(null==E||E())}null==f||f(e)},motionName:F(n,h,S)}))))}j.displayName="Content",n(32559);var q=function(e){var t=e.visible,n=e.getContainer,a=e.forceRender,r=e.destroyOnClose,o=void 0!==r&&r,s=e.afterClose,l=e.panelRef,c=i.useState(t),u=(0,k.Z)(c,2),d=u[0],p=u[1],g=i.useMemo(function(){return{panel:l}},[l]);return(i.useEffect(function(){t&&p(!0)},[t]),a||!o||d)?i.createElement(O.Provider,{value:g},i.createElement(C.Z,{open:t||a||d,autoDestroy:!1,getContainer:n,autoLock:t||d},i.createElement(W,(0,w.Z)({},e,{destroyOnClose:o,afterClose:function(){null==s||s(),p(!1)}})))):null};q.displayName="Dialog";var Y=function(e,t,n){let a=arguments.length>3&&void 0!==arguments[3]?arguments[3]:i.createElement(v.Z,null),r=arguments.length>4&&void 0!==arguments[4]&&arguments[4];if("boolean"==typeof e?!e:void 0===t?!r:!1===t||null===t)return[!1,null];let o="boolean"==typeof t||null==t?a:t;return[!0,n?n(o):o]},K=n(94981),Z=n(95140),X=n(39109),Q=n(65658),J=n(74126);function ee(){}let et=i.createContext({add:ee,remove:ee});var en=n(86586),ea=()=>{let{cancelButtonProps:e,cancelTextLocale:t,onCancel:n}=(0,i.useContext)(R);return i.createElement(y.ZP,Object.assign({onClick:n},e),t)},er=()=>{let{confirmLoading:e,okButtonProps:t,okType:n,okTextLocale:a,onOk:r}=(0,i.useContext)(R);return i.createElement(y.ZP,Object.assign({},(0,T.nx)(n),{loading:e,onClick:r},t),a)},ei=n(92246);function eo(e,t){return 
i.createElement("span",{className:"".concat(e,"-close-x")},t||i.createElement(v.Z,{className:"".concat(e,"-close-icon")}))}let es=e=>{let t;let{okText:n,okType:a="primary",cancelText:o,confirmLoading:s,onOk:l,onCancel:c,okButtonProps:u,cancelButtonProps:d,footer:p}=e,[g]=(0,E.Z)("Modal",(0,ei.A)()),m={confirmLoading:s,okButtonProps:u,cancelButtonProps:d,okTextLocale:n||(null==g?void 0:g.okText),cancelTextLocale:o||(null==g?void 0:g.cancelText),okType:a,onOk:l,onCancel:c},b=i.useMemo(()=>m,(0,r.Z)(Object.values(m)));return"function"==typeof p||void 0===p?(t=i.createElement(i.Fragment,null,i.createElement(ea,null),i.createElement(er,null)),"function"==typeof p&&(t=p(t,{OkBtn:er,CancelBtn:ea})),t=i.createElement(I,{value:b},t)):t=p,i.createElement(en.n,{disabled:!1},t)};var el=n(12918),ec=n(11699),eu=n(691),ed=n(3104),ep=n(80669),eg=n(352);function em(e){return{position:e,inset:0}}let eb=e=>{let{componentCls:t,antCls:n}=e;return[{["".concat(t,"-root")]:{["".concat(t).concat(n,"-zoom-enter, ").concat(t).concat(n,"-zoom-appear")]:{transform:"none",opacity:0,animationDuration:e.motionDurationSlow,userSelect:"none"},["".concat(t).concat(n,"-zoom-leave ").concat(t,"-content")]:{pointerEvents:"none"},["".concat(t,"-mask")]:Object.assign(Object.assign({},em("fixed")),{zIndex:e.zIndexPopupBase,height:"100%",backgroundColor:e.colorBgMask,pointerEvents:"none",["".concat(t,"-hidden")]:{display:"none"}}),["".concat(t,"-wrap")]:Object.assign(Object.assign({},em("fixed")),{zIndex:e.zIndexPopupBase,overflow:"auto",outline:0,WebkitOverflowScrolling:"touch",["&:has(".concat(t).concat(n,"-zoom-enter), 
&:has(").concat(t).concat(n,"-zoom-appear)")]:{pointerEvents:"none"}})}},{["".concat(t,"-root")]:(0,ec.J$)(e)}]},ef=e=>{let{componentCls:t}=e;return[{["".concat(t,"-root")]:{["".concat(t,"-wrap-rtl")]:{direction:"rtl"},["".concat(t,"-centered")]:{textAlign:"center","&::before":{display:"inline-block",width:0,height:"100%",verticalAlign:"middle",content:'""'},[t]:{top:0,display:"inline-block",paddingBottom:0,textAlign:"start",verticalAlign:"middle"}},["@media (max-width: ".concat(e.screenSMMax,"px)")]:{[t]:{maxWidth:"calc(100vw - 16px)",margin:"".concat((0,eg.bf)(e.marginXS)," auto")},["".concat(t,"-centered")]:{[t]:{flex:1}}}}},{[t]:Object.assign(Object.assign({},(0,el.Wf)(e)),{pointerEvents:"none",position:"relative",top:100,width:"auto",maxWidth:"calc(100vw - ".concat((0,eg.bf)(e.calc(e.margin).mul(2).equal()),")"),margin:"0 auto",paddingBottom:e.paddingLG,["".concat(t,"-title")]:{margin:0,color:e.titleColor,fontWeight:e.fontWeightStrong,fontSize:e.titleFontSize,lineHeight:e.titleLineHeight,wordWrap:"break-word"},["".concat(t,"-content")]:{position:"relative",backgroundColor:e.contentBg,backgroundClip:"padding-box",border:0,borderRadius:e.borderRadiusLG,boxShadow:e.boxShadow,pointerEvents:"auto",padding:e.contentPadding},["".concat(t,"-close")]:Object.assign({position:"absolute",top:e.calc(e.modalHeaderHeight).sub(e.modalCloseBtnSize).div(2).equal(),insetInlineEnd:e.calc(e.modalHeaderHeight).sub(e.modalCloseBtnSize).div(2).equal(),zIndex:e.calc(e.zIndexPopupBase).add(10).equal(),padding:0,color:e.modalCloseIconColor,fontWeight:e.fontWeightStrong,lineHeight:1,textDecoration:"none",background:"transparent",borderRadius:e.borderRadiusSM,width:e.modalCloseBtnSize,height:e.modalCloseBtnSize,border:0,outline:0,cursor:"pointer",transition:"color ".concat(e.motionDurationMid,", background-color 
").concat(e.motionDurationMid),"&-x":{display:"flex",fontSize:e.fontSizeLG,fontStyle:"normal",lineHeight:"".concat((0,eg.bf)(e.modalCloseBtnSize)),justifyContent:"center",textTransform:"none",textRendering:"auto"},"&:hover":{color:e.modalIconHoverColor,backgroundColor:e.closeBtnHoverBg,textDecoration:"none"},"&:active":{backgroundColor:e.closeBtnActiveBg}},(0,el.Qy)(e)),["".concat(t,"-header")]:{color:e.colorText,background:e.headerBg,borderRadius:"".concat((0,eg.bf)(e.borderRadiusLG)," ").concat((0,eg.bf)(e.borderRadiusLG)," 0 0"),marginBottom:e.headerMarginBottom,padding:e.headerPadding,borderBottom:e.headerBorderBottom},["".concat(t,"-body")]:{fontSize:e.fontSize,lineHeight:e.lineHeight,wordWrap:"break-word",padding:e.bodyPadding},["".concat(t,"-footer")]:{textAlign:"end",background:e.footerBg,marginTop:e.footerMarginTop,padding:e.footerPadding,borderTop:e.footerBorderTop,borderRadius:e.footerBorderRadius,["> ".concat(e.antCls,"-btn + ").concat(e.antCls,"-btn")]:{marginInlineStart:e.marginXS}},["".concat(t,"-open")]:{overflow:"hidden"}})},{["".concat(t,"-pure-panel")]:{top:"auto",padding:0,display:"flex",flexDirection:"column",["".concat(t,"-content,\n ").concat(t,"-body,\n ").concat(t,"-confirm-body-wrapper")]:{display:"flex",flexDirection:"column",flex:"auto"},["".concat(t,"-confirm-body")]:{marginBottom:"auto"}}}]},eE=e=>{let{componentCls:t}=e;return{["".concat(t,"-root")]:{["".concat(t,"-wrap-rtl")]:{direction:"rtl",["".concat(t,"-confirm-body")]:{direction:"rtl"}}}}},eh=e=>{let 
t=e.padding,n=e.fontSizeHeading5,a=e.lineHeightHeading5;return(0,ed.TS)(e,{modalHeaderHeight:e.calc(e.calc(a).mul(n).equal()).add(e.calc(t).mul(2).equal()).equal(),modalFooterBorderColorSplit:e.colorSplit,modalFooterBorderStyle:e.lineType,modalFooterBorderWidth:e.lineWidth,modalIconHoverColor:e.colorIconHover,modalCloseIconColor:e.colorIcon,modalCloseBtnSize:e.fontHeight,modalConfirmIconSize:e.fontHeight,modalTitleHeight:e.calc(e.titleFontSize).mul(e.titleLineHeight).equal()})},eS=e=>({footerBg:"transparent",headerBg:e.colorBgElevated,titleLineHeight:e.lineHeightHeading5,titleFontSize:e.fontSizeHeading5,contentBg:e.colorBgElevated,titleColor:e.colorTextHeading,closeBtnHoverBg:e.wireframe?"transparent":e.colorFillContent,closeBtnActiveBg:e.wireframe?"transparent":e.colorFillContentHover,contentPadding:e.wireframe?0:"".concat((0,eg.bf)(e.paddingMD)," ").concat((0,eg.bf)(e.paddingContentHorizontalLG)),headerPadding:e.wireframe?"".concat((0,eg.bf)(e.padding)," ").concat((0,eg.bf)(e.paddingLG)):0,headerBorderBottom:e.wireframe?"".concat((0,eg.bf)(e.lineWidth)," ").concat(e.lineType," ").concat(e.colorSplit):"none",headerMarginBottom:e.wireframe?0:e.marginXS,bodyPadding:e.wireframe?e.paddingLG:0,footerPadding:e.wireframe?"".concat((0,eg.bf)(e.paddingXS)," ").concat((0,eg.bf)(e.padding)):0,footerBorderTop:e.wireframe?"".concat((0,eg.bf)(e.lineWidth)," ").concat(e.lineType," ").concat(e.colorSplit):"none",footerBorderRadius:e.wireframe?"0 0 ".concat((0,eg.bf)(e.borderRadiusLG)," ").concat((0,eg.bf)(e.borderRadiusLG)):0,footerMarginTop:e.wireframe?0:e.marginSM,confirmBodyPadding:e.wireframe?"".concat((0,eg.bf)(2*e.padding)," ").concat((0,eg.bf)(2*e.padding)," ").concat((0,eg.bf)(e.paddingLG)):0,confirmIconMarginInlineEnd:e.wireframe?e.margin:e.marginSM,confirmBtnsMarginTop:e.wireframe?e.marginLG:e.marginSM});var ey=(0,ep.I$)("Modal",e=>{let t=eh(e);return[ef(t),eE(t),eb(t),(0,eu._y)(t,"zoom")]},eS,{unitless:{titleLineHeight:!0}}),eT=n(64024),eA=function(e,t){var 
n={};for(var a in e)Object.prototype.hasOwnProperty.call(e,a)&&0>t.indexOf(a)&&(n[a]=e[a]);if(null!=e&&"function"==typeof Object.getOwnPropertySymbols)for(var r=0,a=Object.getOwnPropertySymbols(e);rt.indexOf(a[r])&&Object.prototype.propertyIsEnumerable.call(e,a[r])&&(n[a[r]]=e[a[r]]);return n};(0,K.Z)()&&window.document.documentElement&&document.documentElement.addEventListener("click",e=>{a={x:e.pageX,y:e.pageY},setTimeout(()=>{a=null},100)},!0);var eR=e=>{var t;let{getPopupContainer:n,getPrefixCls:r,direction:o,modal:l}=i.useContext(s.E_),c=t=>{let{onCancel:n}=e;null==n||n(t)},{prefixCls:u,className:d,rootClassName:p,open:g,wrapClassName:E,centered:h,getContainer:S,closeIcon:y,closable:T,focusTriggerAfterClose:A=!0,style:R,visible:I,width:N=520,footer:_,classNames:w,styles:k}=e,C=eA(e,["prefixCls","className","rootClassName","open","wrapClassName","centered","getContainer","closeIcon","closable","focusTriggerAfterClose","style","visible","width","footer","classNames","styles"]),O=r("modal",u),x=r(),L=(0,eT.Z)(O),[D,P,M]=ey(O,L),F=m()(E,{["".concat(O,"-centered")]:!!h,["".concat(O,"-wrap-rtl")]:"rtl"===o}),U=null!==_&&i.createElement(es,Object.assign({},e,{onOk:t=>{let{onOk:n}=e;null==n||n(t)},onCancel:c})),[B,G]=Y(T,y,e=>eo(O,e),i.createElement(v.Z,{className:"".concat(O,"-close-icon")}),!0),$=function(e){let t=i.useContext(et),n=i.useRef();return(0,J.zX)(a=>{if(a){let r=e?a.querySelector(e):a;t.add(r),n.current=r}else t.remove(n.current)})}(".".concat(O,"-content")),[H,z]=(0,b.Cn)("Modal",C.zIndex);return D(i.createElement(Q.BR,null,i.createElement(X.Ux,{status:!0,override:!0},i.createElement(Z.Z.Provider,{value:z},i.createElement(q,Object.assign({width:N},C,{zIndex:H,getContainer:void 0===S?n:S,prefixCls:O,rootClassName:m()(P,p,M,L),footer:U,visible:null!=g?g:I,mousePosition:null!==(t=C.mousePosition)&&void 
0!==t?t:a,onClose:c,closable:B,closeIcon:G,focusTriggerAfterClose:A,transitionName:(0,f.m)(x,"zoom",e.transitionName),maskTransitionName:(0,f.m)(x,"fade",e.maskTransitionName),className:m()(P,d,null==l?void 0:l.className),style:Object.assign(Object.assign({},null==l?void 0:l.style),R),classNames:Object.assign(Object.assign({wrapper:F},null==l?void 0:l.classNames),w),styles:Object.assign(Object.assign({},null==l?void 0:l.styles),k),panelRef:$}))))))};let eI=e=>{let{componentCls:t,titleFontSize:n,titleLineHeight:a,modalConfirmIconSize:r,fontSize:i,lineHeight:o,modalTitleHeight:s,fontHeight:l,confirmBodyPadding:c}=e,u="".concat(t,"-confirm");return{[u]:{"&-rtl":{direction:"rtl"},["".concat(e.antCls,"-modal-header")]:{display:"none"},["".concat(u,"-body-wrapper")]:Object.assign({},(0,el.dF)()),["&".concat(t," ").concat(t,"-body")]:{padding:c},["".concat(u,"-body")]:{display:"flex",flexWrap:"nowrap",alignItems:"start",["> ".concat(e.iconCls)]:{flex:"none",fontSize:r,marginInlineEnd:e.confirmIconMarginInlineEnd,marginTop:e.calc(e.calc(l).sub(r).equal()).div(2).equal()},["&-has-title > ".concat(e.iconCls)]:{marginTop:e.calc(e.calc(s).sub(r).equal()).div(2).equal()}},["".concat(u,"-paragraph")]:{display:"flex",flexDirection:"column",flex:"auto",rowGap:e.marginXS,maxWidth:"calc(100% - ".concat((0,eg.bf)(e.calc(e.modalConfirmIconSize).add(e.marginSM).equal()),")")},["".concat(u,"-title")]:{color:e.colorTextHeading,fontWeight:e.fontWeightStrong,fontSize:n,lineHeight:a},["".concat(u,"-content")]:{color:e.colorText,fontSize:i,lineHeight:o},["".concat(u,"-btns")]:{textAlign:"end",marginTop:e.confirmBtnsMarginTop,["".concat(e.antCls,"-btn + ").concat(e.antCls,"-btn")]:{marginBottom:0,marginInlineStart:e.marginXS}}},["".concat(u,"-error ").concat(u,"-body > ").concat(e.iconCls)]:{color:e.colorError},["".concat(u,"-warning ").concat(u,"-body > ").concat(e.iconCls,",\n ").concat(u,"-confirm ").concat(u,"-body > ").concat(e.iconCls)]:{color:e.colorWarning},["".concat(u,"-info 
").concat(u,"-body > ").concat(e.iconCls)]:{color:e.colorInfo},["".concat(u,"-success ").concat(u,"-body > ").concat(e.iconCls)]:{color:e.colorSuccess}}};var eN=(0,ep.bk)(["Modal","confirm"],e=>[eI(eh(e))],eS,{order:-1e3}),e_=function(e,t){var n={};for(var a in e)Object.prototype.hasOwnProperty.call(e,a)&&0>t.indexOf(a)&&(n[a]=e[a]);if(null!=e&&"function"==typeof Object.getOwnPropertySymbols)for(var r=0,a=Object.getOwnPropertySymbols(e);rt.indexOf(a[r])&&Object.prototype.propertyIsEnumerable.call(e,a[r])&&(n[a[r]]=e[a[r]]);return n};function ev(e){let{prefixCls:t,icon:n,okText:a,cancelText:o,confirmPrefixCls:s,type:l,okCancel:g,footer:b,locale:f}=e,h=e_(e,["prefixCls","icon","okText","cancelText","confirmPrefixCls","type","okCancel","footer","locale"]),S=n;if(!n&&null!==n)switch(l){case"info":S=i.createElement(p.Z,null);break;case"success":S=i.createElement(c.Z,null);break;case"error":S=i.createElement(u.Z,null);break;default:S=i.createElement(d.Z,null)}let y=null!=g?g:"confirm"===l,T=null!==e.autoFocusButton&&(e.autoFocusButton||"ok"),[A]=(0,E.Z)("Modal"),R=f||A,v=a||(y?null==R?void 0:R.okText:null==R?void 0:R.justOkText),w=Object.assign({autoFocusButton:T,cancelTextLocale:o||(null==R?void 0:R.cancelText),okTextLocale:v,mergedOkCancel:y},h),k=i.useMemo(()=>w,(0,r.Z)(Object.values(w))),C=i.createElement(i.Fragment,null,i.createElement(N,null),i.createElement(_,null)),O=void 0!==e.title&&null!==e.title,x="".concat(s,"-body");return i.createElement("div",{className:"".concat(s,"-body-wrapper")},i.createElement("div",{className:m()(x,{["".concat(x,"-has-title")]:O})},S,i.createElement("div",{className:"".concat(s,"-paragraph")},O&&i.createElement("span",{className:"".concat(s,"-title")},e.title),i.createElement("div",{className:"".concat(s,"-content")},e.content))),void 0===b||"function"==typeof b?i.createElement(I,{value:k},i.createElement("div",{className:"".concat(s,"-btns")},"function"==typeof 
b?b(C,{OkBtn:_,CancelBtn:N}):C)):b,i.createElement(eN,{prefixCls:t}))}let ew=e=>{let{close:t,zIndex:n,afterClose:a,open:r,keyboard:o,centered:s,getContainer:l,maskStyle:c,direction:u,prefixCls:d,wrapClassName:p,rootPrefixCls:g,bodyStyle:E,closable:S=!1,closeIcon:y,modalRender:T,focusTriggerAfterClose:A,onConfirm:R,styles:I}=e,N="".concat(d,"-confirm"),_=e.width||416,v=e.style||{},w=void 0===e.mask||e.mask,k=void 0!==e.maskClosable&&e.maskClosable,C=m()(N,"".concat(N,"-").concat(e.type),{["".concat(N,"-rtl")]:"rtl"===u},e.className),[,O]=(0,h.ZP)(),x=i.useMemo(()=>void 0!==n?n:O.zIndexPopupBase+b.u6,[n,O]);return i.createElement(eR,{prefixCls:d,className:C,wrapClassName:m()({["".concat(N,"-centered")]:!!e.centered},p),onCancel:()=>{null==t||t({triggerCancel:!0}),null==R||R(!1)},open:r,title:"",footer:null,transitionName:(0,f.m)(g||"","zoom",e.transitionName),maskTransitionName:(0,f.m)(g||"","fade",e.maskTransitionName),mask:w,maskClosable:k,style:v,styles:Object.assign({body:E,mask:c},I),width:_,zIndex:x,afterClose:a,keyboard:o,centered:s,getContainer:l,closable:S,closeIcon:y,modalRender:T,focusTriggerAfterClose:A},i.createElement(ev,Object.assign({},e,{confirmPrefixCls:N})))};var ek=e=>{let{rootPrefixCls:t,iconPrefixCls:n,direction:a,theme:r}=e;return i.createElement(l.ZP,{prefixCls:t,iconPrefixCls:n,direction:a,theme:r},i.createElement(ew,Object.assign({},e)))},eC=[];let eO="",ex=e=>{var t,n;let{prefixCls:a,getContainer:r,direction:o}=e,l=(0,ei.A)(),c=(0,i.useContext)(s.E_),u=eO||c.getPrefixCls(),d=a||"".concat(u,"-modal"),p=r;return!1===p&&(p=void 0),i.createElement(ek,Object.assign({},e,{rootPrefixCls:u,prefixCls:d,iconPrefixCls:c.iconPrefixCls,theme:c.theme,direction:null!=o?o:c.direction,locale:null!==(n=null===(t=c.locale)||void 0===t?void 0:t.Modal)&&void 0!==n?n:l,getContainer:p}))};function eL(e){let t;let n=(0,l.w6)(),a=document.createDocumentFragment(),s=Object.assign(Object.assign({},e),{close:d,open:!0});function c(){for(var 
t=arguments.length,n=Array(t),i=0;ie&&e.triggerCancel);e.onCancel&&s&&e.onCancel.apply(e,[()=>{}].concat((0,r.Z)(n.slice(1))));for(let e=0;e{let t=n.getPrefixCls(void 0,eO),r=n.getIconPrefixCls(),s=n.getTheme(),c=i.createElement(ex,Object.assign({},e));(0,o.s)(i.createElement(l.ZP,{prefixCls:t,iconPrefixCls:r,theme:s},n.holderRender?n.holderRender(c):c),a)})}function d(){for(var t=arguments.length,n=Array(t),a=0;a{"function"==typeof e.afterClose&&e.afterClose(),c.apply(this,n)}})).visible&&delete s.visible,u(s)}return u(s),eC.push(d),{destroy:d,update:function(e){u(s="function"==typeof e?e(s):Object.assign(Object.assign({},s),e))}}}function eD(e){return Object.assign(Object.assign({},e),{type:"warning"})}function eP(e){return Object.assign(Object.assign({},e),{type:"info"})}function eM(e){return Object.assign(Object.assign({},e),{type:"success"})}function eF(e){return Object.assign(Object.assign({},e),{type:"error"})}function eU(e){return Object.assign(Object.assign({},e),{type:"confirm"})}var eB=n(93942),eG=function(e,t){var n={};for(var a in e)Object.prototype.hasOwnProperty.call(e,a)&&0>t.indexOf(a)&&(n[a]=e[a]);if(null!=e&&"function"==typeof Object.getOwnPropertySymbols)for(var r=0,a=Object.getOwnPropertySymbols(e);rt.indexOf(a[r])&&Object.prototype.propertyIsEnumerable.call(e,a[r])&&(n[a[r]]=e[a[r]]);return n},e$=(0,eB.i)(e=>{let{prefixCls:t,className:n,closeIcon:a,closable:r,type:o,title:l,children:c,footer:u}=e,d=eG(e,["prefixCls","className","closeIcon","closable","type","title","children","footer"]),{getPrefixCls:p}=i.useContext(s.E_),g=p(),b=t||p("modal"),f=(0,eT.Z)(g),[E,h,S]=ey(b,f),y="".concat(b,"-confirm"),T={};return 
T=o?{closable:null!=r&&r,title:"",footer:"",children:i.createElement(ev,Object.assign({},e,{prefixCls:b,confirmPrefixCls:y,rootPrefixCls:g,content:c}))}:{closable:null==r||r,title:l,footer:null!==u&&i.createElement(es,Object.assign({},e)),children:c},E(i.createElement(z,Object.assign({prefixCls:b,className:m()(h,"".concat(b,"-pure-panel"),o&&y,o&&"".concat(y,"-").concat(o),n,S,f)},d,{closeIcon:eo(b,a),closable:r},T)))}),eH=n(13823),ez=function(e,t){var n={};for(var a in e)Object.prototype.hasOwnProperty.call(e,a)&&0>t.indexOf(a)&&(n[a]=e[a]);if(null!=e&&"function"==typeof Object.getOwnPropertySymbols)for(var r=0,a=Object.getOwnPropertySymbols(e);rt.indexOf(a[r])&&Object.prototype.propertyIsEnumerable.call(e,a[r])&&(n[a[r]]=e[a[r]]);return n},ej=i.forwardRef((e,t)=>{var n,{afterClose:a,config:o}=e,l=ez(e,["afterClose","config"]);let[c,u]=i.useState(!0),[d,p]=i.useState(o),{direction:g,getPrefixCls:m}=i.useContext(s.E_),b=m("modal"),f=m(),h=function(){u(!1);for(var e=arguments.length,t=Array(e),n=0;ne&&e.triggerCancel);d.onCancel&&a&&d.onCancel.apply(d,[()=>{}].concat((0,r.Z)(t.slice(1))))};i.useImperativeHandle(t,()=>({destroy:h,update:e=>{p(t=>Object.assign(Object.assign({},t),e))}}));let S=null!==(n=d.okCancel)&&void 0!==n?n:"confirm"===d.type,[y]=(0,E.Z)("Modal",eH.Z.Modal);return i.createElement(ek,Object.assign({prefixCls:b,rootPrefixCls:f},d,{close:h,open:c,afterClose:()=>{var e;a(),null===(e=d.afterClose)||void 0===e||e.call(d)},okText:d.okText||(S?null==y?void 0:y.okText:null==y?void 0:y.justOkText),direction:d.direction||g,cancelText:d.cancelText||(null==y?void 0:y.cancelText)},l))});let eV=0,eW=i.memo(i.forwardRef((e,t)=>{let[n,a]=function(){let[e,t]=i.useState([]);return[e,i.useCallback(e=>(t(t=>[].concat((0,r.Z)(t),[e])),()=>{t(t=>t.filter(t=>t!==e))}),[])]}();return i.useImperativeHandle(t,()=>({patchElement:a}),[]),i.createElement(i.Fragment,null,n)}));function eq(e){return eL(eD(e))}eR.useModal=function(){let 
e=i.useRef(null),[t,n]=i.useState([]);i.useEffect(()=>{t.length&&((0,r.Z)(t).forEach(e=>{e()}),n([]))},[t]);let a=i.useCallback(t=>function(a){var o;let s,l;eV+=1;let c=i.createRef(),u=new Promise(e=>{s=e}),d=!1,p=i.createElement(ej,{key:"modal-".concat(eV),config:t(a),ref:c,afterClose:()=>{null==l||l()},isSilent:()=>d,onConfirm:e=>{s(e)}});return(l=null===(o=e.current)||void 0===o?void 0:o.patchElement(p))&&eC.push(l),{destroy:()=>{function e(){var e;null===(e=c.current)||void 0===e||e.destroy()}c.current?e():n(t=>[].concat((0,r.Z)(t),[e]))},update:e=>{function t(){var t;null===(t=c.current)||void 0===t||t.update(e)}c.current?t():n(e=>[].concat((0,r.Z)(e),[t]))},then:e=>(d=!0,u.then(e))}},[]);return[i.useMemo(()=>({info:a(eP),success:a(eM),error:a(eF),warning:a(eD),confirm:a(eU)}),[]),i.createElement(eW,{key:"modal-holder",ref:e})]},eR.info=function(e){return eL(eP(e))},eR.success=function(e){return eL(eM(e))},eR.error=function(e){return eL(eF(e))},eR.warning=eq,eR.warn=eq,eR.confirm=function(e){return eL(eU(e))},eR.destroyAll=function(){for(;eC.length;){let e=eC.pop();e&&e()}},eR.config=function(e){let{rootPrefixCls:t}=e;eO=t},eR._InternalPanelDoNotUseOrYouWillBeFired=e$;var eY=eR},11699:function(e,t,n){"use strict";n.d(t,{J$:function(){return s}});var a=n(352),r=n(37133);let i=new a.E4("antFadeIn",{"0%":{opacity:0},"100%":{opacity:1}}),o=new a.E4("antFadeOut",{"0%":{opacity:1},"100%":{opacity:0}}),s=function(e){let t=arguments.length>1&&void 0!==arguments[1]&&arguments[1],{antCls:n}=e,a="".concat(n,"-fade"),s=t?"&":"";return[(0,r.R)(a,i,o,e.motionDurationMid,t),{["\n ".concat(s).concat(a,"-enter,\n ").concat(s).concat(a,"-appear\n ")]:{opacity:0,animationTimingFunction:"linear"},["".concat(s).concat(a,"-leave")]:{animationTimingFunction:"linear"}}]}},26035:function(e){"use strict";e.exports=function(e,n){for(var 
a,r,i,o=e||"",s=n||"div",l={},c=0;c4&&m.slice(0,4)===o&&s.test(t)&&("-"===t.charAt(4)?b=o+(n=t.slice(5).replace(l,d)).charAt(0).toUpperCase()+n.slice(1):(g=(p=t).slice(4),t=l.test(g)?p:("-"!==(g=g.replace(c,u)).charAt(0)&&(g="-"+g),o+g)),f=r),new f(b,t))};var s=/^data[-\w.:]+$/i,l=/-[a-z]/g,c=/[A-Z]/g;function u(e){return"-"+e.toLowerCase()}function d(e){return e.charAt(1).toUpperCase()}},30466:function(e,t,n){"use strict";var a=n(82855),r=n(64541),i=n(80808),o=n(44987),s=n(72731),l=n(98946);e.exports=a([i,r,o,s,l])},72731:function(e,t,n){"use strict";var a=n(20321),r=n(41757),i=a.booleanish,o=a.number,s=a.spaceSeparated;e.exports=r({transform:function(e,t){return"role"===t?t:"aria-"+t.slice(4).toLowerCase()},properties:{ariaActiveDescendant:null,ariaAtomic:i,ariaAutoComplete:null,ariaBusy:i,ariaChecked:i,ariaColCount:o,ariaColIndex:o,ariaColSpan:o,ariaControls:s,ariaCurrent:null,ariaDescribedBy:s,ariaDetails:null,ariaDisabled:i,ariaDropEffect:s,ariaErrorMessage:null,ariaExpanded:i,ariaFlowTo:s,ariaGrabbed:i,ariaHasPopup:null,ariaHidden:i,ariaInvalid:null,ariaKeyShortcuts:null,ariaLabel:null,ariaLabelledBy:s,ariaLevel:o,ariaLive:null,ariaModal:i,ariaMultiLine:i,ariaMultiSelectable:i,ariaOrientation:null,ariaOwns:s,ariaPlaceholder:null,ariaPosInSet:o,ariaPressed:i,ariaReadOnly:i,ariaRelevant:null,ariaRequired:i,ariaRoleDescription:s,ariaRowCount:o,ariaRowIndex:o,ariaRowSpan:o,ariaSelected:i,ariaSetSize:o,ariaSort:null,ariaValueMax:o,ariaValueMin:o,ariaValueNow:o,ariaValueText:null,role:null}})},98946:function(e,t,n){"use strict";var 
a=n(20321),r=n(41757),i=n(53296),o=a.boolean,s=a.overloadedBoolean,l=a.booleanish,c=a.number,u=a.spaceSeparated,d=a.commaSeparated;e.exports=r({space:"html",attributes:{acceptcharset:"accept-charset",classname:"class",htmlfor:"for",httpequiv:"http-equiv"},transform:i,mustUseProperty:["checked","multiple","muted","selected"],properties:{abbr:null,accept:d,acceptCharset:u,accessKey:u,action:null,allow:null,allowFullScreen:o,allowPaymentRequest:o,allowUserMedia:o,alt:null,as:null,async:o,autoCapitalize:null,autoComplete:u,autoFocus:o,autoPlay:o,capture:o,charSet:null,checked:o,cite:null,className:u,cols:c,colSpan:null,content:null,contentEditable:l,controls:o,controlsList:u,coords:c|d,crossOrigin:null,data:null,dateTime:null,decoding:null,default:o,defer:o,dir:null,dirName:null,disabled:o,download:s,draggable:l,encType:null,enterKeyHint:null,form:null,formAction:null,formEncType:null,formMethod:null,formNoValidate:o,formTarget:null,headers:u,height:c,hidden:o,high:c,href:null,hrefLang:null,htmlFor:u,httpEquiv:u,id:null,imageSizes:null,imageSrcSet:d,inputMode:null,integrity:null,is:null,isMap:o,itemId:null,itemProp:u,itemRef:u,itemScope:o,itemType:u,kind:null,label:null,lang:null,language:null,list:null,loading:null,loop:o,low:c,manifest:null,max:null,maxLength:c,media:null,method:null,min:null,minLength:c,multiple:o,muted:o,name:null,nonce:null,noModule:o,noValidate:o,onAbort:null,onAfterPrint:null,onAuxClick:null,onBeforePrint:null,onBeforeUnload:null,onBlur:null,onCancel:null,onCanPlay:null,onCanPlayThrough:null,onChange:null,onClick:null,onClose:null,onContextMenu:null,onCopy:null,onCueChange:null,onCut:null,onDblClick:null,onDrag:null,onDragEnd:null,onDragEnter:null,onDragExit:null,onDragLeave:null,onDragOver:null,onDragStart:null,onDrop:null,onDurationChange:null,onEmptied:null,onEnded:null,onError:null,onFocus:null,onFormData:null,onHashChange:null,onInput:null,onInvalid:null,onKeyDown:null,onKeyPress:null,onKeyUp:null,onLanguageChange:null,onLoad:null,onLoadedDa
ta:null,onLoadedMetadata:null,onLoadEnd:null,onLoadStart:null,onMessage:null,onMessageError:null,onMouseDown:null,onMouseEnter:null,onMouseLeave:null,onMouseMove:null,onMouseOut:null,onMouseOver:null,onMouseUp:null,onOffline:null,onOnline:null,onPageHide:null,onPageShow:null,onPaste:null,onPause:null,onPlay:null,onPlaying:null,onPopState:null,onProgress:null,onRateChange:null,onRejectionHandled:null,onReset:null,onResize:null,onScroll:null,onSecurityPolicyViolation:null,onSeeked:null,onSeeking:null,onSelect:null,onSlotChange:null,onStalled:null,onStorage:null,onSubmit:null,onSuspend:null,onTimeUpdate:null,onToggle:null,onUnhandledRejection:null,onUnload:null,onVolumeChange:null,onWaiting:null,onWheel:null,open:o,optimum:c,pattern:null,ping:u,placeholder:null,playsInline:o,poster:null,preload:null,readOnly:o,referrerPolicy:null,rel:u,required:o,reversed:o,rows:c,rowSpan:c,sandbox:u,scope:null,scoped:o,seamless:o,selected:o,shape:null,size:c,sizes:null,slot:null,span:c,spellCheck:l,src:null,srcDoc:null,srcLang:null,srcSet:d,start:c,step:null,style:null,tabIndex:c,target:null,title:null,translate:null,type:null,typeMustMatch:o,useMap:null,value:l,width:c,wrap:null,align:null,aLink:null,archive:u,axis:null,background:null,bgColor:null,border:c,borderColor:null,bottomMargin:c,cellPadding:null,cellSpacing:null,char:null,charOff:null,classId:null,clear:null,code:null,codeBase:null,codeType:null,color:null,compact:o,declare:o,event:null,face:null,frame:null,frameBorder:null,hSpace:c,leftMargin:c,link:null,longDesc:null,lowSrc:null,marginHeight:c,marginWidth:c,noResize:o,noHref:o,noShade:o,noWrap:o,object:null,profile:null,prompt:null,rev:null,rightMargin:c,rules:null,scheme:null,scrolling:l,standby:null,summary:null,text:null,topMargin:c,valueType:null,version:null,vAlign:null,vLink:null,vSpace:c,allowTransparency:null,autoCorrect:null,autoSave:null,disablePictureInPicture:o,disableRemotePlayback:o,prefix:null,property:null,results:c,security:null,unselectable:null}})},5329
6:function(e,t,n){"use strict";var a=n(38781);e.exports=function(e,t){return a(e,t.toLowerCase())}},38781:function(e){"use strict";e.exports=function(e,t){return t in e?e[t]:t}},41757:function(e,t,n){"use strict";var a=n(96532),r=n(61723),i=n(51351);e.exports=function(e){var t,n,o=e.space,s=e.mustUseProperty||[],l=e.attributes||{},c=e.properties,u=e.transform,d={},p={};for(t in c)n=new i(t,u(l,t),c[t],o),-1!==s.indexOf(t)&&(n.mustUseProperty=!0),d[t]=n,p[a(t)]=t,p[a(n.attribute)]=t;return new r(d,p,o)}},51351:function(e,t,n){"use strict";var a=n(24192),r=n(20321);e.exports=s,s.prototype=new a,s.prototype.defined=!0;var i=["boolean","booleanish","overloadedBoolean","number","commaSeparated","spaceSeparated","commaOrSpaceSeparated"],o=i.length;function s(e,t,n,s){var l,c,u,d=-1;for(s&&(this.space=s),a.call(this,e,t);++d