From b1922e19f8bfc8580f45a445a797cc049bbbe271 Mon Sep 17 00:00:00 2001
From: fzowl <160063452+fzowl@users.noreply.github.com>
Date: Fri, 14 Nov 2025 23:09:11 +0100
Subject: [PATCH] Voyageai pricing and doc update (#16641)

* Refresh VoyageAI models and prices and context

* Refresh VoyageAI models and prices and context

* Refresh VoyageAI models and prices and context

* Updating the available VoyageAI models in the docs

* Updating the available VoyageAI models in the docs

* Updating the model prices and the docs
---
 docs/my-website/docs/providers/voyage.md | 114 ++++++++++++++++++++++-
 model_prices_and_context_window.json     |  16 ++++
 2 files changed, 127 insertions(+), 3 deletions(-)

diff --git a/docs/my-website/docs/providers/voyage.md b/docs/my-website/docs/providers/voyage.md
index 4b729bc9f5..b1e4cf932e 100644
--- a/docs/my-website/docs/providers/voyage.md
+++ b/docs/my-website/docs/providers/voyage.md
@@ -14,12 +14,41 @@ import os
 
 os.environ['VOYAGE_API_KEY'] = ""
 response = embedding(
-    model="voyage/voyage-3-large",
+    model="voyage/voyage-3.5",
     input=["good morning from litellm"],
 )
 print(response)
 ```
 
+## Supported Parameters
+
+VoyageAI embeddings support the following optional parameters:
+
+- `input_type`: Specifies the type of input for retrieval optimization
+  - `"query"`: Use for search queries
+  - `"document"`: Use for documents being indexed
+- `dimensions`: Output embedding dimensions (256, 512, 1024, or 2048)
+- `encoding_format`: Output format (`"float"`, `"int8"`, `"uint8"`, `"binary"`, `"ubinary"`)
+- `truncation`: Whether to truncate inputs exceeding max tokens (default: `True`)
+
+### Example with Parameters
+
+```python
+from litellm import embedding
+import os
+
+os.environ['VOYAGE_API_KEY'] = "your-api-key"
+
+# Embedding with custom dimensions and input type
+response = embedding(
+    model="voyage/voyage-3.5",
+    input=["Your text here"],
+    dimensions=512,
+    input_type="document"
+)
+print(f"Embedding dimensions: {len(response.data[0]['embedding'])}")
+```
+
 ## Supported Models
 All models listed here https://docs.voyageai.com/embeddings/#models-and-specifics are supported
 
@@ -40,5 +69,84 @@ All models listed here https://docs.voyageai.com/embeddings/#models-and-specific
 | voyage-2                | `embedding(model="voyage/voyage-2", input)`                | 
 | voyage-lite-02-instruct | `embedding(model="voyage/voyage-lite-02-instruct", input)` | 
 | voyage-01               | `embedding(model="voyage/voyage-01", input)`               | 
-| voyage-lite-01          | `embedding(model="voyage/voyage-lite-01", input)`          | 
-| voyage-lite-01-instruct | `embedding(model="voyage/voyage-lite-01-instruct", input)` | 
+| voyage-lite-01          | `embedding(model="voyage/voyage-lite-01", input)`          |
+| voyage-lite-01-instruct | `embedding(model="voyage/voyage-lite-01-instruct", input)` |
+
+## Contextual Embeddings (voyage-context-3)
+
+VoyageAI's `voyage-context-3` model provides contextualized chunk embeddings, where each chunk is embedded with awareness of its surrounding document context. This significantly improves retrieval quality compared to standard context-agnostic embeddings.
+
+### Key Benefits
+- Chunks understand their position and role within the full document
+- Improved retrieval accuracy for long documents (Voyage AI reports 7-23% gains over competing approaches in its own benchmarks)
+- Better handling of ambiguous references and cross-chunk dependencies
+- Output embeddings are a drop-in replacement for standard embeddings in RAG pipelines (only the request input format changes)
+
+### Usage
+
+Contextual embeddings require a **nested input format** where each inner list represents chunks from a single document:
+
+```python
+from litellm import embedding
+import os
+
+os.environ['VOYAGE_API_KEY'] = "your-api-key"
+
+# Single document with multiple chunks
+response = embedding(
+    model="voyage/voyage-context-3",
+    input=[
+        [
+            "Chapter 1: Introduction to AI",
+            "This chapter covers the basics of artificial intelligence.",
+            "We will explore machine learning and deep learning."
+        ]
+    ]
+)
+print(f"Number of chunk groups: {len(response.data)}")
+
+# Multiple documents
+response = embedding(
+    model="voyage/voyage-context-3",
+    input=[
+        ["Paris is the capital of France.", "It is known for the Eiffel Tower."],
+        ["Tokyo is the capital of Japan.", "It is a major economic hub."]
+    ]
+)
+print(f"Processed {len(response.data)} documents")
+```
+
+### Specifications
+- Model: `voyage-context-3`
+- Context length: 32,000 tokens per document
+- Output dimensions: 256, 512, 1024 (default), or 2048
+- Max inputs (documents): 1,000 per request
+- Max total tokens: 120,000 per request
+- Max chunks: 16,000 per request
+- Pricing: $0.18 per million tokens
+
+### When to Use Contextual Embeddings
+
+**Use `voyage-context-3` when:**
+- Processing long documents split into chunks
+- Document structure and flow are important
+- References between sections matter
+- You need to preserve document hierarchy
+
+**Use standard models (`voyage-3.5`, `voyage-3-large`) when:**
+- Embedding independent pieces of text
+- Processing short queries
+- Document context is not relevant
+- You need faster or cheaper processing (e.g. `voyage-3.5` or `voyage-3.5-lite`)
+
+## Model Selection Guide
+
+| Model | Best For | Context Length | Price/M Tokens |
+|-------|----------|----------------|----------------|
+| voyage-3.5 | General-purpose, multilingual | 32K | $0.06 |
+| voyage-3.5-lite | Latency-sensitive applications | 32K | $0.02 |
+| voyage-3-large | Best overall quality | 32K | $0.18 |
+| voyage-code-3 | Code retrieval and search | 32K | $0.18 |
+| voyage-finance-2 | Financial documents | 32K | $0.12 |
+| voyage-law-2 | Legal documents | 16K | $0.12 |
+| voyage-context-3 | Contextual document embeddings | 32K | $0.18 |
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 7c86e570c3..1c0ac1b436 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -23768,6 +23768,22 @@
         "mode": "embedding",
         "output_cost_per_token": 0.0
     },
+    "voyage/voyage-3.5": {
+        "input_cost_per_token": 6e-08,
+        "litellm_provider": "voyage",
+        "max_input_tokens": 32000,
+        "max_tokens": 32000,
+        "mode": "embedding",
+        "output_cost_per_token": 0.0
+    },
+    "voyage/voyage-3.5-lite": {
+        "input_cost_per_token": 2e-08,
+        "litellm_provider": "voyage",
+        "max_input_tokens": 32000,
+        "max_tokens": 32000,
+        "mode": "embedding",
+        "output_cost_per_token": 0.0
+    },
     "voyage/voyage-code-2": {
         "input_cost_per_token": 1.2e-07,
         "litellm_provider": "voyage",