mirror of
https://github.com/tiennm99/litellm.git
synced 2026-07-04 21:08:09 +00:00
[Fixes] Using Qwen API Tiered Pricing (#14479)
* fix: use dashscope cost calc
* add qwen logo
This commit is contained in:
@@ -344,6 +344,11 @@ def cost_per_token( # noqa: PLR0915
|
||||
return perplexity_cost_per_token(model=model, usage=usage_block)
|
||||
elif custom_llm_provider == "xai":
|
||||
return xai_cost_per_token(model=model, usage=usage_block)
|
||||
elif custom_llm_provider == "dashscope":
|
||||
from litellm.llms.dashscope.cost_calculator import (
|
||||
cost_per_token as dashscope_cost_per_token,
|
||||
)
|
||||
return dashscope_cost_per_token(model=model, usage=usage_block)
|
||||
else:
|
||||
model_info = _cached_get_model_info_helper(
|
||||
model=model, custom_llm_provider=custom_llm_provider
|
||||
|
||||
@@ -45,36 +45,33 @@ def _calculate_tiered_cost(
|
||||
cost_key: str,
|
||||
fallback_cost_key: Optional[str] = None
|
||||
) -> float:
|
||||
"""Calculate cost using tiered pricing structure."""
|
||||
"""Calculate cost using tiered pricing structure.
|
||||
|
||||
Finds the appropriate tier based on token count and applies that tier's rate to all tokens.
|
||||
"""
|
||||
if not tiered_pricing or tokens <= 0:
|
||||
return 0.0
|
||||
|
||||
total_cost = 0.0
|
||||
tokens_processed = 0
|
||||
|
||||
# Find the appropriate tier for the token count
|
||||
for tier in tiered_pricing:
|
||||
if tokens_processed >= tokens:
|
||||
break
|
||||
|
||||
tier_range = tier.get("range", [])
|
||||
if len(tier_range) != 2:
|
||||
continue
|
||||
|
||||
range_start, range_end = tier_range
|
||||
|
||||
if tokens <= range_start:
|
||||
break
|
||||
|
||||
tier_start = max(range_start, tokens_processed)
|
||||
tier_end = min(range_end, tokens)
|
||||
|
||||
if tier_end > tier_start:
|
||||
tokens_in_tier = tier_end - tier_start
|
||||
# Check if tokens fall within this tier's range
|
||||
if range_start <= tokens <= range_end:
|
||||
cost_per_token = tier.get(cost_key) or tier.get(fallback_cost_key, 0)
|
||||
total_cost += tokens_in_tier * cost_per_token
|
||||
tokens_processed = tier_end
|
||||
return tokens * cost_per_token
|
||||
|
||||
return total_cost
|
||||
# If no tier matches, use the last tier (highest tier)
|
||||
if tiered_pricing:
|
||||
last_tier = tiered_pricing[-1]
|
||||
cost_per_token = last_tier.get(cost_key) or last_tier.get(fallback_cost_key, 0)
|
||||
return tokens * cost_per_token
|
||||
|
||||
return 0.0
|
||||
|
||||
|
||||
def _calculate_flat_cost(tokens: int, cost_per_token: float) -> float:
|
||||
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 48 KiB |
@@ -9,9 +9,9 @@ model_list:
|
||||
- model_name: openai/*
|
||||
litellm_params:
|
||||
model: openai/*
|
||||
- model_name: gemini/*
|
||||
- model_name: dashscope/*
|
||||
litellm_params:
|
||||
model: gemini/*
|
||||
model: dashscope/*
|
||||
|
||||
|
||||
litellm_settings:
|
||||
|
||||
@@ -96,10 +96,10 @@ class TestDashscopeCostCalculator:
|
||||
assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-10)
|
||||
assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-10)
|
||||
|
||||
def test_tiered_pricing_multiple_tiers(self):
|
||||
"""Test tiered pricing when tokens span multiple tiers."""
|
||||
def test_tiered_pricing_higher_tier(self):
|
||||
"""Test tiered pricing when tokens fall in higher tier (tier 3)."""
|
||||
usage = Usage(
|
||||
prompt_tokens=150000, # Spans tiers 1 (0-32K), 2 (32K-128K), 3 (128K-256K)
|
||||
prompt_tokens=150000, # Falls in tier 3 (128K-256K)
|
||||
completion_tokens=2000,
|
||||
total_tokens=152000
|
||||
)
|
||||
@@ -110,13 +110,13 @@ class TestDashscopeCostCalculator:
|
||||
)
|
||||
|
||||
# Expected input cost calculation:
|
||||
# Tier 1 (0-32K): 32,000 tokens * $1e-6 = $0.032
|
||||
# Tier 2 (32K-128K): 96,000 tokens * $1.8e-6 = $0.1728
|
||||
# Tier 3 (128K-256K): 22,000 tokens * $3e-6 = $0.066
|
||||
# Total input cost = $0.032 + $0.1728 + $0.066 = $0.2708
|
||||
# 150,000 tokens falls in tier 3 (128K-256K), so all tokens are charged at tier 3 rate
|
||||
# Input: 150,000 tokens * $3e-6 = $0.45
|
||||
# Output: 2,000 tokens falls in tier 1 (0-32K), so charged at tier 1 rate
|
||||
# Output: 2,000 tokens * $5e-6 = $0.01
|
||||
|
||||
expected_prompt_cost = (32000 * 1e-6) + (96000 * 1.8e-6) + (22000 * 3e-6)
|
||||
expected_completion_cost = 2000 * 5e-6 # All in tier 1 for output
|
||||
expected_prompt_cost = 150000 * 3e-6 # All tokens at tier 3 rate
|
||||
expected_completion_cost = 2000 * 5e-6 # All tokens at tier 1 rate
|
||||
|
||||
assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-10)
|
||||
assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-10)
|
||||
@@ -140,20 +140,44 @@ class TestDashscopeCostCalculator:
|
||||
)
|
||||
|
||||
# Expected cost calculation:
|
||||
# Regular tokens: 40,000 (32K in tier 1 + 8K in tier 2)
|
||||
# - Tier 1: 32,000 * $1e-6 = $0.032
|
||||
# - Tier 2: 8,000 * $1.8e-6 = $0.0144
|
||||
# Cached tokens: 10,000 in tier 1 at discounted rate
|
||||
# - Tier 1 cached: 10,000 * $1e-7 = $0.001
|
||||
# Total input cost = $0.032 + $0.0144 + $0.001 = $0.0474
|
||||
# Regular tokens: 40,000 falls in tier 2 (32K-128K), so all charged at tier 2 rate
|
||||
# - Regular: 40,000 * $1.8e-6 = $0.072
|
||||
# Cached tokens: 10,000 falls in tier 1 (0-32K), so charged at tier 1 cached rate
|
||||
# - Cached: 10,000 * $1e-7 = $0.001
|
||||
# Total input cost = $0.072 + $0.001 = $0.073
|
||||
|
||||
regular_tokens = 40000
|
||||
cached_tokens = 10000
|
||||
|
||||
expected_regular_cost = (32000 * 1e-6) + (8000 * 1.8e-6)
|
||||
expected_regular_cost = regular_tokens * 1.8e-6 # Tier 2 rate
|
||||
expected_cached_cost = cached_tokens * 1e-7 # Tier 1 cached rate
|
||||
expected_prompt_cost = expected_regular_cost + expected_cached_cost
|
||||
expected_completion_cost = 1000 * 5e-6
|
||||
expected_completion_cost = 1000 * 5e-6 # Tier 1 rate
|
||||
|
||||
assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-10)
|
||||
assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-10)
|
||||
|
||||
def test_tiered_pricing_highest_tier(self):
|
||||
"""Test tiered pricing when tokens exceed highest tier range."""
|
||||
usage = Usage(
|
||||
prompt_tokens=2000000, # Exceeds tier 4 max (1M), should use tier 4 rate
|
||||
completion_tokens=5000,
|
||||
total_tokens=2005000
|
||||
)
|
||||
|
||||
prompt_cost, completion_cost = dashscope_cost_per_token(
|
||||
model="qwen3-coder-plus",
|
||||
usage=usage
|
||||
)
|
||||
|
||||
# Expected cost calculation:
|
||||
# 2,000,000 tokens exceeds tier 4 (256K-1M), so use tier 4 rate for all tokens
|
||||
# Input: 2,000,000 tokens * $6e-6 = $12.0
|
||||
# Output: 5,000 tokens falls in tier 1 (0-32K), so charged at tier 1 rate
|
||||
# Output: 5,000 tokens * $5e-6 = $0.025
|
||||
|
||||
expected_prompt_cost = 2000000 * 6e-6 # Tier 4 rate (highest tier)
|
||||
expected_completion_cost = 5000 * 5e-6 # Tier 1 rate
|
||||
|
||||
assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-10)
|
||||
assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-10)
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 48 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 48 KiB |
Reference in New Issue
Block a user