[Fixes] Using Qwen API Tiered Pricing (#14479)

* fix: use dashscope cost calc

* add qwen logo
This commit is contained in:
Ishaan Jaff
2025-09-11 20:07:41 -07:00
committed by GitHub
parent 1e230e87e3
commit 32d87c242b
7 changed files with 63 additions and 37 deletions
+5
View File
@@ -344,6 +344,11 @@ def cost_per_token( # noqa: PLR0915
return perplexity_cost_per_token(model=model, usage=usage_block)
elif custom_llm_provider == "xai":
return xai_cost_per_token(model=model, usage=usage_block)
elif custom_llm_provider == "dashscope":
from litellm.llms.dashscope.cost_calculator import (
cost_per_token as dashscope_cost_per_token,
)
return dashscope_cost_per_token(model=model, usage=usage_block)
else:
model_info = _cached_get_model_info_helper(
model=model, custom_llm_provider=custom_llm_provider
+15 -18
View File
@@ -45,36 +45,33 @@ def _calculate_tiered_cost(
cost_key: str,
fallback_cost_key: Optional[str] = None
) -> float:
"""Calculate cost using tiered pricing structure."""
"""Calculate cost using tiered pricing structure.
Finds the appropriate tier based on token count and applies that tier's rate to all tokens.
"""
if not tiered_pricing or tokens <= 0:
return 0.0
total_cost = 0.0
tokens_processed = 0
# Find the appropriate tier for the token count
for tier in tiered_pricing:
if tokens_processed >= tokens:
break
tier_range = tier.get("range", [])
if len(tier_range) != 2:
continue
range_start, range_end = tier_range
if tokens <= range_start:
break
tier_start = max(range_start, tokens_processed)
tier_end = min(range_end, tokens)
if tier_end > tier_start:
tokens_in_tier = tier_end - tier_start
# Check if tokens fall within this tier's range
if range_start <= tokens <= range_end:
cost_per_token = tier.get(cost_key) or tier.get(fallback_cost_key, 0)
total_cost += tokens_in_tier * cost_per_token
tokens_processed = tier_end
return tokens * cost_per_token
return total_cost
# If no tier matches, use the last tier (highest tier)
if tiered_pricing:
last_tier = tiered_pricing[-1]
cost_per_token = last_tier.get(cost_key) or last_tier.get(fallback_cost_key, 0)
return tokens * cost_per_token
return 0.0
def _calculate_flat_cost(tokens: int, cost_per_token: float) -> float:
Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

+2 -2
View File
@@ -9,9 +9,9 @@ model_list:
- model_name: openai/*
litellm_params:
model: openai/*
- model_name: gemini/*
- model_name: dashscope/*
litellm_params:
model: gemini/*
model: dashscope/*
litellm_settings:
@@ -96,10 +96,10 @@ class TestDashscopeCostCalculator:
assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-10)
assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-10)
def test_tiered_pricing_multiple_tiers(self):
"""Test tiered pricing when tokens span multiple tiers."""
def test_tiered_pricing_higher_tier(self):
"""Test tiered pricing when tokens fall in higher tier (tier 3)."""
usage = Usage(
prompt_tokens=150000, # Spans tiers 1 (0-32K), 2 (32K-128K), 3 (128K-256K)
prompt_tokens=150000, # Falls in tier 3 (128K-256K)
completion_tokens=2000,
total_tokens=152000
)
@@ -110,13 +110,13 @@ class TestDashscopeCostCalculator:
)
# Expected input cost calculation:
# Tier 1 (0-32K): 32,000 tokens * $1e-6 = $0.032
# Tier 2 (32K-128K): 96,000 tokens * $1.8e-6 = $0.1728
# Tier 3 (128K-256K): 22,000 tokens * $3e-6 = $0.066
# Total input cost = $0.032 + $0.1728 + $0.066 = $0.2708
# 150,000 tokens falls in tier 3 (128K-256K), so all tokens are charged at tier 3 rate
# Input: 150,000 tokens * $3e-6 = $0.45
# Output: 2,000 tokens falls in tier 1 (0-32K), so charged at tier 1 rate
# Output: 2,000 tokens * $5e-6 = $0.01
expected_prompt_cost = (32000 * 1e-6) + (96000 * 1.8e-6) + (22000 * 3e-6)
expected_completion_cost = 2000 * 5e-6 # All in tier 1 for output
expected_prompt_cost = 150000 * 3e-6 # All tokens at tier 3 rate
expected_completion_cost = 2000 * 5e-6 # All tokens at tier 1 rate
assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-10)
assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-10)
@@ -140,20 +140,44 @@ class TestDashscopeCostCalculator:
)
# Expected cost calculation:
# Regular tokens: 40,000 (32K in tier 1 + 8K in tier 2)
# - Tier 1: 32,000 * $1e-6 = $0.032
# - Tier 2: 8,000 * $1.8e-6 = $0.0144
# Cached tokens: 10,000 in tier 1 at discounted rate
# - Tier 1 cached: 10,000 * $1e-7 = $0.001
# Total input cost = $0.032 + $0.0144 + $0.001 = $0.0474
# Regular tokens: 40,000 falls in tier 2 (32K-128K), so all charged at tier 2 rate
# - Regular: 40,000 * $1.8e-6 = $0.072
# Cached tokens: 10,000 falls in tier 1 (0-32K), so charged at tier 1 cached rate
# - Cached: 10,000 * $1e-7 = $0.001
# Total input cost = $0.072 + $0.001 = $0.073
regular_tokens = 40000
cached_tokens = 10000
expected_regular_cost = (32000 * 1e-6) + (8000 * 1.8e-6)
expected_regular_cost = regular_tokens * 1.8e-6 # Tier 2 rate
expected_cached_cost = cached_tokens * 1e-7 # Tier 1 cached rate
expected_prompt_cost = expected_regular_cost + expected_cached_cost
expected_completion_cost = 1000 * 5e-6
expected_completion_cost = 1000 * 5e-6 # Tier 1 rate
assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-10)
assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-10)
def test_tiered_pricing_highest_tier(self):
    """Check that a prompt larger than the top tier's range uses the top tier's rate."""
    # 2,000,000 prompt tokens exceed the tier 4 ceiling (1M), so the tier 4
    # rate is expected to apply to every prompt token.
    input_tokens = 2000000
    output_tokens = 5000
    usage = Usage(
        prompt_tokens=input_tokens,
        completion_tokens=output_tokens,
        total_tokens=2005000,
    )

    prompt_cost, completion_cost = dashscope_cost_per_token(
        model="qwen3-coder-plus",
        usage=usage,
    )

    # Input:  2,000,000 tokens * $6e-6 = $12.0   (tier 4, 256K-1M, highest tier)
    # Output: 5,000 tokens * $5e-6 = $0.025      (tier 1, 0-32K)
    checks = (
        (prompt_cost, input_tokens * 6e-6),
        (completion_cost, output_tokens * 5e-6),
    )
    for actual, expected in checks:
        assert math.isclose(actual, expected, rel_tol=1e-10)
Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB