[Fixes] Using Qwen API Tiered Pricing (#14479)

* fix: use dashscope cost calc
* add qwen logo
2026-07-04 21:08:09 +00:00 · 2025-09-11 20:07:41 -07:00
parent 1e230e87e3
commit 32d87c242b
7 changed files with 63 additions and 37 deletions
@@ -344,6 +344,11 @@ def cost_per_token(  # noqa: PLR0915
        return perplexity_cost_per_token(model=model, usage=usage_block)
    elif custom_llm_provider == "xai":
        return xai_cost_per_token(model=model, usage=usage_block)
+    elif custom_llm_provider == "dashscope":
+        from litellm.llms.dashscope.cost_calculator import (
+            cost_per_token as dashscope_cost_per_token,
+        )
+        return dashscope_cost_per_token(model=model, usage=usage_block)
    else:
        model_info = _cached_get_model_info_helper(
            model=model, custom_llm_provider=custom_llm_provider
@@ -45,36 +45,33 @@ def _calculate_tiered_cost(
    cost_key: str,
    fallback_cost_key: Optional[str] = None
 ) -> float:
-    """Calculate cost using tiered pricing structure."""
+    """Calculate cost using tiered pricing structure.
+    
+    Finds the appropriate tier based on token count and applies that tier's rate to all tokens.
+    """
    if not tiered_pricing or tokens <= 0:
        return 0.0
    
-    total_cost = 0.0
-    tokens_processed = 0
-    
+    # Find the appropriate tier for the token count
    for tier in tiered_pricing:
-        if tokens_processed >= tokens:
-            break
-            
        tier_range = tier.get("range", [])
        if len(tier_range) != 2:
            continue
            
        range_start, range_end = tier_range
        
-        if tokens <= range_start:
-            break
-            
-        tier_start = max(range_start, tokens_processed)
-        tier_end = min(range_end, tokens)
-        
-        if tier_end > tier_start:
-            tokens_in_tier = tier_end - tier_start
+        # Check if tokens fall within this tier's range
+        if range_start <= tokens <= range_end:
            cost_per_token = tier.get(cost_key) or tier.get(fallback_cost_key, 0)
-            total_cost += tokens_in_tier * cost_per_token
-            tokens_processed = tier_end
+            return tokens * cost_per_token
    
-    return total_cost
+    # No tier's range contains the token count (e.g. it exceeds the highest
+    # tier's upper bound): fall back to the last tier's rate for all tokens
+    if tiered_pricing:
+        last_tier = tiered_pricing[-1]
+        cost_per_token = last_tier.get(cost_key) or last_tier.get(fallback_cost_key, 0)
+        return tokens * cost_per_token
+    
+    return 0.0


 def _calculate_flat_cost(tokens: int, cost_per_token: float) -> float:
@@ -9,9 +9,9 @@ model_list:
  - model_name: openai/*
    litellm_params:
      model: openai/*
-  - model_name: gemini/*
+  - model_name: dashscope/*
    litellm_params:
-      model: gemini/*
+      model: dashscope/*


 litellm_settings:
@@ -96,10 +96,10 @@ class TestDashscopeCostCalculator:
        assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-10)
        assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-10)

-    def test_tiered_pricing_multiple_tiers(self):
-        """Test tiered pricing when tokens span multiple tiers."""
+    def test_tiered_pricing_higher_tier(self):
+        """Test tiered pricing when tokens fall in higher tier (tier 3)."""
        usage = Usage(
-            prompt_tokens=150000,  # Spans tiers 1 (0-32K), 2 (32K-128K), 3 (128K-256K)
+            prompt_tokens=150000,  # Falls in tier 3 (128K-256K)
            completion_tokens=2000,
            total_tokens=152000
        )
@@ -110,13 +110,13 @@ class TestDashscopeCostCalculator:
        )
        
        # Expected input cost calculation:
-        # Tier 1 (0-32K): 32,000 tokens * $1e-6 = $0.032
-        # Tier 2 (32K-128K): 96,000 tokens * $1.8e-6 = $0.1728
-        # Tier 3 (128K-256K): 22,000 tokens * $3e-6 = $0.066
-        # Total input cost = $0.032 + $0.1728 + $0.066 = $0.2708
+        # 150,000 tokens fall in tier 3 (128K-256K), so all tokens are charged at the tier 3 rate
+        # Input: 150,000 tokens * $3e-6 = $0.45
+        # Output: 2,000 tokens fall in tier 1 (0-32K), so they are charged at the tier 1 rate
+        # Output: 2,000 tokens * $5e-6 = $0.01
        
-        expected_prompt_cost = (32000 * 1e-6) + (96000 * 1.8e-6) + (22000 * 3e-6)
-        expected_completion_cost = 2000 * 5e-6  # All in tier 1 for output
+        expected_prompt_cost = 150000 * 3e-6  # All tokens at tier 3 rate
+        expected_completion_cost = 2000 * 5e-6  # All tokens at tier 1 rate
        
        assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-10)
        assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-10)
@@ -140,20 +140,44 @@ class TestDashscopeCostCalculator:
        )
        
        # Expected cost calculation:
-        # Regular tokens: 40,000 (32K in tier 1 + 8K in tier 2)
-        # - Tier 1: 32,000 * $1e-6 = $0.032
-        # - Tier 2: 8,000 * $1.8e-6 = $0.0144
-        # Cached tokens: 10,000 in tier 1 at discounted rate
-        # - Tier 1 cached: 10,000 * $1e-7 = $0.001
-        # Total input cost = $0.032 + $0.0144 + $0.001 = $0.0474
+        # Regular tokens: 40,000 fall in tier 2 (32K-128K), so all are charged at the tier 2 rate
+        # - Regular: 40,000 * $1.8e-6 = $0.072
+        # Cached tokens: 10,000 fall in tier 1 (0-32K), so they are charged at the tier 1 cached rate
+        # - Cached: 10,000 * $1e-7 = $0.001
+        # Total input cost = $0.072 + $0.001 = $0.073
        
        regular_tokens = 40000
        cached_tokens = 10000
        
-        expected_regular_cost = (32000 * 1e-6) + (8000 * 1.8e-6)
+        expected_regular_cost = regular_tokens * 1.8e-6  # Tier 2 rate
        expected_cached_cost = cached_tokens * 1e-7  # Tier 1 cached rate
        expected_prompt_cost = expected_regular_cost + expected_cached_cost
-        expected_completion_cost = 1000 * 5e-6
+        expected_completion_cost = 1000 * 5e-6  # Tier 1 rate
+        
+        assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-10)
+        assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-10)
+
+    def test_tiered_pricing_highest_tier(self):
+        """Test tiered pricing when tokens exceed highest tier range."""
+        usage = Usage(
+            prompt_tokens=2000000,  # Exceeds tier 4 max (1M), should use tier 4 rate
+            completion_tokens=5000,
+            total_tokens=2005000
+        )
+        
+        prompt_cost, completion_cost = dashscope_cost_per_token(
+            model="qwen3-coder-plus", 
+            usage=usage
+        )
+        
+        # Expected cost calculation:
+        # 2,000,000 tokens exceed tier 4's upper bound (256K-1M), so the tier 4 rate is used for all tokens
+        # Input: 2,000,000 tokens * $6e-6 = $12.0
+        # Output: 5,000 tokens fall in tier 1 (0-32K), so they are charged at the tier 1 rate
+        # Output: 5,000 tokens * $5e-6 = $0.025
+        
+        expected_prompt_cost = 2000000 * 6e-6  # Tier 4 rate (highest tier)
+        expected_completion_cost = 5000 * 5e-6  # Tier 1 rate
        
        assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-10)
        assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-10)