import asyncio import os import subprocess import sys import traceback from typing import Any from openai import AuthenticationError, BadRequestError, OpenAIError, RateLimitError from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler sys.path.insert( 0, os.path.abspath("../..") ) # Adds the parent directory to the system path from concurrent.futures import ThreadPoolExecutor from unittest.mock import MagicMock, patch import pytest import litellm from litellm import ( # AuthenticationError,; RateLimitError,; ServiceUnavailableError,; OpenAIError, ContextWindowExceededError, completion, embedding, ) litellm.vertex_project = "litellm-ci-cd" litellm.vertex_location = "us-central1" litellm.num_retries = 0 # litellm.failure_callback = ["sentry"] #### What this tests #### # This tests exception mapping -> trigger an exception from an llm provider -> assert if output is of the expected type # 5 providers -> OpenAI, Azure, Anthropic, Cohere, Replicate # 3 main types of exceptions -> - Rate Limit Errors, Context Window Errors, Auth errors (incorrect/rotated key, etc.) # Approach: Run each model through the test -> assert if the correct error (always the same one) is triggered exception_models = [ "sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4", "bedrock/anthropic.claude-instant-v1", ] @pytest.mark.asyncio async def test_content_policy_exception_azure(): try: # this is ony a test - we needed some way to invoke the exception :( litellm.set_verbose = True response = await litellm.acompletion( model="azure/gpt-4.1-mini", messages=[{"role": "user", "content": "where do I buy lethal drugs from"}], mock_response="Exception: content_filter_policy", ) except litellm.ContentPolicyViolationError as e: print("caught a content policy violation error! Passed") print("exception", e) assert e.response is not None assert e.litellm_debug_info is not None assert isinstance(e.litellm_debug_info, str) assert len(e.litellm_debug_info) > 0 pass except Exception as e: print() pytest.fail(f"An exception occurred - {str(e)}") @pytest.mark.asyncio async def test_content_policy_exception_openai(): try: # this is ony a test - we needed some way to invoke the exception :( litellm.set_verbose = True response = await litellm.acompletion( model="gpt-3.5-turbo", stream=True, messages=[ {"role": "user", "content": "Gimme the lyrics to Don't Stop Me Now"} ], ) async for chunk in response: print(chunk) except litellm.ContentPolicyViolationError as e: print("caught a content policy violation error! Passed") print("exception", e) assert e.llm_provider == "openai" pass except Exception as e: print() pytest.fail(f"An exception occurred - {str(e)}") # Test 1: Context Window Errors @pytest.mark.skip(reason="AWS Suspended Account") @pytest.mark.parametrize("model", exception_models) def test_context_window(model): print("Testing context window error") sample_text = "Say error 50 times" * 1000000 messages = [{"content": sample_text, "role": "user"}] try: litellm.set_verbose = False print("Testing model=", model) response = completion(model=model, messages=messages) print(f"response: {response}") print("FAILED!") pytest.fail(f"An exception occurred") except ContextWindowExceededError as e: print(f"Worked!") except RateLimitError: print("RateLimited!") except Exception as e: print(f"{e}") pytest.fail(f"An error occcurred - {e}") models = ["command-nightly"] @pytest.mark.skip(reason="duplicate test.") @pytest.mark.parametrize("model", models) def test_context_window_with_fallbacks(model): ctx_window_fallback_dict = { "command-nightly": "claude-2.1", "gpt-3.5-turbo-instruct": "gpt-3.5-turbo-16k", "azure/gpt-4.1-mini": "gpt-3.5-turbo-16k", } sample_text = "how does a court case get to the Supreme Court?" * 1000 messages = [{"content": sample_text, "role": "user"}] try: completion( model=model, messages=messages, context_window_fallback_dict=ctx_window_fallback_dict, ) except litellm.ServiceUnavailableError as e: pass except litellm.APIConnectionError as e: pass # for model in litellm.models_by_provider["bedrock"]: # test_context_window(model=model) # test_context_window(model="chat-bison") # test_context_window_with_fallbacks(model="command-nightly") # Test 2: InvalidAuth Errors @pytest.mark.parametrize("model", models) def invalid_auth(model): # set the model key to an invalid key, depending on the model messages = [{"content": "Hello, how are you?", "role": "user"}] temporary_key = None try: if model == "gpt-3.5-turbo" or model == "gpt-3.5-turbo-instruct": temporary_key = os.environ["OPENAI_API_KEY"] os.environ["OPENAI_API_KEY"] = "bad-key" elif "bedrock" in model: temporary_aws_access_key = os.environ["AWS_ACCESS_KEY_ID"] os.environ["AWS_ACCESS_KEY_ID"] = "bad-key" temporary_aws_region_name = os.environ["AWS_REGION_NAME"] os.environ["AWS_REGION_NAME"] = "bad-key" temporary_secret_key = os.environ["AWS_SECRET_ACCESS_KEY"] os.environ["AWS_SECRET_ACCESS_KEY"] = "bad-key" elif model == "azure/gpt-4.1-mini": temporary_key = os.environ["AZURE_AI_API_KEY"] os.environ["AZURE_AI_API_KEY"] = "bad-key" elif model == "claude-3-5-haiku-20241022": temporary_key = os.environ["ANTHROPIC_API_KEY"] os.environ["ANTHROPIC_API_KEY"] = "bad-key" elif model == "command-nightly": temporary_key = os.environ["COHERE_API_KEY"] os.environ["COHERE_API_KEY"] = "bad-key" elif "j2" in model: temporary_key = os.environ["AI21_API_KEY"] os.environ["AI21_API_KEY"] = "bad-key" elif "togethercomputer" in model: temporary_key = os.environ["TOGETHERAI_API_KEY"] os.environ["TOGETHERAI_API_KEY"] = "sk-test-togetherai-key-808" elif model in litellm.openrouter_models: temporary_key = os.environ["OPENROUTER_API_KEY"] os.environ["OPENROUTER_API_KEY"] = "bad-key" elif model in litellm.aleph_alpha_models: temporary_key = os.environ["ALEPH_ALPHA_API_KEY"] os.environ["ALEPH_ALPHA_API_KEY"] = "bad-key" elif model in litellm.nlp_cloud_models: os.environ["NLP_CLOUD_API_KEY"] = "bad-key" elif ( model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1" ): temporary_key = os.environ["REPLICATE_API_KEY"] os.environ["REPLICATE_API_KEY"] = "bad-key" print(f"model: {model}") response = completion(model=model, messages=messages) print(f"response: {response}") except AuthenticationError as e: print(f"AuthenticationError Caught Exception - {str(e)}") except ( OpenAIError ) as e: # is at least an openai error -> in case of random model errors - e.g. overloaded server print(f"OpenAIError Caught Exception - {e}") except Exception as e: print(type(e)) print(type(AuthenticationError)) print(e.__class__.__name__) print(f"Uncaught Exception - {e}") pytest.fail(f"Error occurred: {e}") if temporary_key != None: # reset the key if model == "gpt-3.5-turbo": os.environ["OPENAI_API_KEY"] = temporary_key elif model == "chatgpt-test": os.environ["AZURE_AI_API_KEY"] = temporary_key azure = True elif model == "claude-3-5-haiku-20241022": os.environ["ANTHROPIC_API_KEY"] = temporary_key elif model == "command-nightly": os.environ["COHERE_API_KEY"] = temporary_key elif ( model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1" ): os.environ["REPLICATE_API_KEY"] = temporary_key elif "j2" in model: os.environ["AI21_API_KEY"] = temporary_key elif "togethercomputer" in model: os.environ["TOGETHERAI_API_KEY"] = temporary_key elif model in litellm.aleph_alpha_models: os.environ["ALEPH_ALPHA_API_KEY"] = temporary_key elif model in litellm.nlp_cloud_models: os.environ.pop("NLP_CLOUD_API_KEY", None) elif "bedrock" in model: os.environ["AWS_ACCESS_KEY_ID"] = temporary_aws_access_key os.environ["AWS_REGION_NAME"] = temporary_aws_region_name os.environ["AWS_SECRET_ACCESS_KEY"] = temporary_secret_key return # for model in litellm.models_by_provider["bedrock"]: # invalid_auth(model=model) # invalid_auth(model="command-nightly") # Test 3: Invalid Request Error @pytest.mark.parametrize("model", models) def test_invalid_request_error(model): messages = [{"content": "hey, how's it going?", "role": "user"}] with pytest.raises(BadRequestError): completion(model=model, messages=messages, max_tokens="hello world") def test_completion_azure_exception(): try: import openai print("azure gpt-3.5 test\n\n") litellm.set_verbose = True ## Test azure call old_azure_key = os.environ["AZURE_AI_API_KEY"] os.environ["AZURE_AI_API_KEY"] = "good morning" response = completion( model="azure/gpt-4.1-mini", messages=[{"role": "user", "content": "hello"}], ) os.environ["AZURE_AI_API_KEY"] = old_azure_key print(f"response: {response}") print(response) except openai.AuthenticationError as e: os.environ["AZURE_AI_API_KEY"] = old_azure_key print("good job got the correct error for azure when key not set") except Exception as e: pytest.fail(f"Error occurred: {e}") # test_completion_azure_exception() def test_azure_embedding_exceptions(): try: response = litellm.embedding( model="azure/text-embedding-ada-002", input="hello", mock_response="error", ) pytest.fail(f"Bad request this should have failed but got {response}") except Exception as e: print(vars(e)) # CRUCIAL Test - Ensures our exceptions are readable and not overly complicated. some users have complained exceptions will randomly have another exception raised in our exception mapping assert str(e) == "Mock error" async def asynctest_completion_azure_exception(): try: import openai import litellm print("azure gpt-3.5 test\n\n") litellm.set_verbose = True ## Test azure call old_azure_key = os.environ["AZURE_AI_API_KEY"] os.environ["AZURE_AI_API_KEY"] = "good morning" response = await litellm.acompletion( model="azure/gpt-4.1-mini", messages=[{"role": "user", "content": "hello"}], ) print(f"response: {response}") print(response) except openai.AuthenticationError as e: os.environ["AZURE_AI_API_KEY"] = old_azure_key print("good job got the correct error for azure when key not set") print(e) except Exception as e: print("Got wrong exception") print("exception", e) pytest.fail(f"Error occurred: {e}") # import asyncio # asyncio.run( # asynctest_completion_azure_exception() # ) def asynctest_completion_openai_exception_bad_model(): try: import asyncio import openai import litellm print("azure exception bad model\n\n") litellm.set_verbose = True ## Test azure call async def test(): response = await litellm.acompletion( model="openai/gpt-6", messages=[{"role": "user", "content": "hello"}], ) asyncio.run(test()) except openai.NotFoundError: print("Good job this is a NotFoundError for a model that does not exist!") print("Passed") except Exception as e: print("Raised wrong type of exception", type(e)) assert isinstance(e, openai.BadRequestError) pytest.fail(f"Error occurred: {e}") # asynctest_completion_openai_exception_bad_model() def asynctest_completion_azure_exception_bad_model(): try: import asyncio import openai import litellm print("azure exception bad model\n\n") litellm.set_verbose = True ## Test azure call async def test(): response = await litellm.acompletion( model="azure/gpt-12", messages=[{"role": "user", "content": "hello"}], ) asyncio.run(test()) except openai.NotFoundError: print("Good job this is a NotFoundError for a model that does not exist!") print("Passed") except Exception as e: print("Raised wrong type of exception", type(e)) pytest.fail(f"Error occurred: {e}") # asynctest_completion_azure_exception_bad_model() def test_completion_openai_exception(): # test if openai:gpt raises openai.AuthenticationError try: import openai print("openai gpt-3.5 test\n\n") litellm.set_verbose = True ## Test azure call old_azure_key = os.environ["OPENAI_API_KEY"] os.environ["OPENAI_API_KEY"] = "good morning" response = completion( model="gpt-4", messages=[{"role": "user", "content": "hello"}], ) print(f"response: {response}") print(response) except openai.AuthenticationError as e: os.environ["OPENAI_API_KEY"] = old_azure_key print("OpenAI: good job got the correct error for openai when key not set") except Exception as e: pytest.fail(f"Error occurred: {e}") # test_completion_openai_exception() def test_anthropic_openai_exception(): # test if anthropic raises litellm.AuthenticationError try: litellm.set_verbose = True ## Test azure call old_azure_key = os.environ["ANTHROPIC_API_KEY"] os.environ.pop("ANTHROPIC_API_KEY") response = completion( model="anthropic/claude-3-sonnet-20240229", messages=[{"role": "user", "content": "hello"}], ) print(f"response: {response}") print(response) except litellm.AuthenticationError as e: os.environ["ANTHROPIC_API_KEY"] = old_azure_key print("Exception vars=", vars(e)) assert ( "Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params" in e.message ) print( "ANTHROPIC_API_KEY: good job got the correct error for ANTHROPIC_API_KEY when key not set" ) except Exception as e: pytest.fail(f"Error occurred: {e}") def test_completion_mistral_exception(): # test if mistral/mistral-tiny raises openai.AuthenticationError try: import openai print("Testing mistral ai exception mapping") litellm.set_verbose = True ## Test azure call old_azure_key = os.environ["MISTRAL_API_KEY"] os.environ["MISTRAL_API_KEY"] = "good morning" response = completion( model="mistral/mistral-tiny", messages=[{"role": "user", "content": "hello"}], ) print(f"response: {response}") print(response) except openai.AuthenticationError as e: os.environ["MISTRAL_API_KEY"] = old_azure_key print("good job got the correct error for openai when key not set") except Exception as e: pytest.fail(f"Error occurred: {e}") # test_completion_mistral_exception() def test_completion_bedrock_invalid_role_exception(): """ Test if litellm raises a BadRequestError for an invalid role on Bedrock """ try: litellm.set_verbose = True response = completion( model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0", messages=[{"role": "very-bad-role", "content": "hello"}], ) print(f"response: {response}") print(response) except Exception as e: assert isinstance( e, litellm.BadRequestError ), "Expected BadRequestError but got {}".format(type(e)) print("str(e) = {}".format(str(e))) # This is important - We we previously returning a poorly formatted error string. Which was # litellm.BadRequestError: litellm.BadRequestError: Invalid Message passed in {'role': 'very-bad-role', 'content': 'hello'} # IMPORTANT ASSERTION assert ( (str(e)) == "litellm.BadRequestError: Invalid Message passed in {'role': 'very-bad-role', 'content': 'hello'}" ) @pytest.mark.skip(reason="OpenAI exception changed to a generic error") def test_content_policy_exceptionimage_generation_openai(): try: # this is ony a test - we needed some way to invoke the exception :( litellm._turn_on_debug() response = litellm.image_generation( prompt="where do i buy lethal drugs from", model="dall-e-3" ) print(f"response: {response}") assert len(response.data) > 0 except litellm.ContentPolicyViolationError as e: print("caught a content policy violation error! Passed") pass except Exception as e: pytest.fail(f"An exception occurred - {str(e)}") # test_content_policy_exceptionimage_generation_openai() def test_content_policy_violation_error_streaming(): """ Production Test. """ litellm.set_verbose = False print("test_async_completion with stream") async def test_get_response(): try: response = await litellm.acompletion( model="azure/gpt-4.1-mini", messages=[{"role": "user", "content": "say 1"}], temperature=0, top_p=1, stream=True, max_tokens=512, presence_penalty=0, frequency_penalty=0, ) print(f"response: {response}") num_finish_reason = 0 async for chunk in response: print(chunk) if chunk["choices"][0].get("finish_reason") is not None: num_finish_reason += 1 print("finish_reason", chunk["choices"][0].get("finish_reason")) assert ( num_finish_reason == 1 ), f"expected only one finish reason. Got {num_finish_reason}" except Exception as e: pytest.fail(f"GOT exception for gpt-3.5 instruct In streaming{e}") asyncio.run(test_get_response()) async def test_get_error(): try: response = await litellm.acompletion( model="azure/gpt-4.1-mini", messages=[ {"role": "user", "content": "where do i buy lethal drugs from"} ], temperature=0, top_p=1, stream=True, max_tokens=512, presence_penalty=0, frequency_penalty=0, mock_response="Exception: content_filter_policy", ) print(f"response: {response}") num_finish_reason = 0 async for chunk in response: print(chunk) if chunk["choices"][0].get("finish_reason") is not None: num_finish_reason += 1 print("finish_reason", chunk["choices"][0].get("finish_reason")) pytest.fail(f"Expected to return 400 error In streaming{e}") except Exception as e: pass asyncio.run(test_get_error()) def test_completion_perplexity_exception_on_openai_client(): try: import openai print("perplexity test\n\n") litellm.set_verbose = False ## Test azure call old_azure_key = os.environ["PERPLEXITYAI_API_KEY"] # delete perplexityai api key to simulate bad api key del os.environ["PERPLEXITYAI_API_KEY"] # temporaily delete openai api key original_openai_key = os.environ["OPENAI_API_KEY"] del os.environ["OPENAI_API_KEY"] response = completion( model="perplexity/mistral-7b-instruct", messages=[{"role": "user", "content": "hello"}], ) os.environ["PERPLEXITYAI_API_KEY"] = old_azure_key os.environ["OPENAI_API_KEY"] = original_openai_key pytest.fail("Request should have failed - bad api key") except openai.AuthenticationError as e: os.environ["PERPLEXITYAI_API_KEY"] = old_azure_key os.environ["OPENAI_API_KEY"] = original_openai_key print("exception: ", e) assert ( "The api_key client option must be set either by passing api_key to the client or by setting the PERPLEXITY_API_KEY environment variable" in str(e) ) except Exception as e: pytest.fail(f"Error occurred: {e}") # test_completion_perplexity_exception_on_openai_client() def test_completion_perplexity_exception(): try: import openai print("perplexity test\n\n") litellm.set_verbose = True ## Test azure call old_azure_key = os.environ["PERPLEXITYAI_API_KEY"] os.environ["PERPLEXITYAI_API_KEY"] = "good morning" response = completion( model="perplexity/mistral-7b-instruct", messages=[{"role": "user", "content": "hello"}], ) os.environ["PERPLEXITYAI_API_KEY"] = old_azure_key pytest.fail("Request should have failed - bad api key") except openai.AuthenticationError as e: os.environ["PERPLEXITYAI_API_KEY"] = old_azure_key print("exception: ", e) assert "PerplexityException" in str(e) except Exception as e: pytest.fail(f"Error occurred: {e}") def test_completion_openai_api_key_exception(): try: import openai print("gpt-3.5 test\n\n") litellm.set_verbose = True ## Test azure call old_azure_key = os.environ["OPENAI_API_KEY"] os.environ["OPENAI_API_KEY"] = "good morning" response = completion( model="gpt-3.5-turbo", messages=[{"role": "user", "content": "hello"}], ) os.environ["OPENAI_API_KEY"] = old_azure_key pytest.fail("Request should have failed - bad api key") except openai.AuthenticationError as e: os.environ["OPENAI_API_KEY"] = old_azure_key print("exception: ", e) assert "OpenAIException" in str(e) except Exception as e: pytest.fail(f"Error occurred: {e}") # tesy_async_acompletion() def test_router_completion_vertex_exception(): try: import litellm litellm.set_verbose = True router = litellm.Router( model_list=[ { "model_name": "vertex-gemini-pro", "litellm_params": { "model": "vertex_ai/gemini-pro", "api_key": "good-morning", }, }, ] ) response = router.completion( model="vertex-gemini-pro", messages=[{"role": "user", "content": "hello"}], vertex_project="bad-project", ) pytest.fail("Request should have failed - bad api key") except Exception as e: print("exception: ", e) def test_litellm_completion_vertex_exception(): try: import litellm litellm.set_verbose = True response = completion( model="vertex_ai/gemini-pro", api_key="good-morning", messages=[{"role": "user", "content": "hello"}], vertex_project="bad-project", ) pytest.fail("Request should have failed - bad api key") except Exception as e: print("exception: ", e) def test_litellm_predibase_exception(): """ Test - Assert that the Predibase API Key is not returned on Authentication Errors """ try: import litellm litellm.set_verbose = True response = completion( model="predibase/llama-3-8b-instruct", messages=[{"role": "user", "content": "What is the meaning of life?"}], tenant_id="c4768f95", api_key="hf-rawapikey", ) pytest.fail("Request should have failed - bad api key") except Exception as e: assert "hf-rawapikey" not in str(e) print("exception: ", e) # # test_invalid_request_error(model="command-nightly") # # Test 3: Rate Limit Errors # def test_model_call(model): # try: # sample_text = "how does a court case get to the Supreme Court?" # messages = [{ "content": sample_text,"role": "user"}] # print(f"model: {model}") # response = completion(model=model, messages=messages) # except RateLimitError as e: # print(f"headers: {e.response.headers}") # return True # # except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server # # return True # except Exception as e: # print(f"Uncaught Exception {model}: {type(e).__name__} - {e}") # traceback.print_exc() # pass # return False # # Repeat each model 500 times # # extended_models = [model for model in models for _ in range(250)] # extended_models = ["azure/gpt-4.1-mini" for _ in range(250)] # def worker(model): # return test_model_call(model) # # Create a dictionary to store the results # counts = {True: 0, False: 0} # # Use Thread Pool Executor # with ThreadPoolExecutor(max_workers=500) as executor: # # Use map to start the operation in thread pool # results = executor.map(worker, extended_models) # # Iterate over results and count True/False # for result in results: # counts[result] += 1 # accuracy_score = counts[True]/(counts[True] + counts[False]) # print(f"accuracy_score: {accuracy_score}") @pytest.mark.parametrize( "provider", [ "predibase", "vertex_ai_beta", "anthropic", "databricks", "watsonx", "fireworks_ai", ], ) def test_exception_mapping(provider): """ For predibase, run through a set of mock exceptions assert that they are being mapped correctly """ litellm.set_verbose = True error_map = { 400: litellm.BadRequestError, 401: litellm.AuthenticationError, 404: litellm.NotFoundError, 408: litellm.Timeout, 429: litellm.RateLimitError, 500: litellm.InternalServerError, 503: litellm.ServiceUnavailableError, } for code, expected_exception in error_map.items(): mock_response = Exception() setattr(mock_response, "text", "This is an error message") setattr(mock_response, "llm_provider", provider) setattr(mock_response, "status_code", code) response: Any = None try: response = completion( model="{}/test-model".format(provider), messages=[{"role": "user", "content": "Hey, how's it going?"}], mock_response=mock_response, ) except expected_exception: continue except Exception as e: traceback.print_exc() response = "{}".format(str(e)) pytest.fail( "Did not raise expected exception. Expected={}, Return={},".format( expected_exception, response ) ) pass def test_fireworks_ai_exception_mapping(): """ Comprehensive test for Fireworks AI exception mapping, including: 1. Standard 429 rate limit errors 2. Text-based rate limit detection (the main issue fixed) 3. Generic 400 errors that should NOT be rate limits 4. ExceptionCheckers utility function Related to: https://github.com/BerriAI/litellm/pull/11455 Based on Fireworks AI documentation: https://docs.fireworks.ai/tools-sdks/python-client/api-reference """ import litellm from litellm.llms.fireworks_ai.common_utils import FireworksAIException from litellm.litellm_core_utils.exception_mapping_utils import ExceptionCheckers # Test scenarios covering all important cases test_scenarios = [ { "name": "Standard 429 rate limit with proper status code", "status_code": 429, "message": "Rate limit exceeded. Please try again in 60 seconds.", "expected_exception": litellm.RateLimitError, }, { "name": "Status 400 with rate limit text (the main issue fixed)", "status_code": 400, "message": '{"error":{"object":"error","type":"invalid_request_error","message":"rate limit exceeded, please try again later"}}', "expected_exception": litellm.RateLimitError, }, { "name": "Status 400 with generic invalid request (should NOT be rate limit)", "status_code": 400, "message": '{"error":{"type":"invalid_request_error","message":"Invalid parameter value"}}', "expected_exception": litellm.BadRequestError, }, ] # Test each scenario for scenario in test_scenarios: mock_exception = FireworksAIException( status_code=scenario["status_code"], message=scenario["message"], headers={} ) try: response = litellm.completion( model="fireworks_ai/llama-v3p1-70b-instruct", messages=[{"role": "user", "content": "Hello"}], mock_response=mock_exception, ) pytest.fail( f"Expected {scenario['expected_exception'].__name__} to be raised" ) except scenario["expected_exception"] as e: if scenario["expected_exception"] == litellm.RateLimitError: assert "rate limit" in str(e).lower() or "429" in str(e) except Exception as e: pytest.fail( f"Expected {scenario['expected_exception'].__name__} but got {type(e).__name__}: {e}" ) # Test ExceptionCheckers.is_error_str_rate_limit() method directly # Test cases that should return True (rate limit detected) rate_limit_strings = [ "429 rate limit exceeded", "Rate limit exceeded, please try again later", "RATE LIMIT ERROR", "Error 429: rate limit", '{"error":{"type":"invalid_request_error","message":"rate limit exceeded, please try again later"}}', "HTTP 429 Too Many Requests", ] for error_str in rate_limit_strings: assert ExceptionCheckers.is_error_str_rate_limit( error_str ), f"Should detect rate limit in: {error_str}" # Test cases that should return False (not rate limit) non_rate_limit_strings = [ "400 Bad Request", "Authentication failed", "Invalid model specified", "Context window exceeded", "Internal server error", "", "Some other error message", ] for error_str in non_rate_limit_strings: assert not ExceptionCheckers.is_error_str_rate_limit( error_str ), f"Should NOT detect rate limit in: {error_str}" # Test edge cases assert not ExceptionCheckers.is_error_str_rate_limit(None) # type: ignore assert not ExceptionCheckers.is_error_str_rate_limit(42) # type: ignore def test_anthropic_tool_calling_exception(): """ Related - https://github.com/BerriAI/litellm/issues/4348 """ tools = [ { "type": "function", "function": { "name": "get_current_weather", "description": "Get the current weather in a given location", "parameters": {}, }, } ] try: litellm.completion( model="claude-haiku-4-5-20251001", messages=[{"role": "user", "content": "Hey, how's it going?"}], tools=tools, ) except litellm.BadRequestError: pass from typing import Optional, Union from openai import AsyncOpenAI, OpenAI def _pre_call_utils( call_type: str, data: dict, client: Union[OpenAI, AsyncOpenAI], sync_mode: bool, streaming: Optional[bool], ): if call_type == "embedding": data["input"] = "Hello world!" mapped_target: Any = client.embeddings.with_raw_response if sync_mode: original_function = litellm.embedding else: original_function = litellm.aembedding elif call_type == "chat_completion": data["messages"] = [{"role": "user", "content": "Hello world"}] if streaming is True: data["stream"] = True mapped_target = client.chat.completions.with_raw_response # type: ignore if sync_mode: original_function = litellm.completion else: original_function = litellm.acompletion elif call_type == "completion": data["prompt"] = "Hello world" if streaming is True: data["stream"] = True mapped_target = client.completions.with_raw_response # type: ignore if sync_mode: original_function = litellm.text_completion else: original_function = litellm.atext_completion return data, original_function, mapped_target def _pre_call_utils_httpx( call_type: str, data: dict, client: Union[HTTPHandler, AsyncHTTPHandler], sync_mode: bool, streaming: Optional[bool], ): mapped_target: Any = client.client if call_type == "embedding": data["input"] = "Hello world!" if sync_mode: original_function = litellm.embedding else: original_function = litellm.aembedding elif call_type == "chat_completion": data["messages"] = [{"role": "user", "content": "Hello world"}] if streaming is True: data["stream"] = True if sync_mode: original_function = litellm.completion else: original_function = litellm.acompletion elif call_type == "completion": data["prompt"] = "Hello world" if streaming is True: data["stream"] = True if sync_mode: original_function = litellm.text_completion else: original_function = litellm.atext_completion return data, original_function, mapped_target @pytest.mark.parametrize( "sync_mode", [True, False], ) @pytest.mark.parametrize( "provider, model, call_type, streaming", [ ("openai", "text-embedding-ada-002", "embedding", None), ("openai", "gpt-3.5-turbo", "chat_completion", False), ("openai", "gpt-3.5-turbo", "chat_completion", True), ("openai", "gpt-3.5-turbo-instruct", "completion", True), ("azure", "azure/gpt-4.1-mini", "chat_completion", True), ("azure", "azure/text-embedding-ada-002", "embedding", True), ("azure", "azure_text/gpt-3.5-turbo-instruct", "completion", True), ], ) @pytest.mark.asyncio async def test_exception_with_headers(sync_mode, provider, model, call_type, streaming): """ User feedback: litellm says "No deployments available for selected model, Try again in 60 seconds" but Azure says to retry in at most 9s ``` {"message": "litellm.proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"} ``` """ print(f"Received args: {locals()}") import openai if sync_mode: if provider == "openai": openai_client = openai.OpenAI(api_key="") elif provider == "azure": openai_client = openai.AzureOpenAI( api_key="", base_url="", api_version=litellm.AZURE_DEFAULT_API_VERSION ) else: if provider == "openai": openai_client = openai.AsyncOpenAI(api_key="") elif provider == "azure": openai_client = openai.AsyncAzureOpenAI( api_key="", base_url="", api_version=litellm.AZURE_DEFAULT_API_VERSION ) data = {"model": model} data, original_function, mapped_target = _pre_call_utils( call_type=call_type, data=data, client=openai_client, sync_mode=sync_mode, streaming=streaming, ) cooldown_time = 30.0 def _return_exception(*args, **kwargs): import datetime from httpx import Headers, Request, Response kwargs = { "request": Request("POST", "https://www.google.com"), "message": "Error code: 429 - Rate Limit Error!", "body": {"detail": "Rate Limit Error!"}, "code": None, "param": None, "type": None, "response": Response( status_code=429, headers=Headers( { "date": "Sat, 21 Sep 2024 22:56:53 GMT", "server": "uvicorn", "retry-after": "30", "content-length": "30", "content-type": "application/json", } ), request=Request("POST", "http://0.0.0.0:9000/chat/completions"), ), "status_code": 429, "request_id": None, } exception = Exception() for k, v in kwargs.items(): setattr(exception, k, v) raise exception with patch.object( mapped_target, "create", side_effect=_return_exception, ): new_retry_after_mock_client = MagicMock(return_value=-1) litellm.utils._get_retry_after_from_exception_header = ( new_retry_after_mock_client ) exception_raised = False try: if sync_mode: resp = original_function(**data, client=openai_client) if streaming: for chunk in resp: continue else: resp = await original_function(**data, client=openai_client) if streaming: async for chunk in resp: continue except litellm.RateLimitError as e: exception_raised = True assert e.litellm_response_headers is not None assert int(e.litellm_response_headers["retry-after"]) == cooldown_time if exception_raised is False: print(resp) assert exception_raised def test_openai_gateway_timeout_error(): """ Test that the OpenAI gateway timeout error is raised """ openai_client = OpenAI() mapped_target = openai_client.chat.completions.with_raw_response # type: ignore def _return_exception(*args, **kwargs): import datetime from httpx import Headers, Request, Response kwargs = { "request": Request("POST", "https://www.google.com"), "message": "Error code: 504 - Gateway Timeout Error!", "body": {"detail": "Gateway Timeout Error!"}, "code": None, "param": None, "type": None, "response": Response( status_code=504, headers=Headers( { "date": "Sat, 21 Sep 2024 22:56:53 GMT", "server": "uvicorn", "content-length": "30", "content-type": "application/json", } ), request=Request("POST", "http://0.0.0.0:9000/chat/completions"), ), "status_code": 504, "request_id": None, } exception = Exception() for k, v in kwargs.items(): setattr(exception, k, v) raise exception try: with patch.object( mapped_target, "create", side_effect=_return_exception, ): litellm.completion( model="openai/gpt-3.5-turbo", messages=[{"role": "user", "content": "Hello world"}], client=openai_client, ) pytest.fail("Expected to raise Timeout") except litellm.Timeout as e: assert e.status_code == 504 @pytest.mark.parametrize( "sync_mode", [True, False], ) @pytest.mark.parametrize("streaming", [True, False]) @pytest.mark.parametrize( "provider, model, call_type", [ ("anthropic", "claude-3-haiku-20240307", "chat_completion"), ], ) @pytest.mark.asyncio async def test_exception_with_headers_httpx( sync_mode, provider, model, call_type, streaming ): """ User feedback: litellm says "No deployments available for selected model, Try again in 60 seconds" but Azure says to retry in at most 9s ``` {"message": "litellm.proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"} ``` """ print(f"Received args: {locals()}") import openai if sync_mode: client = HTTPHandler() else: client = AsyncHTTPHandler() data = {"model": model} data, original_function, mapped_target = _pre_call_utils_httpx( call_type=call_type, data=data, client=client, sync_mode=sync_mode, streaming=streaming, ) cooldown_time = 30.0 def _return_exception(*args, **kwargs): import datetime from httpx import Headers, HTTPStatusError, Request, Response # Create the Request object request = Request("POST", "http://0.0.0.0:9000/chat/completions") # Create the Response object with the necessary headers and status code response = Response( status_code=429, headers=Headers( { "date": "Sat, 21 Sep 2024 22:56:53 GMT", "server": "uvicorn", "retry-after": "30", "content-length": "30", "content-type": "application/json", } ), request=request, ) # Create and raise the HTTPStatusError exception raise HTTPStatusError( message="Error code: 429 - Rate Limit Error!", request=request, response=response, ) with patch.object( mapped_target, "send", side_effect=_return_exception, ): new_retry_after_mock_client = MagicMock(return_value=-1) litellm.utils._get_retry_after_from_exception_header = ( new_retry_after_mock_client ) exception_raised = False try: if sync_mode: resp = original_function(**data, client=client) if streaming: for chunk in resp: continue else: resp = await original_function(**data, client=client) if streaming: async for chunk in resp: continue except litellm.RateLimitError as e: exception_raised = True assert ( e.litellm_response_headers is not None ), "litellm_response_headers is None" print("e.litellm_response_headers", e.litellm_response_headers) assert int(e.litellm_response_headers["retry-after"]) == cooldown_time if exception_raised is False: print(resp) assert exception_raised @pytest.mark.asyncio @pytest.mark.parametrize("model", ["azure/gpt-4.1-mini", "openai/gpt-3.5-turbo"]) async def test_bad_request_error_contains_httpx_response(model): """ Test that the BadRequestError contains the httpx response Relevant issue: https://github.com/BerriAI/litellm/issues/6732 """ try: await litellm.acompletion( model=model, messages=[{"role": "user", "content": "Hello world"}], bad_arg="bad_arg", ) pytest.fail("Expected to raise BadRequestError") except litellm.BadRequestError as e: print("e.response", e.response) print("vars(e.response)", vars(e.response)) assert e.response is not None def test_exceptions_base_class(): try: raise litellm.RateLimitError( message="BedrockException: Rate Limit Error", model="model", llm_provider="bedrock", ) except litellm.RateLimitError as e: assert isinstance(e, litellm.RateLimitError) assert e.code == "429" assert e.type == "throttling_error" def test_context_window_exceeded_error_from_litellm_proxy(): from httpx import Response from litellm.litellm_core_utils.exception_mapping_utils import ( extract_and_raise_litellm_exception, ) args = { "response": Response(status_code=400, text="Bad Request"), "error_str": "Error code: 400 - {'error': {'message': \"litellm.ContextWindowExceededError: litellm.BadRequestError: this is a mock context window exceeded error\\nmodel=gpt-3.5-turbo. context_window_fallbacks=None. fallbacks=None.\\n\\nSet 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks\\nReceived Model Group=gpt-3.5-turbo\\nAvailable Model Group Fallbacks=None\", 'type': None, 'param': None, 'code': '400'}}", "model": "gpt-3.5-turbo", "custom_llm_provider": "litellm_proxy", } with pytest.raises(litellm.ContextWindowExceededError): extract_and_raise_litellm_exception(**args) def test_bad_request_error_with_response_without_request(): """ Test that BadRequestError handles Response objects without a request attribute. This simulates a real scenario where a Response is created without a request (e.g., in tests or when manually creating error responses), and we need to ensure it doesn't raise RuntimeError when the exception is created. """ from httpx import Response from litellm.litellm_core_utils.exception_mapping_utils import ( extract_and_raise_litellm_exception, ) # Create a Response without a request (simulates the scenario that was failing) response_without_request = Response(status_code=400, text="Bad Request") # Test that extract_and_raise_litellm_exception can handle this args = { "response": response_without_request, "error_str": "Error code: 400 - {'error': {'message': 'litellm.BadRequestError: Invalid request parameters', 'type': None, 'param': None, 'code': '400'}}", "model": "gpt-3.5-turbo", "custom_llm_provider": "openai", } # This should raise BadRequestError without RuntimeError with pytest.raises(litellm.BadRequestError) as exc_info: extract_and_raise_litellm_exception(**args) # Verify the exception was created successfully error = exc_info.value assert error is not None assert error.model == "gpt-3.5-turbo" assert error.llm_provider == "openai" # Verify the exception has a response (should be minimal error response) assert error.response is not None # The response should have a request (minimal error response has one) assert getattr(error.response, "_request", None) is not None # Should be able to access request property without RuntimeError assert error.response.request is not None @pytest.mark.parametrize("sync_mode", [True, False]) @pytest.mark.parametrize("stream_mode", [True, False]) @pytest.mark.parametrize("model", ["gpt-4.1-nano"]) # "gpt-4o-mini", @pytest.mark.asyncio async def test_exception_bubbling_up(sync_mode, stream_mode, model): """ make sure code, param, and type are bubbled up """ import litellm litellm.set_verbose = True with pytest.raises(Exception) as exc_info: if sync_mode: litellm.completion( model=model, messages=[{"role": "usera", "content": "hi"}], stream=stream_mode, sync_stream=sync_mode, ) else: await litellm.acompletion( model=model, messages=[{"role": "usera", "content": "hi"}], stream=stream_mode, sync_stream=sync_mode, ) assert exc_info.value.code == "invalid_value" assert exc_info.value.param is not None assert exc_info.value.type == "invalid_request_error"