# Patch summary (preamble text before the first "diff --git" header; not part of any hunk).
#
# Files touched:
#   1. litellm/litellm_core_utils/token_counter.py
#      Hunk context is `_get_count_function`. The nested `count_tokens(text)` helper that
#      uses `tiktoken.get_encoding("cl100k_base")` changes from
#          len(encoding.encode(text))
#      to
#          len(encoding.encode(text, disallowed_special=()))
#      NOTE(review): per tiktoken's documented defaults, `encode` rejects text containing
#      special-token strings such as "<|endoftext|>" unless they are explicitly allowed or
#      the disallowed set is emptied; the empty tuple presumably makes them count as
#      ordinary text instead of raising -- confirm against the pinned tiktoken version.
#   2. tests/test_litellm/litellm_core_utils/test_token_counter.py
#      `test_token_encode_disallowed_special` gains a second call,
#          token_counter(model="gpt-3.5-turbo", text="Hello, world! <|endoftext|>")
#      next to the existing `encode(...)` call with the same text. Neither call asserts
#      on a return value, so the test only checks that no exception is raised.
#
# NOTE(review): in this copy the diff's line breaks are collapsed onto a single line;
# the original newlines and indentation must be restored before the patch can be applied.
diff --git a/litellm/litellm_core_utils/token_counter.py b/litellm/litellm_core_utils/token_counter.py index 4df944edba..fab2c1e76e 100644 --- a/litellm/litellm_core_utils/token_counter.py +++ b/litellm/litellm_core_utils/token_counter.py @@ -529,7 +529,7 @@ def _get_count_function( encoding = tiktoken.get_encoding("cl100k_base") def count_tokens(text: str) -> int: - return len(encoding.encode(text)) + return len(encoding.encode(text, disallowed_special=())) else: raise ValueError("Unsupported tokenizer type") diff --git a/tests/test_litellm/litellm_core_utils/test_token_counter.py b/tests/test_litellm/litellm_core_utils/test_token_counter.py index 71ee367bde..5d17ea3dc3 100644 --- a/tests/test_litellm/litellm_core_utils/test_token_counter.py +++ b/tests/test_litellm/litellm_core_utils/test_token_counter.py @@ -451,6 +451,7 @@ def test_img_url_token_counter(img_url): def test_token_encode_disallowed_special(): encode(model="gpt-3.5-turbo", text="Hello, world! <|endoftext|>") + token_counter(model="gpt-3.5-turbo", text="Hello, world! <|endoftext|>") def test_token_counter():