From 4fdeff8e1a71fdacc444dc35cfbef882fa25fe2e Mon Sep 17 00:00:00 2001 From: Yikai Zhao Date: Thu, 7 Aug 2025 22:58:07 +0800 Subject: [PATCH] Fix token_counter with special token input --- litellm/litellm_core_utils/token_counter.py | 2 +- tests/test_litellm/litellm_core_utils/test_token_counter.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/litellm/litellm_core_utils/token_counter.py b/litellm/litellm_core_utils/token_counter.py index 4df944edba..fab2c1e76e 100644 --- a/litellm/litellm_core_utils/token_counter.py +++ b/litellm/litellm_core_utils/token_counter.py @@ -529,7 +529,7 @@ def _get_count_function( encoding = tiktoken.get_encoding("cl100k_base") def count_tokens(text: str) -> int: - return len(encoding.encode(text)) + return len(encoding.encode(text, disallowed_special=())) else: raise ValueError("Unsupported tokenizer type") diff --git a/tests/test_litellm/litellm_core_utils/test_token_counter.py b/tests/test_litellm/litellm_core_utils/test_token_counter.py index 71ee367bde..5d17ea3dc3 100644 --- a/tests/test_litellm/litellm_core_utils/test_token_counter.py +++ b/tests/test_litellm/litellm_core_utils/test_token_counter.py @@ -451,6 +451,7 @@ def test_img_url_token_counter(img_url): def test_token_encode_disallowed_special(): encode(model="gpt-3.5-turbo", text="Hello, world! <|endoftext|>") + token_counter(model="gpt-3.5-turbo", text="Hello, world! <|endoftext|>") def test_token_counter():