Fix token_counter failing on input that contains special tokens

This commit is contained in:
Yikai Zhao
2025-08-07 22:58:07 +08:00
parent dfada882f1
commit 4fdeff8e1a
2 changed files with 2 additions and 1 deletions
+1 -1
View File
@@ -529,7 +529,7 @@ def _get_count_function(
encoding = tiktoken.get_encoding("cl100k_base")
def count_tokens(text: str) -> int:
- return len(encoding.encode(text))
+ return len(encoding.encode(text, disallowed_special=()))
else:
raise ValueError("Unsupported tokenizer type")
@@ -451,6 +451,7 @@ def test_img_url_token_counter(img_url):
def test_token_encode_disallowed_special():
encode(model="gpt-3.5-turbo", text="Hello, world! <|endoftext|>")
+ token_counter(model="gpt-3.5-turbo", text="Hello, world! <|endoftext|>")
def test_token_counter():