From 88d498a54a1bbff5b23d718dc1ef6686ff46400a Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Tue, 9 Jan 2024 09:47:18 +0530
Subject: [PATCH] fix(ollama.py): use tiktoken as backup for prompt token
 counting

---
 litellm/llms/ollama.py                               |  4 ++--
 .../tests/example_config_yaml/aliases_config.yaml    |  2 +-
 litellm/tests/test_provider_specific_config.py       |  6 +++---
 litellm/tests/test_text_completion.py                | 12 ++++++------
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/litellm/llms/ollama.py b/litellm/llms/ollama.py
index 81e16a1a69..0839975dba 100644
--- a/litellm/llms/ollama.py
+++ b/litellm/llms/ollama.py
@@ -217,7 +217,7 @@ def get_ollama_response(
         model_response["choices"][0]["message"]["content"] = response_json["response"]
     model_response["created"] = int(time.time())
     model_response["model"] = "ollama/" + model
-    prompt_tokens = response_json["prompt_eval_count"]  # type: ignore
+    prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt)))  # type: ignore
     completion_tokens = response_json["eval_count"]
     model_response["usage"] = litellm.Usage(
         prompt_tokens=prompt_tokens,
@@ -318,7 +318,7 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
                 ]
             model_response["created"] = int(time.time())
             model_response["model"] = "ollama/" + data["model"]
-            prompt_tokens = response_json["prompt_eval_count"]  # type: ignore
+            prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(data["prompt"])))  # type: ignore
             completion_tokens = response_json["eval_count"]
             model_response["usage"] = litellm.Usage(
                 prompt_tokens=prompt_tokens,
diff --git a/litellm/tests/example_config_yaml/aliases_config.yaml b/litellm/tests/example_config_yaml/aliases_config.yaml
index 266f6cf22e..43681f64ba 100644
--- a/litellm/tests/example_config_yaml/aliases_config.yaml
+++ b/litellm/tests/example_config_yaml/aliases_config.yaml
@@ -1,5 +1,5 @@
 model_list:
-  - model_name: text-davinci-003
+  - model_name: gpt-3.5-turbo-instruct
     litellm_params:
         model: ollama/zephyr
   - model_name: gpt-4
diff --git a/litellm/tests/test_provider_specific_config.py b/litellm/tests/test_provider_specific_config.py
index 55986ff70e..6c0edf02bd 100644
--- a/litellm/tests/test_provider_specific_config.py
+++ b/litellm/tests/test_provider_specific_config.py
@@ -602,7 +602,7 @@ def openai_text_completion_test():
     try:
         # OVERRIDE WITH DYNAMIC MAX TOKENS
         response_1 = litellm.completion(
-            model="text-davinci-003",
+            model="gpt-3.5-turbo-instruct",
             messages=[
                 {
                     "content": "Hello, how are you? Be as verbose as possible",
@@ -616,7 +616,7 @@ def openai_text_completion_test():
 
         # USE CONFIG TOKENS
         response_2 = litellm.completion(
-            model="text-davinci-003",
+            model="gpt-3.5-turbo-instruct",
             messages=[
                 {
                     "content": "Hello, how are you? Be as verbose as possible",
@@ -630,7 +630,7 @@ def openai_text_completion_test():
         assert len(response_2_text) < len(response_1_text)
 
         response_3 = litellm.completion(
-            model="text-davinci-003",
+            model="gpt-3.5-turbo-instruct",
             messages=[{"content": "Hello, how are you?", "role": "user"}],
             n=2,
         )
diff --git a/litellm/tests/test_text_completion.py b/litellm/tests/test_text_completion.py
index 2aef20322f..254b75399a 100644
--- a/litellm/tests/test_text_completion.py
+++ b/litellm/tests/test_text_completion.py
@@ -2682,7 +2682,7 @@ def test_completion_openai_prompt():
     try:
         print("\n text 003 test\n")
         response = text_completion(
-            model="text-davinci-003", prompt="What's the weather in SF?"
+            model="gpt-3.5-turbo-instruct", prompt="What's the weather in SF?"
         )
         print(response)
         response_str = response["choices"][0]["text"]
@@ -2700,7 +2700,7 @@ def test_completion_openai_engine_and_model():
         print("\n text 003 test\n")
         litellm.set_verbose = True
         response = text_completion(
-            model="text-davinci-003",
+            model="gpt-3.5-turbo-instruct",
             engine="anything",
             prompt="What's the weather in SF?",
             max_tokens=5,
@@ -2721,7 +2721,7 @@ def test_completion_openai_engine():
         print("\n text 003 test\n")
         litellm.set_verbose = True
         response = text_completion(
-            engine="text-davinci-003", prompt="What's the weather in SF?", max_tokens=5
+            engine="gpt-3.5-turbo-instruct", prompt="What's the weather in SF?", max_tokens=5
         )
         print(response)
         response_str = response["choices"][0]["text"]
@@ -2757,7 +2757,7 @@ def test_text_completion_basic():
         print("\n test 003 with echo and logprobs \n")
         litellm.set_verbose = False
         response = text_completion(
-            model="text-davinci-003",
+            model="gpt-3.5-turbo-instruct",
             prompt="good morning",
             max_tokens=10,
             logprobs=10,
@@ -2779,7 +2779,7 @@ def test_completion_text_003_prompt_array():
     try:
         litellm.set_verbose = False
         response = text_completion(
-            model="text-davinci-003",
+            model="gpt-3.5-turbo-instruct",
             prompt=token_prompt,  # token prompt is a 2d list
         )
         print("\n\n response")
@@ -2857,7 +2857,7 @@ def test_text_completion_stream():
 # async def test_text_completion_async_stream():
 #     try:
 #         response = await atext_completion(
-#                 model="text-completion-openai/text-davinci-003",
+#                 model="text-completion-openai/gpt-3.5-turbo-instruct",
 #                 prompt="good morning",
 #                 stream=True,
 #                 max_tokens=10,