From c85f9eaa9bcb8ce0cd362987dcb449c7bed95b9f Mon Sep 17 00:00:00 2001
From: tiennm99 <tiennm99@outlook.com>
Date: Thu, 18 Jun 2026 10:07:16 +0700
Subject: [PATCH] perf(templates): tune litellm defaults for lower RAM/CPU

- command: --num_workers 8 -> 2
- command: add --run_gunicorn and --max_requests_before_restart 1000
- litellm_settings: log_raw_request_response true -> false
- general_settings: add store_prompts_in_spend_logs: false
- litellm_settings: num_retries 3 -> 1, request_timeout 600 -> 120
- healthcheck: interval 5s -> 30s, add start_period 30s, probe with urllib
  (5s timeout) instead of requests
---
 templates/compose/litellm.yaml | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/templates/compose/litellm.yaml b/templates/compose/litellm.yaml
index 9fd0a234b..43a54b859 100644
--- a/templates/compose/litellm.yaml
+++ b/templates/compose/litellm.yaml
@@ -40,6 +40,7 @@ services:
         content: |
           general_settings:
             proxy_batch_write_at: 60
+            store_prompts_in_spend_logs: false
 
           router_settings:
             redis_host: os.environ/REDIS_HOST
@@ -50,15 +51,15 @@ services:
           litellm_settings:
             set_verbose: false
             json_logs: true
-            log_raw_request_response: true
+            log_raw_request_response: false
             # turn_off_message_logging: false
             # redact_user_api_key_info: false
             service_callback: ["prometheus_system"]
             drop_params: true
             # max_budget: 100
             # budget_duration: 30d
-            num_retries: 3
-            request_timeout: 600
+            num_retries: 1
+            request_timeout: 120
             telemetry: false
             cache: true
             cache_params:
@@ -131,17 +132,21 @@ services:
         - CMD
         - python
         - "-c"
-        - "import requests as r;r.get('http://127.0.0.1:4000/health/liveliness').raise_for_status()"
-      interval: 5s
+        - "import urllib.request;urllib.request.urlopen('http://127.0.0.1:4000/health/liveliness',timeout=5)"
+      interval: 30s
       timeout: 5s
       retries: 3
+      start_period: 30s
     command:
       - "--config"
       - /app/config.yaml
       - "--port"
       - "4000"
       - "--num_workers"
-      - "8"
+      - "2"  # value paired with the preceding --num_workers item
+      - "--run_gunicorn"  # bare flag: the next item is another flag, not a value
+      - "--max_requests_before_restart"
+      - "1000"  # value paired with the preceding --max_requests_before_restart item
   postgres:
     image: "postgres:16-alpine"
     environment: