From c85f9eaa9bcb8ce0cd362987dcb449c7bed95b9f Mon Sep 17 00:00:00 2001 From: tiennm99 Date: Thu, 18 Jun 2026 10:07:16 +0700 Subject: [PATCH] perf(templates): tune litellm defaults for lower RAM/CPU - num_workers 8 -> 2 - run_gunicorn with max_requests_before_restart 1000 - log_raw_request_response false - store_prompts_in_spend_logs false - num_retries 3 -> 1, request_timeout 600 -> 120 - healthcheck interval 5s -> 30s with start_period, urllib instead of requests --- templates/compose/litellm.yaml | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/templates/compose/litellm.yaml b/templates/compose/litellm.yaml index 9fd0a234b..43a54b859 100644 --- a/templates/compose/litellm.yaml +++ b/templates/compose/litellm.yaml @@ -40,6 +40,7 @@ services: content: | general_settings: proxy_batch_write_at: 60 + store_prompts_in_spend_logs: false router_settings: redis_host: os.environ/REDIS_HOST @@ -50,15 +51,15 @@ services: litellm_settings: set_verbose: false json_logs: true - log_raw_request_response: true + log_raw_request_response: false # turn_off_message_logging: false # redact_user_api_key_info: false service_callback: ["prometheus_system"] drop_params: true # max_budget: 100 # budget_duration: 30d - num_retries: 3 - request_timeout: 600 + num_retries: 1 + request_timeout: 120 telemetry: false cache: true cache_params: @@ -131,17 +132,21 @@ services: - CMD - python - "-c" - - "import requests as r;r.get('http://127.0.0.1:4000/health/liveliness').raise_for_status()" - interval: 5s + - "import urllib.request;urllib.request.urlopen('http://127.0.0.1:4000/health/liveliness',timeout=5)" + interval: 30s timeout: 5s retries: 3 + start_period: 30s command: - "--config" - /app/config.yaml - "--port" - "4000" - "--num_workers" - - "8" + - "2" + - "--run_gunicorn" + - "--max_requests_before_restart" + - "1000" postgres: image: "postgres:16-alpine" environment: