# documentation: https://signoz.io/docs/introduction/
# slogan: An observability platform native to OpenTelemetry with logs, traces and metrics.
# tags: telemetry, server, applications, interface, logs, monitoring, traces, metrics
# logo: svgs/signoz.svg
# port: 8080

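# Note: the SigNoz UI listens on port 8080 (exposed via SERVICE_FQDN_SIGNOZ_8080
# below), while telemetry enters through the otel-collector service on
# ports 4317 (OTLP gRPC) and 4318 (OTLP HTTP).
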
services:
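  # init-clickhouse runs once to fetch the histogram-quantile binary that backs
  # the histogramQuantile executable UDF registered in custom-function.xml below.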
  init-clickhouse:
    image: clickhouse/clickhouse-server:24.1.2-alpine
    container_name: signoz-init-clickhouse
    command:
      - bash
      - -c
      - |
        version="v0.0.1"
        node_os=$$(uname -s | tr '[:upper:]' '[:lower:]')
        node_arch=$$(uname -m | sed s/aarch64/arm64/ | sed s/x86_64/amd64/)
        echo "Fetching histogram-binary for $${node_os}/$${node_arch}"
        cd /tmp
        wget -O histogram-quantile.tar.gz "https://github.com/SigNoz/signoz/releases/download/histogram-quantile%2F$${version}/histogram-quantile_$${node_os}_$${node_arch}.tar.gz"
        tar -xvzf histogram-quantile.tar.gz
        mkdir -p /var/lib/clickhouse/user_scripts/histogramQuantile
        mv histogram-quantile /var/lib/clickhouse/user_scripts/histogramQuantile
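    # Note: the doubled $$ above is Compose interpolation escaping; the
    # container shell receives a single $ for its own variable expansion.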
    restart: on-failure
    logging:
      options:
        max-size: 50m
        max-file: "3"

  zookeeper-1:
    image: bitnami/zookeeper:3.7.1
    container_name: signoz-zookeeper-1
    user: root
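    # The probe queries ZooKeeper's AdminServer "ruok" command (HTTP port 8080
    # inside the container) and passes when the reply carries a null error field.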
    healthcheck:
      test:
        - CMD-SHELL
        - curl -s -m 2 http://localhost:8080/commands/ruok | grep error | grep null
      interval: 30s
      timeout: 5s
      retries: 3
    restart: unless-stopped
    logging:
      options:
        max-size: 50m
        max-file: "3"
    volumes:
      - zookeeper-1:/bitnami/zookeeper
    environment:
      - ZOO_SERVER_ID=1
      - ALLOW_ANONYMOUS_LOGIN=yes
      - ZOO_AUTOPURGE_INTERVAL=1
      - ZOO_ENABLE_PROMETHEUS_METRICS=yes
      - ZOO_PROMETHEUS_METRICS_PORT_NUMBER=9141

  clickhouse:
    # adding a non-LTS version due to this fix: https://github.com/ClickHouse/ClickHouse/commit/32caf8716352f45c1b617274c7508c86b7d1afab
    image: clickhouse/clickhouse-server:24.1.2-alpine
    container_name: signoz-clickhouse
    tty: true
    depends_on:
      init-clickhouse:
        condition: service_completed_successfully
      zookeeper-1:
        condition: service_healthy
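    # Readiness is probed via ClickHouse's HTTP interface, which replies on
    # /ping (port 8123) once the server is up.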
    healthcheck:
      test:
        - CMD
        - wget
        - --spider
        - -q
        - 0.0.0.0:8123/ping
      interval: 30s
      timeout: 5s
      retries: 3
    ulimits:
      nproc: 65535
      nofile:
        soft: 262144
        hard: 262144
    restart: unless-stopped
    logging:
      options:
        max-size: 50m
        max-file: "3"
    volumes:
      - type: bind
        source: ./clickhouse/config.d/config.xml
        target: /etc/clickhouse-server/config.d/config.xml
        content: |
          <clickhouse>
            <logger>
              <level>information</level>
              <formatting>
                <type>json</type>
              </formatting>
            </logger>
            <macros>
              <shard>01</shard>
              <replica>example01-01-1</replica>
            </macros>
            <user_defined_executable_functions_config>*function.xml</user_defined_executable_functions_config>
          </clickhouse>
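      # Registers the executable UDF; ./histogramQuantile runs the binary that
      # init-clickhouse installed under /var/lib/clickhouse/user_scripts.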
      - type: bind
        source: ./clickhouse/custom-function.xml
        target: /etc/clickhouse-server/custom-function.xml
        content: |
          <functions>
            <function>
              <type>executable</type>
              <name>histogramQuantile</name>
              <return_type>Float64</return_type>
              <argument>
                <type>Array(Float64)</type>
                <name>buckets</name>
              </argument>
              <argument>
                <type>Array(Float64)</type>
                <name>counts</name>
              </argument>
              <argument>
                <type>Float64</type>
                <name>quantile</name>
              </argument>
              <format>CSV</format>
              <command>./histogramQuantile</command>
            </function>
          </functions>
      - type: bind
        source: ./clickhouse/cluster.xml
        target: /etc/clickhouse-server/config.d/cluster.xml
        content: |
          <?xml version="1.0"?>
          <clickhouse>
            <!-- ZooKeeper is used to store metadata about replicas when using Replicated tables.
                 Optional. If you don't use replicated tables, you can omit this.

                 See https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication/
            -->
            <zookeeper>
              <node index="1">
                <host>zookeeper-1</host>
                <port>2181</port>
              </node>
              <!-- <node index="2">
                <host>zookeeper-2</host>
                <port>2181</port>
              </node>
              <node index="3">
                <host>zookeeper-3</host>
                <port>2181</port>
              </node> -->
            </zookeeper>

            <!-- Configuration of clusters that could be used in Distributed tables.
                 https://clickhouse.com/docs/en/operations/table_engines/distributed/
            -->
            <remote_servers>
              <cluster>
                <!-- Inter-server per-cluster secret for Distributed queries.
                     Default: no secret (no authentication will be performed).

                     If set, Distributed queries will be validated on the shards, so at least:
                     - such a cluster should exist on the shard,
                     - such a cluster should have the same secret.

                     Also (and more importantly), the initial_user will be used
                     as the current user for the query.

                     Right now the protocol is pretty simple and only takes into account:
                     - cluster name
                     - query

                     It would also be nice if the following were implemented:
                     - source hostname (see interserver_http_host), but then it would depend on DNS;
                       an IP address could be used instead, but then it needs to be correct on the initiator node.
                     - target hostname / IP address (same notes as for the source hostname)
                     - time-based security tokens
                -->
                <!-- <secret></secret> -->
                <shard>
                  <!-- Optional. Whether to write data to just one of the replicas. Default: false (write data to all replicas). -->
                  <!-- <internal_replication>false</internal_replication> -->
                  <!-- Optional. Shard weight when writing data. Default: 1. -->
                  <!-- <weight>1</weight> -->
                  <replica>
                    <host>clickhouse</host>
                    <port>9000</port>
                    <!-- Optional. Priority of the replica for load_balancing. Default: 1 (less value has more priority). -->
                    <!-- <priority>1</priority> -->
                  </replica>
                </shard>
                <!-- <shard>
                  <replica>
                    <host>clickhouse-2</host>
                    <port>9000</port>
                  </replica>
                </shard>
                <shard>
                  <replica>
                    <host>clickhouse-3</host>
                    <port>9000</port>
                  </replica>
                </shard> -->
              </cluster>
            </remote_servers>
          </clickhouse>
      - type: volume
        source: clickhouse
        target: /var/lib/clickhouse/

  signoz:
    image: signoz/signoz:latest
    container_name: signoz
    depends_on:
      clickhouse:
        condition: service_healthy
      schema-migrator-sync:
        condition: service_completed_successfully
    restart: unless-stopped
    logging:
      options:
        max-size: 50m
        max-file: "3"
    command:
      - --config=/root/config/prometheus.yml
    volumes:
      - type: bind
        source: ./prometheus.yml
        target: /root/config/prometheus.yml
        content: |
          # my global config
          global:
            scrape_interval: 5s # Set the scrape interval to every 5 seconds. The default is every 1 minute.
            evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
            # scrape_timeout is set to the global default (10s).

          # Alertmanager configuration
          alerting:
            alertmanagers:
              - static_configs:
                  - targets:
                      - alertmanager:9093

          # Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
          rule_files: []
          # - "first_rules.yml"
          # - "second_rules.yml"
          # - 'alerts.yml'

          # A scrape configuration containing exactly one endpoint to scrape:
          # Here it's Prometheus itself.
          scrape_configs: []

          remote_read:
            - url: tcp://clickhouse:9000/signoz_metrics
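      # SigNoz stores dashboards, users, and alert metadata in this SQLite
      # volume (see SIGNOZ_SQLSTORE_SQLITE_PATH below).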
      - type: volume
        source: sqlite
        target: /var/lib/signoz/
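    # SERVICE_FQDN_* entries are Coolify "magic" variables: Coolify populates
    # them with the generated FQDN for the given port at deploy time.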
    environment:
      - SERVICE_FQDN_SIGNOZ_8080
      - SIGNOZ_ALERTMANAGER_PROVIDER=signoz
      - SIGNOZ_TELEMETRYSTORE_CLICKHOUSE_DSN=tcp://clickhouse:9000
      - SIGNOZ_SQLSTORE_SQLITE_PATH=/var/lib/signoz/signoz.db
      - DASHBOARDS_PATH=/root/config/dashboards
      - STORAGE=clickhouse
      - GODEBUG=netdns=go
      - DEPLOYMENT_TYPE=docker-standalone-amd
      - TELEMETRY_ENABLED=${TELEMETRY_ENABLED:-true}
      - SMTP_ENABLED=${SMTP_ENABLED:-false}
      - SMTP_FROM=${SMTP_FROM}
      - SMTP_HOST=${SMTP_HOST}
      - SMTP_PORT=${SMTP_PORT}
      - SMTP_USERNAME=${SMTP_USERNAME}
      - SMTP_PASSWORD=${SMTP_PASSWORD}
      - SIGNOZ_ALERTMANAGER_SIGNOZ_GLOBAL_SMTP__AUTH__PASSWORD=${SIGNOZ_ALERTMANAGER_SIGNOZ_GLOBAL_SMTP__AUTH__PASSWORD}
      - SIGNOZ_ALERTMANAGER_SIGNOZ_GLOBAL_SMTP__AUTH__USERNAME=${SIGNOZ_ALERTMANAGER_SIGNOZ_GLOBAL_SMTP__AUTH__USERNAME}
      - SIGNOZ_ALERTMANAGER_SIGNOZ_GLOBAL_SMTP__FROM=${SIGNOZ_ALERTMANAGER_SIGNOZ_GLOBAL_SMTP__FROM}
      - SIGNOZ_ALERTMANAGER_SIGNOZ_GLOBAL_SMTP__SMARTHOST=${SIGNOZ_ALERTMANAGER_SIGNOZ_GLOBAL_SMTP__SMARTHOST}
    healthcheck:
      test:
        - CMD
        - wget
        - --spider
        - -q
        - localhost:8080/api/v1/health
      interval: 30s
      timeout: 5s
      retries: 3

  otel-collector:
    image: signoz/signoz-otel-collector:latest
    container_name: signoz-otel-collector
    depends_on:
      clickhouse:
        condition: service_healthy
      schema-migrator-sync:
        condition: service_completed_successfully
      signoz:
        condition: service_healthy
    restart: unless-stopped
    logging:
      options:
        max-size: 50m
        max-file: "3"
    ports:
      - "4317:4317" # OTLP gRPC receiver
      - "4318:4318" # OTLP HTTP receiver
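    # Illustrative usage (an assumption, not part of the upstream template):
    # point an OpenTelemetry SDK at these ports, e.g.
    #   OTEL_EXPORTER_OTLP_ENDPOINT=http://<server>:4317   # gRPC
    # or POST spans over OTLP/HTTP:
    #   curl -X POST http://<server>:4318/v1/traces -H 'Content-Type: application/json' -d @spans.json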
    command:
      - --config=/etc/otel-collector-config.yaml
      - --manager-config=/etc/manager-config.yaml
      - --copy-path=/var/tmp/collector-config.yaml
      - --feature-gates=-pkg.translator.prometheus.NormalizeName
    volumes:
      - type: bind
        source: ./otel-collector-config.yaml
        target: /etc/otel-collector-config.yaml
        content: |
          receivers:
            otlp:
              protocols:
                grpc:
                  endpoint: 0.0.0.0:4317
                http:
                  endpoint: 0.0.0.0:4318
            prometheus:
              config:
                global:
                  scrape_interval: 60s
                scrape_configs:
                  - job_name: otel-collector
                    static_configs:
                      - targets:
                          - localhost:8888
                        labels:
                          job_name: otel-collector
          processors:
            batch:
              send_batch_size: 10000
              send_batch_max_size: 11000
              timeout: 10s
            resourcedetection:
              # Using the OTEL_RESOURCE_ATTRIBUTES envvar, the env detector adds custom labels.
              detectors: [env, system]
              timeout: 2s
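            # Generates APM (request/error/duration) metrics from spans and
            # flushes them through the clickhousemetricswrite exporter.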
            signozspanmetrics/delta:
              metrics_exporter: clickhousemetricswrite
              metrics_flush_interval: 60s
              latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s]
              dimensions_cache_size: 100000
              aggregation_temporality: AGGREGATION_TEMPORALITY_DELTA
              enable_exp_histogram: true
              dimensions:
                - name: service.namespace
                  default: default
                - name: deployment.environment
                  default: default
                # This is added to ensure the uniqueness of the timeseries.
                # Otherwise, identical timeseries produced by multiple replicas of
                # collectors result in incorrect APM metrics.
                - name: signoz.collector.id
                - name: service.version
                - name: browser.platform
                - name: browser.mobile
                - name: k8s.cluster.name
                - name: k8s.node.name
                - name: k8s.namespace.name
                - name: host.name
                - name: host.type
                - name: container.name
          extensions:
            health_check:
              endpoint: 0.0.0.0:13133
            pprof:
              endpoint: 0.0.0.0:1777
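          # All exporters write to signoz_* ClickHouse databases whose schema
          # is managed by the schema-migrator services below.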
          exporters:
            clickhousetraces:
              datasource: tcp://clickhouse:9000/signoz_traces
              low_cardinal_exception_grouping: ${env:LOW_CARDINAL_EXCEPTION_GROUPING}
              use_new_schema: true
            clickhousemetricswrite:
              endpoint: tcp://clickhouse:9000/signoz_metrics
              resource_to_telemetry_conversion:
                enabled: true
            clickhousemetricswrite/prometheus:
              endpoint: tcp://clickhouse:9000/signoz_metrics
            signozclickhousemetrics:
              dsn: tcp://clickhouse:9000/signoz_metrics
            clickhouselogsexporter:
              dsn: tcp://clickhouse:9000/signoz_logs
              timeout: 10s
              use_new_schema: true
            # debug: {}
          service:
            telemetry:
              logs:
                encoding: json
              metrics:
                address: 0.0.0.0:8888
            extensions:
              - health_check
              - pprof
            pipelines:
              traces:
                receivers: [otlp]
                processors: [signozspanmetrics/delta, batch]
                exporters: [clickhousetraces]
              metrics:
                receivers: [otlp]
                processors: [batch]
                exporters: [clickhousemetricswrite, signozclickhousemetrics]
              metrics/prometheus:
                receivers: [prometheus]
                processors: [batch]
                exporters: [clickhousemetricswrite/prometheus, signozclickhousemetrics]
              logs:
                receivers: [otlp]
                processors: [batch]
                exporters: [clickhouselogsexporter]
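      # OpAMP manager config: the collector connects to the signoz service
      # for remote configuration management.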
      - type: bind
        source: ./otel-collector-opamp-config.yaml
        target: /etc/manager-config.yaml
        content: |
          server_endpoint: ws://signoz:4320/v1/opamp
    environment:
      - SERVICE_FQDN_OTELCOLLECTORGRPC_4317
      - SERVICE_FQDN_OTELCOLLECTORHTTP_4318
      - OTEL_RESOURCE_ATTRIBUTES=host.name=signoz-host,os.type=linux
      - LOW_CARDINAL_EXCEPTION_GROUPING=false
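    # The probe uses bash's /dev/tcp to open a raw TCP connection to the
    # health_check extension on port 13133, avoiding any need for curl/wget.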
    healthcheck:
      test: bash -c "exec 6<> /dev/tcp/localhost/13133"
      interval: 30s
      timeout: 5s
      retries: 3

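  # schema-migrator-sync applies the required ClickHouse migrations up front
  # (signoz and otel-collector wait for it to finish); the async variant is
  # assumed to handle the longer-running migrations in the background.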
  schema-migrator-sync:
    image: signoz/signoz-schema-migrator:latest
    container_name: schema-migrator-sync
    command:
      - sync
      - --dsn=tcp://clickhouse:9000
      - --up=
    depends_on:
      clickhouse:
        condition: service_healthy
    restart: on-failure
    logging:
      options:
        max-size: 50m
        max-file: "3"

  schema-migrator-async:
    image: signoz/signoz-schema-migrator:latest
    container_name: schema-migrator-async
    depends_on:
      clickhouse:
        condition: service_healthy
      schema-migrator-sync:
        condition: service_completed_successfully
    logging:
      options:
        max-size: 50m
        max-file: "3"
    command:
      - async
      - --dsn=tcp://clickhouse:9000
      - --up=
    restart: on-failure