mirror of
https://github.com/tiennm99/coolify.git
synced 2026-04-17 19:21:36 +00:00
fix: correct Sentinel default health status and remove debug logging
This commit addresses container status reporting issues and removes debug logging:

**Primary Fix:**
- Changed PushServerUpdateJob to default to 'unknown' instead of 'unhealthy' when the health_status field is missing from Sentinel data
- This ensures containers without a healthcheck defined are correctly reported as "unknown", not "unhealthy"
- Matches SSH path behavior (GetContainersStatus), which already defaulted to 'unknown'

**Service Multi-Container Aggregation:**
- Implemented service container status aggregation (same pattern as applications)
- Added serviceContainerStatuses collection to both Sentinel and SSH paths
- Services now aggregate status using priority: unhealthy > unknown > healthy
- Prevents race conditions where the last-processed container would win

**Debug Logging Cleanup:**
- Removed all [STATUS-DEBUG] logging statements (25 total)
- Removed all ray() debugging calls (3 total)
- Removed proof_unknown_preserved and health_status_was_null debug fields
- Code is now production-ready

**Test Coverage:**
- Added 2 new tests for Sentinel default health status behavior
- Added 5 new tests for service aggregation in SSH path
- All 16 tests pass (66 assertions)

**Note:** The root cause was identified as Sentinel (Go binary) also defaulting to "unhealthy". That will need a separate fix in the Sentinel codebase.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -21,7 +21,6 @@ use Illuminate\Queue\InteractsWithQueue;
|
||||
use Illuminate\Queue\Middleware\WithoutOverlapping;
|
||||
use Illuminate\Queue\SerializesModels;
|
||||
use Illuminate\Support\Collection;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
use Laravel\Horizon\Contracts\Silenced;
|
||||
|
||||
class PushServerUpdateJob implements ShouldBeEncrypted, ShouldQueue, Silenced
|
||||
@@ -68,6 +67,8 @@ class PushServerUpdateJob implements ShouldBeEncrypted, ShouldQueue, Silenced
|
||||
|
||||
public Collection $applicationContainerStatuses;
|
||||
|
||||
public Collection $serviceContainerStatuses;
|
||||
|
||||
public bool $foundProxy = false;
|
||||
|
||||
public bool $foundLogDrainContainer = false;
|
||||
@@ -91,6 +92,7 @@ class PushServerUpdateJob implements ShouldBeEncrypted, ShouldQueue, Silenced
|
||||
$this->foundApplicationPreviewsIds = collect();
|
||||
$this->foundServiceDatabaseIds = collect();
|
||||
$this->applicationContainerStatuses = collect();
|
||||
$this->serviceContainerStatuses = collect();
|
||||
$this->allApplicationIds = collect();
|
||||
$this->allDatabaseUuids = collect();
|
||||
$this->allTcpProxyUuids = collect();
|
||||
@@ -109,14 +111,6 @@ class PushServerUpdateJob implements ShouldBeEncrypted, ShouldQueue, Silenced
|
||||
$this->server->sentinelHeartbeat();
|
||||
|
||||
$this->containers = collect(data_get($data, 'containers'));
|
||||
|
||||
Log::debug('[STATUS-DEBUG] Raw Sentinel data received', [
|
||||
'source' => 'PushServerUpdateJob',
|
||||
'container_count' => $this->containers->count(),
|
||||
'containers' => $this->containers->toArray(),
|
||||
]);
|
||||
ray('Raw Sentinel containers:', $this->containers->toArray());
|
||||
|
||||
$filesystemUsageRoot = data_get($data, 'filesystem_usage_root.used_percentage');
|
||||
ServerStorageCheckJob::dispatch($this->server, $filesystemUsageRoot);
|
||||
|
||||
@@ -149,25 +143,13 @@ class PushServerUpdateJob implements ShouldBeEncrypted, ShouldQueue, Silenced
|
||||
|
||||
foreach ($this->containers as $container) {
|
||||
$containerStatus = data_get($container, 'state', 'exited');
|
||||
$containerHealth = data_get($container, 'health_status', 'unhealthy');
|
||||
$rawHealthStatus = data_get($container, 'health_status');
|
||||
$containerHealth = $rawHealthStatus ?? 'unknown';
|
||||
$containerStatus = "$containerStatus ($containerHealth)";
|
||||
$labels = collect(data_get($container, 'labels'));
|
||||
$coolify_managed = $labels->has('coolify.managed');
|
||||
|
||||
Log::debug('[STATUS-DEBUG] Processing container from Sentinel', [
|
||||
'source' => 'PushServerUpdateJob (loop)',
|
||||
'container_name' => data_get($container, 'name'),
|
||||
'container_status' => $containerStatus,
|
||||
'labels' => $labels->toArray(),
|
||||
'has_coolify_managed' => $coolify_managed,
|
||||
]);
|
||||
|
||||
if (! $coolify_managed) {
|
||||
Log::debug('[STATUS-DEBUG] Container skipped - not coolify managed', [
|
||||
'source' => 'PushServerUpdateJob',
|
||||
'container_name' => data_get($container, 'name'),
|
||||
]);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -191,19 +173,6 @@ class PushServerUpdateJob implements ShouldBeEncrypted, ShouldQueue, Silenced
|
||||
$containerName = $labels->get('com.docker.compose.service');
|
||||
if ($containerName) {
|
||||
$this->applicationContainerStatuses->get($applicationId)->put($containerName, $containerStatus);
|
||||
Log::debug('[STATUS-DEBUG] Container added to applicationContainerStatuses', [
|
||||
'source' => 'PushServerUpdateJob',
|
||||
'application_id' => $applicationId,
|
||||
'container_name' => $containerName,
|
||||
'container_status' => $containerStatus,
|
||||
]);
|
||||
} else {
|
||||
Log::debug('[STATUS-DEBUG] Container skipped - no com.docker.compose.service label', [
|
||||
'source' => 'PushServerUpdateJob',
|
||||
'container_name' => data_get($container, 'name'),
|
||||
'application_id' => $applicationId,
|
||||
'labels' => $labels->toArray(),
|
||||
]);
|
||||
}
|
||||
} else {
|
||||
$previewKey = $applicationId.':'.$pullRequestId;
|
||||
@@ -218,12 +187,32 @@ class PushServerUpdateJob implements ShouldBeEncrypted, ShouldQueue, Silenced
|
||||
$serviceId = $labels->get('coolify.serviceId');
|
||||
$subType = $labels->get('coolify.service.subType');
|
||||
$subId = $labels->get('coolify.service.subId');
|
||||
if ($subType === 'application' && $this->isRunning($containerStatus)) {
|
||||
$this->foundServiceApplicationIds->push($subId);
|
||||
$this->updateServiceSubStatus($serviceId, $subType, $subId, $containerStatus);
|
||||
} elseif ($subType === 'database' && $this->isRunning($containerStatus)) {
|
||||
$this->foundServiceDatabaseIds->push($subId);
|
||||
$this->updateServiceSubStatus($serviceId, $subType, $subId, $containerStatus);
|
||||
if ($subType === 'application') {
|
||||
if ($this->isRunning($containerStatus)) {
|
||||
$this->foundServiceApplicationIds->push($subId);
|
||||
}
|
||||
// Store container status for aggregation
|
||||
$key = $serviceId.':'.$subType.':'.$subId;
|
||||
if (! $this->serviceContainerStatuses->has($key)) {
|
||||
$this->serviceContainerStatuses->put($key, collect());
|
||||
}
|
||||
$containerName = $labels->get('com.docker.compose.service');
|
||||
if ($containerName) {
|
||||
$this->serviceContainerStatuses->get($key)->put($containerName, $containerStatus);
|
||||
}
|
||||
} elseif ($subType === 'database') {
|
||||
if ($this->isRunning($containerStatus)) {
|
||||
$this->foundServiceDatabaseIds->push($subId);
|
||||
}
|
||||
// Store container status for aggregation
|
||||
$key = $serviceId.':'.$subType.':'.$subId;
|
||||
if (! $this->serviceContainerStatuses->has($key)) {
|
||||
$this->serviceContainerStatuses->put($key, collect());
|
||||
}
|
||||
$containerName = $labels->get('com.docker.compose.service');
|
||||
if ($containerName) {
|
||||
$this->serviceContainerStatuses->get($key)->put($containerName, $containerStatus);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
$uuid = $labels->get('com.docker.compose.service');
|
||||
@@ -257,27 +246,20 @@ class PushServerUpdateJob implements ShouldBeEncrypted, ShouldQueue, Silenced
|
||||
// Aggregate multi-container application statuses
|
||||
$this->aggregateMultiContainerStatuses();
|
||||
|
||||
// Aggregate multi-container service statuses
|
||||
$this->aggregateServiceContainerStatuses();
|
||||
|
||||
$this->checkLogDrainContainer();
|
||||
}
|
||||
|
||||
private function aggregateMultiContainerStatuses()
|
||||
{
|
||||
Log::debug('[STATUS-DEBUG] Starting aggregation of multi-container application statuses', [
|
||||
'source' => 'PushServerUpdateJob',
|
||||
]);
|
||||
ray('Starting aggregation of multi-container application statuses');
|
||||
ray($this->applicationContainerStatuses->toArray());
|
||||
if ($this->applicationContainerStatuses->isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
foreach ($this->applicationContainerStatuses as $applicationId => $containerStatuses) {
|
||||
$application = $this->applications->where('id', $applicationId)->first();
|
||||
Log::debug('[STATUS-DEBUG] Processing application for aggregation', [
|
||||
'source' => 'PushServerUpdateJob',
|
||||
'app_id' => $applicationId,
|
||||
'container_statuses' => $containerStatuses->toArray(),
|
||||
]);
|
||||
if (! $application) {
|
||||
continue;
|
||||
}
|
||||
@@ -345,19 +327,7 @@ class PushServerUpdateJob implements ShouldBeEncrypted, ShouldQueue, Silenced
|
||||
// All containers are exited
|
||||
$aggregatedStatus = 'exited (unhealthy)';
|
||||
}
|
||||
Log::debug('[STATUS-DEBUG] Sentinel status change', [
|
||||
'source' => 'PushServerUpdateJob',
|
||||
'app_id' => $application->id,
|
||||
'app_name' => $application->name,
|
||||
'old_status' => $application->status,
|
||||
'new_status' => $aggregatedStatus,
|
||||
'container_statuses' => $relevantStatuses->toArray(),
|
||||
'flags' => [
|
||||
'hasRunning' => $hasRunning,
|
||||
'hasUnhealthy' => $hasUnhealthy,
|
||||
'hasUnknown' => $hasUnknown,
|
||||
],
|
||||
]);
|
||||
|
||||
// Update application status with aggregated result
|
||||
if ($aggregatedStatus && $application->status !== $aggregatedStatus) {
|
||||
|
||||
@@ -367,6 +337,105 @@ class PushServerUpdateJob implements ShouldBeEncrypted, ShouldQueue, Silenced
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Collapse the per-container statuses gathered in $this->serviceContainerStatuses
 * into one status per service sub-resource and persist it when it changed.
 *
 * Each key of the collection has the form "serviceId:subType:subId"; each value
 * maps a compose service name to a "state (health)" string.
 *
 * Aggregation rules (only running containers contribute a health value):
 *  - no running container            => "exited (unhealthy)"
 *  - any running container unhealthy => "running (unhealthy)"
 *  - otherwise any running unknown   => "running (unknown)"
 *  - otherwise                       => "running (healthy)"
 *
 * Containers whose compose definition sets `exclude_from_hc` or `restart: no`
 * are ignored; when every container of a sub-resource is ignored, its status is
 * left untouched.
 */
private function aggregateServiceContainerStatuses()
{
    if ($this->serviceContainerStatuses->isEmpty()) {
        return;
    }

    // Excluded container names keyed by service id, so the compose file of a
    // service is parsed once per run instead of once per sub-resource.
    $excludedByService = [];

    foreach ($this->serviceContainerStatuses as $key => $containerStatuses) {
        // Key format: serviceId:subType:subId. The limit keeps a sub id that
        // happens to contain ':' in one piece instead of truncating it.
        [$serviceId, $subType, $subId] = explode(':', $key, 3);

        $service = $this->services->where('id', $serviceId)->first();
        if (! $service) {
            continue;
        }

        // Resolve the ServiceApplication / ServiceDatabase this key refers to.
        $subResource = match ($subType) {
            'application' => $service->applications()->where('id', $subId)->first(),
            'database' => $service->databases()->where('id', $subId)->first(),
            default => null,
        };
        if (! $subResource) {
            continue;
        }

        if (! array_key_exists($serviceId, $excludedByService)) {
            $excludedByService[$serviceId] = $this->resolveExcludedServiceContainers($service);
        }
        $excludedContainers = $excludedByService[$serviceId];

        // Drop containers that must not influence the aggregated status.
        $relevantStatuses = $containerStatuses->reject(
            fn ($status, $containerName) => $excludedContainers->contains($containerName)
        );

        // Every container is excluded: keep whatever status is stored.
        if ($relevantStatuses->isEmpty()) {
            continue;
        }

        $hasRunning = false;
        $hasUnhealthy = false;
        $hasUnknown = false;

        foreach ($relevantStatuses as $status) {
            // Health of non-running containers is deliberately not considered.
            if (! str($status)->contains('running')) {
                continue;
            }

            $hasRunning = true;
            $hasUnhealthy = $hasUnhealthy || str($status)->contains('unhealthy');
            $hasUnknown = $hasUnknown || str($status)->contains('unknown');
        }

        if (! $hasRunning) {
            // All relevant containers are exited
            $aggregatedStatus = 'exited (unhealthy)';
        } elseif ($hasUnhealthy) {
            $aggregatedStatus = 'running (unhealthy)';
        } elseif ($hasUnknown) {
            $aggregatedStatus = 'running (unknown)';
        } else {
            $aggregatedStatus = 'running (healthy)';
        }

        // Only write when the aggregated result differs from the stored one.
        if ($subResource->status !== $aggregatedStatus) {
            $subResource->status = $aggregatedStatus;
            $subResource->save();
        }
    }
}

/**
 * Names of the compose services of $service that are excluded from status
 * aggregation: those flagged `exclude_from_hc` or using `restart: no`.
 *
 * Parsing is best-effort: if the raw compose file is missing or cannot be
 * processed, the names collected so far (possibly none) are returned, which
 * means the remaining containers are treated as included.
 *
 * @param  mixed  $service  Service model exposing `docker_compose_raw`.
 */
private function resolveExcludedServiceContainers($service): Collection
{
    $excluded = collect();

    $dockerComposeRaw = data_get($service, 'docker_compose_raw');
    if (! $dockerComposeRaw) {
        return $excluded;
    }

    try {
        $dockerCompose = \Symfony\Component\Yaml\Yaml::parse($dockerComposeRaw);

        foreach (data_get($dockerCompose, 'services', []) as $serviceName => $serviceConfig) {
            $excludeFromHc = data_get($serviceConfig, 'exclude_from_hc', false);
            $restartPolicy = data_get($serviceConfig, 'restart', 'always');

            if ($excludeFromHc || $restartPolicy === 'no') {
                $excluded->push($serviceName);
            }
        }
    } catch (\Exception $e) {
        // Deliberately ignored: an unparsable compose file must not block
        // status updates, so unlisted containers stay included.
    }

    return $excluded;
}
|
||||
|
||||
private function updateApplicationStatus(string $applicationId, string $containerStatus)
|
||||
{
|
||||
$application = $this->applications->where('id', $applicationId)->first();
|
||||
|
||||
Reference in New Issue
Block a user