fix: correct status for excluded health check containers (#7283)

This commit is contained in:
Andras Bacsai
2025-11-21 09:17:26 +01:00
committed by GitHub
32 changed files with 3720 additions and 344 deletions

View File

@@ -8,6 +8,8 @@ use App\Events\ServiceChecked;
use App\Models\ApplicationPreview;
use App\Models\Server;
use App\Models\ServiceDatabase;
use App\Services\ContainerStatusAggregator;
use App\Traits\CalculatesExcludedStatus;
use Illuminate\Support\Arr;
use Illuminate\Support\Collection;
use Illuminate\Support\Facades\DB;
@@ -16,6 +18,7 @@ use Lorisleiva\Actions\Concerns\AsAction;
class GetContainersStatus
{
use AsAction;
use CalculatesExcludedStatus;
public string $jobQueue = 'high';
@@ -31,6 +34,8 @@ class GetContainersStatus
protected ?Collection $applicationContainerRestartCounts;
protected ?Collection $serviceContainerStatuses;
public function handle(Server $server, ?Collection $containers = null, ?Collection $containerReplicates = null)
{
$this->containers = $containers;
@@ -98,11 +103,13 @@ class GetContainersStatus
$labels = data_get($container, 'Config.Labels');
}
$containerStatus = data_get($container, 'State.Status');
$containerHealth = data_get($container, 'State.Health.Status', 'unhealthy');
$containerHealth = data_get($container, 'State.Health.Status');
if ($containerStatus === 'restarting') {
$containerStatus = "restarting ($containerHealth)";
$healthSuffix = $containerHealth ?? 'unknown';
$containerStatus = "restarting:$healthSuffix";
} else {
$containerStatus = "$containerStatus ($containerHealth)";
$healthSuffix = $containerHealth ?? 'unknown';
$containerStatus = "$containerStatus:$healthSuffix";
}
$labels = Arr::undot(format_docker_labels_to_json($labels));
$applicationId = data_get($labels, 'coolify.applicationId');
@@ -222,23 +229,34 @@ class GetContainersStatus
if ($serviceLabelId) {
$subType = data_get($labels, 'coolify.service.subType');
$subId = data_get($labels, 'coolify.service.subId');
$service = $services->where('id', $serviceLabelId)->first();
if (! $service) {
$parentService = $services->where('id', $serviceLabelId)->first();
if (! $parentService) {
continue;
}
// Store container status for aggregation
if (! isset($this->serviceContainerStatuses)) {
$this->serviceContainerStatuses = collect();
}
$key = $serviceLabelId.':'.$subType.':'.$subId;
if (! $this->serviceContainerStatuses->has($key)) {
$this->serviceContainerStatuses->put($key, collect());
}
$containerName = data_get($labels, 'com.docker.compose.service');
if ($containerName) {
$this->serviceContainerStatuses->get($key)->put($containerName, $containerStatus);
}
// Mark service as found
if ($subType === 'application') {
$service = $service->applications()->where('id', $subId)->first();
$service = $parentService->applications()->where('id', $subId)->first();
} else {
$service = $service->databases()->where('id', $subId)->first();
$service = $parentService->databases()->where('id', $subId)->first();
}
if ($service) {
$foundServices[] = "$service->id-$service->name";
$statusFromDb = $service->status;
if ($statusFromDb !== $containerStatus) {
$service->update(['status' => $containerStatus]);
} else {
$service->update(['last_online_at' => now()]);
}
}
}
}
@@ -418,6 +436,9 @@ class GetContainersStatus
}
}
// Aggregate multi-container service statuses
$this->aggregateServiceContainerStatuses($services);
ServiceChecked::dispatch($this->server->team->id);
}
@@ -425,74 +446,88 @@ class GetContainersStatus
{
// Parse docker compose to check for excluded containers
$dockerComposeRaw = data_get($application, 'docker_compose_raw');
$excludedContainers = collect();
if ($dockerComposeRaw) {
try {
$dockerCompose = \Symfony\Component\Yaml\Yaml::parse($dockerComposeRaw);
$services = data_get($dockerCompose, 'services', []);
foreach ($services as $serviceName => $serviceConfig) {
// Check if container should be excluded
$excludeFromHc = data_get($serviceConfig, 'exclude_from_hc', false);
$restartPolicy = data_get($serviceConfig, 'restart', 'always');
if ($excludeFromHc || $restartPolicy === 'no') {
$excludedContainers->push($serviceName);
}
}
} catch (\Exception $e) {
// If we can't parse, treat all containers as included
}
}
$excludedContainers = $this->getExcludedContainersFromDockerCompose($dockerComposeRaw);
// Filter out excluded containers
$relevantStatuses = $containerStatuses->filter(function ($status, $containerName) use ($excludedContainers) {
return ! $excludedContainers->contains($containerName);
});
// If all containers are excluded, don't update status
// If all containers are excluded, calculate status from excluded containers
if ($relevantStatuses->isEmpty()) {
return null;
return $this->calculateExcludedStatusFromStrings($containerStatuses);
}
$hasRunning = false;
$hasRestarting = false;
$hasUnhealthy = false;
$hasExited = false;
// Use ContainerStatusAggregator service for state machine logic
$aggregator = new ContainerStatusAggregator;
foreach ($relevantStatuses as $status) {
if (str($status)->contains('restarting')) {
$hasRestarting = true;
} elseif (str($status)->contains('running')) {
$hasRunning = true;
if (str($status)->contains('unhealthy')) {
$hasUnhealthy = true;
return $aggregator->aggregateFromStrings($relevantStatuses, $maxRestartCount);
}
private function aggregateServiceContainerStatuses($services)
{
if (! isset($this->serviceContainerStatuses) || $this->serviceContainerStatuses->isEmpty()) {
return;
}
foreach ($this->serviceContainerStatuses as $key => $containerStatuses) {
// Parse key: serviceId:subType:subId
[$serviceId, $subType, $subId] = explode(':', $key);
$service = $services->where('id', $serviceId)->first();
if (! $service) {
continue;
}
// Get the service sub-resource (ServiceApplication or ServiceDatabase)
$subResource = null;
if ($subType === 'application') {
$subResource = $service->applications()->where('id', $subId)->first();
} elseif ($subType === 'database') {
$subResource = $service->databases()->where('id', $subId)->first();
}
if (! $subResource) {
continue;
}
// Parse docker compose from service to check for excluded containers
$dockerComposeRaw = data_get($service, 'docker_compose_raw');
$excludedContainers = $this->getExcludedContainersFromDockerCompose($dockerComposeRaw);
// Filter out excluded containers
$relevantStatuses = $containerStatuses->filter(function ($status, $containerName) use ($excludedContainers) {
return ! $excludedContainers->contains($containerName);
});
// If all containers are excluded, calculate status from excluded containers
if ($relevantStatuses->isEmpty()) {
$aggregatedStatus = $this->calculateExcludedStatusFromStrings($containerStatuses);
if ($aggregatedStatus) {
$statusFromDb = $subResource->status;
if ($statusFromDb !== $aggregatedStatus) {
$subResource->update(['status' => $aggregatedStatus]);
} else {
$subResource->update(['last_online_at' => now()]);
}
}
continue;
}
// Use ContainerStatusAggregator service for state machine logic
$aggregator = new ContainerStatusAggregator;
$aggregatedStatus = $aggregator->aggregateFromStrings($relevantStatuses);
// Update service sub-resource status with aggregated result
if ($aggregatedStatus) {
$statusFromDb = $subResource->status;
if ($statusFromDb !== $aggregatedStatus) {
$subResource->update(['status' => $aggregatedStatus]);
} else {
$subResource->update(['last_online_at' => now()]);
}
} elseif (str($status)->contains('exited')) {
$hasExited = true;
$hasUnhealthy = true;
}
}
if ($hasRestarting) {
return 'degraded (unhealthy)';
}
// If container is exited but has restart count > 0, it's in a crash loop
if ($hasExited && $maxRestartCount > 0) {
return 'degraded (unhealthy)';
}
if ($hasRunning && $hasExited) {
return 'degraded (unhealthy)';
}
if ($hasRunning) {
return $hasUnhealthy ? 'running (unhealthy)' : 'running (healthy)';
}
// All containers are exited with no restart count - truly stopped
return 'exited (unhealthy)';
}
}

View File

@@ -3,11 +3,14 @@
namespace App\Actions\Shared;
use App\Models\Application;
use App\Services\ContainerStatusAggregator;
use App\Traits\CalculatesExcludedStatus;
use Lorisleiva\Actions\Concerns\AsAction;
class ComplexStatusCheck
{
use AsAction;
use CalculatesExcludedStatus;
public function handle(Application $application)
{
@@ -61,74 +64,25 @@ class ComplexStatusCheck
private function aggregateContainerStatuses($application, $containers)
{
$dockerComposeRaw = data_get($application, 'docker_compose_raw');
$excludedContainers = collect();
$excludedContainers = $this->getExcludedContainersFromDockerCompose($dockerComposeRaw);
if ($dockerComposeRaw) {
try {
$dockerCompose = \Symfony\Component\Yaml\Yaml::parse($dockerComposeRaw);
$services = data_get($dockerCompose, 'services', []);
foreach ($services as $serviceName => $serviceConfig) {
$excludeFromHc = data_get($serviceConfig, 'exclude_from_hc', false);
$restartPolicy = data_get($serviceConfig, 'restart', 'always');
if ($excludeFromHc || $restartPolicy === 'no') {
$excludedContainers->push($serviceName);
}
}
} catch (\Exception $e) {
// If we can't parse, treat all containers as included
}
}
$hasRunning = false;
$hasRestarting = false;
$hasUnhealthy = false;
$hasExited = false;
$relevantContainerCount = 0;
foreach ($containers as $container) {
// Filter non-excluded containers
$relevantContainers = collect($containers)->filter(function ($container) use ($excludedContainers) {
$labels = data_get($container, 'Config.Labels', []);
$serviceName = data_get($labels, 'com.docker.compose.service');
if ($serviceName && $excludedContainers->contains($serviceName)) {
continue;
}
return ! ($serviceName && $excludedContainers->contains($serviceName));
});
$relevantContainerCount++;
$containerStatus = data_get($container, 'State.Status');
$containerHealth = data_get($container, 'State.Health.Status', 'unhealthy');
if ($containerStatus === 'restarting') {
$hasRestarting = true;
$hasUnhealthy = true;
} elseif ($containerStatus === 'running') {
$hasRunning = true;
if ($containerHealth === 'unhealthy') {
$hasUnhealthy = true;
}
} elseif ($containerStatus === 'exited') {
$hasExited = true;
$hasUnhealthy = true;
}
// If all containers are excluded, calculate status from excluded containers
// but mark it with :excluded to indicate monitoring is disabled
if ($relevantContainers->isEmpty()) {
return $this->calculateExcludedStatus($containers, $excludedContainers);
}
if ($relevantContainerCount === 0) {
return 'running:healthy';
}
// Use ContainerStatusAggregator service for state machine logic
$aggregator = new ContainerStatusAggregator;
if ($hasRestarting) {
return 'degraded:unhealthy';
}
if ($hasRunning && $hasExited) {
return 'degraded:unhealthy';
}
if ($hasRunning) {
return $hasUnhealthy ? 'running:unhealthy' : 'running:healthy';
}
return 'exited:unhealthy';
return $aggregator->aggregateFromContainers($relevantContainers);
}
}