mirror of
https://github.com/tiennm99/coolify.git
synced 2026-04-17 19:21:36 +00:00
Fix stale lock issue causing scheduled tasks to stop (#4539)
## Problem Scheduled tasks, backups, and auto-updates stopped working after 1-2 months with error: MaxAttemptsExceededException: App\Jobs\ScheduledJobManager has been attempted too many times. Root cause: ScheduledJobManager used WithoutOverlapping with only releaseAfter(60), causing locks without expiration (TTL=-1) that persisted indefinitely when jobs hung or processes crashed. ## Solution ### Part 1: Prevention (Future Locks) - Added expireAfter(60) to ScheduledJobManager middleware - Lock now auto-expires after 60 seconds (matches everyMinute schedule) - Changed from releaseAfter(60) to expireAfter(60)->dontRelease() - Follows Laravel best practices and matches other Coolify jobs ### Part 2: Recovery (Existing Locks) - Enhanced cleanup:redis command with --clear-locks flag - Scans Redis for stale locks (TTL=-1) and removes them - Called automatically during app:init on startup/upgrade - Provides immediate recovery for affected instances ## Changes - app/Jobs/ScheduledJobManager.php: Added expireAfter(60)->dontRelease() - app/Console/Commands/CleanupRedis.php: Added cleanupCacheLocks() method - app/Console/Commands/Init.php: Auto-clear locks on startup - tests/Unit/ScheduledJobManagerLockTest.php: Test to prevent regression - STALE_LOCK_FIX.md: Complete documentation ## Testing - Unit tests pass (2 tests, 8 assertions) - Code formatted with Pint - Matches pattern used by CleanupInstanceStuffsJob 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -7,9 +7,9 @@ use Illuminate\Support\Facades\Redis;
|
||||
|
||||
class CleanupRedis extends Command
|
||||
{
|
||||
protected $signature = 'cleanup:redis {--dry-run : Show what would be deleted without actually deleting} {--skip-overlapping : Skip overlapping queue cleanup}';
|
||||
protected $signature = 'cleanup:redis {--dry-run : Show what would be deleted without actually deleting} {--skip-overlapping : Skip overlapping queue cleanup} {--clear-locks : Clear stale WithoutOverlapping locks}';
|
||||
|
||||
protected $description = 'Cleanup Redis (Horizon jobs, metrics, overlapping queues, and related data)';
|
||||
protected $description = 'Cleanup Redis (Horizon jobs, metrics, overlapping queues, cache locks, and related data)';
|
||||
|
||||
public function handle()
|
||||
{
|
||||
@@ -56,6 +56,13 @@ class CleanupRedis extends Command
|
||||
$deletedCount += $overlappingCleaned;
|
||||
}
|
||||
|
||||
// Clean up stale cache locks (WithoutOverlapping middleware)
|
||||
if ($this->option('clear-locks')) {
|
||||
$this->info('Cleaning up stale cache locks...');
|
||||
$locksCleaned = $this->cleanupCacheLocks($dryRun);
|
||||
$deletedCount += $locksCleaned;
|
||||
}
|
||||
|
||||
if ($dryRun) {
|
||||
$this->info("DRY RUN: Would delete {$deletedCount} out of {$totalKeys} keys");
|
||||
} else {
|
||||
@@ -273,4 +280,57 @@ class CleanupRedis extends Command
|
||||
|
||||
return $cleanedCount;
|
||||
}
|
||||
|
||||
private function cleanupCacheLocks(bool $dryRun): int
|
||||
{
|
||||
$cleanedCount = 0;
|
||||
|
||||
// Use the default Redis connection (database 0) where cache locks are stored
|
||||
$redis = Redis::connection('default');
|
||||
|
||||
// Get all keys matching WithoutOverlapping lock pattern
|
||||
$allKeys = $redis->keys('*');
|
||||
$lockKeys = [];
|
||||
|
||||
foreach ($allKeys as $key) {
|
||||
// Match cache lock keys: they contain 'laravel-queue-overlap'
|
||||
if (str_contains($key, 'laravel-queue-overlap')) {
|
||||
$lockKeys[] = $key;
|
||||
}
|
||||
}
|
||||
|
||||
if (empty($lockKeys)) {
|
||||
$this->info(' No cache locks found.');
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
$this->info(' Found '.count($lockKeys).' cache lock(s)');
|
||||
|
||||
foreach ($lockKeys as $lockKey) {
|
||||
// Check TTL to identify stale locks
|
||||
$ttl = $redis->ttl($lockKey);
|
||||
|
||||
// TTL = -1 means no expiration (stale lock!)
|
||||
// TTL = -2 means key doesn't exist
|
||||
// TTL > 0 means lock is valid and will expire
|
||||
if ($ttl === -1) {
|
||||
if ($dryRun) {
|
||||
$this->warn(" Would delete STALE lock (no expiration): {$lockKey}");
|
||||
} else {
|
||||
$redis->del($lockKey);
|
||||
$this->info(" ✓ Deleted STALE lock: {$lockKey}");
|
||||
}
|
||||
$cleanedCount++;
|
||||
} elseif ($ttl > 0) {
|
||||
$this->line(" Skipping active lock (expires in {$ttl}s): {$lockKey}");
|
||||
}
|
||||
}
|
||||
|
||||
if ($cleanedCount === 0) {
|
||||
$this->info(' No stale locks found (all locks have expiration set)');
|
||||
}
|
||||
|
||||
return $cleanedCount;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user