admin: fix capacity leak in maintenance system by preserving Task IDs (#8214)
* admin: fix capacity leak in maintenance system by preserving Task IDs

  Preserve the original TaskID generated during detection and sync task states (Assign/Complete/Retry) with ActiveTopology. This ensures that capacity reserved during task assignment is properly released when a task completes or fails, preventing 'need 9, have 0' capacity exhaustion.

  Fixes https://github.com/seaweedfs/seaweedfs/issues/8202

* Update weed/admin/maintenance/maintenance_queue.go

  Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

* Update weed/admin/maintenance/maintenance_queue.go

  Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

* test: rename ActiveTopologySync to TaskIDPreservation

  Rename the test case to more accurately reflect its scope, as suggested by the code review bot.

* Add TestMaintenanceQueue_ActiveTopologySync to verify task state synchronization and capacity management

* Implement task assignment rollback and add verification test

* Enhance ActiveTopology.CompleteTask to support pending tasks

* Populate storage impact in MaintenanceIntegration.SyncTask

* Release capacity in RemoveStaleWorkers when worker becomes unavailable

* Release capacity in MaintenanceManager.CancelTask when pending task is cancelled

* Sync reloaded tasks with ActiveTopology in LoadTasksFromPersistence

* Add verification tests for consistent capacity management lifecycle

* Add TestMaintenanceQueue_RetryCapacitySync to verify capacity tracking during retries

---------

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -66,11 +66,17 @@ func (at *ActiveTopology) CompleteTask(taskID string) error {
|
||||
|
||||
task, exists := at.assignedTasks[taskID]
|
||||
if !exists {
|
||||
return fmt.Errorf("assigned task %s not found", taskID)
|
||||
// If not in assigned tasks, check pending tasks
|
||||
if task, exists = at.pendingTasks[taskID]; exists {
|
||||
delete(at.pendingTasks, taskID)
|
||||
} else {
|
||||
return fmt.Errorf("task %s not found in assigned or pending tasks", taskID)
|
||||
}
|
||||
} else {
|
||||
delete(at.assignedTasks, taskID)
|
||||
}
|
||||
|
||||
// Release reserved capacity by moving task to completed state
|
||||
delete(at.assignedTasks, taskID)
|
||||
task.Status = TaskStatusCompleted
|
||||
task.CompletedAt = time.Now()
|
||||
at.recentTasks[taskID] = task
|
||||
|
||||
Reference in New Issue
Block a user