admin: fix capacity leak in maintenance system by preserving Task IDs (#8214)
* admin: fix capacity leak in maintenance system by preserving Task IDs

Preserve the original TaskID generated during detection and sync task states (Assign/Complete/Retry) with ActiveTopology. This ensures that capacity reserved during task assignment is properly released when a task completes or fails, preventing 'need 9, have 0' capacity exhaustion.

Fixes https://github.com/seaweedfs/seaweedfs/issues/8202

* Update weed/admin/maintenance/maintenance_queue.go

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

* Update weed/admin/maintenance/maintenance_queue.go

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

* test: rename ActiveTopologySync to TaskIDPreservation

Rename the test case to more accurately reflect its scope, as suggested by the code review bot.

* Add TestMaintenanceQueue_ActiveTopologySync to verify task state synchronization and capacity management
* Implement task assignment rollback and add verification test
* Enhance ActiveTopology.CompleteTask to support pending tasks
* Populate storage impact in MaintenanceIntegration.SyncTask
* Release capacity in RemoveStaleWorkers when worker becomes unavailable
* Release capacity in MaintenanceManager.CancelTask when pending task is cancelled
* Sync reloaded tasks with ActiveTopology in LoadTasksFromPersistence
* Add verification tests for consistent capacity management lifecycle
* Add TestMaintenanceQueue_RetryCapacitySync to verify capacity tracking during retries

---------

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
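The failure mode is easiest to see as a reservation table keyed by task ID: capacity reserved under the detection-time ID can only be released if completion reports that same ID. A minimal sketch of the invariant (the type and methods below are illustrative stand-ins, not the ActiveTopology API):

    package main

    import "fmt"

    // reservations is an illustrative stand-in for ActiveTopology's per-task
    // capacity tracking: reserve on assign, release on complete/fail.
    type reservations map[string]int64 // taskID -> reserved units

    func (r reservations) assign(taskID string, units int64) { r[taskID] = units }

    func (r reservations) complete(taskID string) { delete(r, taskID) }

    func (r reservations) inUse() (total int64) {
        for _, u := range r {
            total += u
        }
        return
    }

    func main() {
        // Before the fix: the queue re-minted the ID, so completion released
        // a key that was never reserved and the original entry leaked.
        leaky := reservations{}
        leaky.assign("detected-id", 1)
        leaky.complete("regenerated-id")
        fmt.Println("leaked units:", leaky.inUse()) // 1, never reclaimed

        // After the fix: the detection-time ID survives end to end.
        fixed := reservations{}
        fixed.assign("detected-id", 1)
        fixed.complete("detected-id")
        fmt.Println("leaked units:", fixed.inUse()) // 0, capacity returned
    }

Enough leaked entries and the scheduler sees every slot as occupied, which is exactly the 'need 9, have 0' symptom reported in the linked issue.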
@@ -305,6 +305,7 @@ func (s *MaintenanceIntegration) convertToExistingFormat(result *types.TaskDetec
 	}

 	return &TaskDetectionResult{
+		TaskID:   result.TaskID,
 		TaskType: existingType,
 		VolumeID: result.VolumeID,
 		Server:   result.Server,
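This hunk is the core of the fix: convertToExistingFormat now copies the detector's TaskID into the queue-side result instead of letting a fresh one be minted downstream. The renamed TaskIDPreservation test presumably pins this down; a self-contained sketch of what such a check looks like (local stand-in types, not the actual seaweedfs test code):

    package maintenance_test

    import "testing"

    // Local stand-ins for the detection and queue result types.
    type detectionResult struct{ TaskID string }
    type queuedResult struct{ TaskID string }

    // convertPreservingID mirrors the fixed conversion: the TaskID is
    // copied through rather than re-generated.
    func convertPreservingID(r *detectionResult) *queuedResult {
        return &queuedResult{TaskID: r.TaskID}
    }

    func TestTaskIDPreservation(t *testing.T) {
        detected := &detectionResult{TaskID: "task-123"}
        queued := convertPreservingID(detected)
        if queued.TaskID != detected.TaskID {
            t.Fatalf("TaskID not preserved: got %q, want %q", queued.TaskID, detected.TaskID)
        }
    }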
@@ -523,19 +524,25 @@ func (s *MaintenanceIntegration) SyncTask(task *MaintenanceTask) {
 	var estimatedSize int64

 	if task.TypedParams != nil {
 		// Calculate storage impact for this task type
+		// Volume size is not currently used for Balance/Vacuum impact and is not stored in MaintenanceTask
 		sourceImpact, targetImpact := topology.CalculateTaskStorageImpact(topology.TaskType(string(taskType)), 0)

 		// Use unified sources and targets from TaskParams
 		for _, src := range task.TypedParams.Sources {
 			sources = append(sources, topology.TaskSource{
-				SourceServer: src.Node,
-				SourceDisk:   src.DiskId,
+				SourceServer:  src.Node,
+				SourceDisk:    src.DiskId,
+				StorageChange: sourceImpact,
 			})
+			// Sum estimated size from all sources
+			estimatedSize += int64(src.EstimatedSize)
 		}
 		for _, target := range task.TypedParams.Targets {
 			destinations = append(destinations, topology.TaskDestination{
-				TargetServer: target.Node,
-				TargetDisk:   target.DiskId,
+				TargetServer:  target.Node,
+				TargetDisk:    target.DiskId,
+				StorageChange: targetImpact,
 			})
 		}
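The second hunk makes SyncTask carry real numbers: every source and target now gets a StorageChange derived from CalculateTaskStorageImpact, and EstimatedSize is summed across all sources. A self-contained sketch of that shape (the types and the impact helper are stand-ins; the real ones live in the topology package):

    package main

    import "fmt"

    // taskSource is a stand-in for topology.TaskSource with the fields the
    // hunk populates.
    type taskSource struct {
        SourceServer  string
        SourceDisk    uint32
        StorageChange int64
    }

    type sourceSpec struct {
        Node          string
        DiskId        uint32
        EstimatedSize uint64
    }

    // calcImpact stands in for topology.CalculateTaskStorageImpact: a move
    // frees bytes on the source and consumes them on the target, so the two
    // impacts mirror each other. The hunk passes 0 because volume size is
    // not used for Balance/Vacuum impact.
    func calcImpact(volumeSize int64) (sourceImpact, targetImpact int64) {
        return -volumeSize, volumeSize
    }

    func main() {
        sourceImpact, _ := calcImpact(0)

        var estimatedSize int64
        var sources []taskSource
        for _, src := range []sourceSpec{
            {Node: "vol1:8080", DiskId: 0, EstimatedSize: 1 << 20},
            {Node: "vol2:8080", DiskId: 1, EstimatedSize: 2 << 20},
        } {
            sources = append(sources, taskSource{
                SourceServer:  src.Node,
                SourceDisk:    src.DiskId,
                StorageChange: sourceImpact, // one impact value, applied per source
            })
            estimatedSize += int64(src.EstimatedSize) // summed across all sources
        }
        fmt.Println(len(sources), "sources, estimated", estimatedSize, "bytes")
    }

With those fields populated, ActiveTopology can reserve a concrete amount at assignment and return exactly that amount on completion, cancellation, or retry, which is the lifecycle the RemoveStaleWorkers, CancelTask, and LoadTasksFromPersistence changes keep consistent.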