admin: Refactor task destination planning (#7063)

* refactor planning into task detection
* refactoring worker tasks
* refactor
* compiles, but only balance task is registered
* compiles, but has nil exception
* avoid nil logger
* add back ec task
* setting ec log directory
* implement balance and vacuum tasks
* EC tasks will no longer fail with "file not found" errors
* Use ReceiveFile API to send locally generated shards
* distributing shard files and ecx,ecj,vif files
* generate .ecx files correctly
* do not mount all possible EC shards (0-13) on every destination
* use constants
* delete all replicas
* rename files
* pass in volume size to tasks
2025-08-01 11:18:32 -07:00
parent 1cba609bfa
commit 0975968e71
43 changed files with 2910 additions and 2385 deletions
--- a/weed/admin/maintenance/maintenance_scanner.go
+++ b/weed/admin/maintenance/maintenance_scanner.go
@@ -73,20 +73,10 @@ func (ms *MaintenanceScanner) ScanForMaintenanceTasks() ([]*TaskDetectionResult,
 // getVolumeHealthMetrics collects health information for all volumes
 func (ms *MaintenanceScanner) getVolumeHealthMetrics() ([]*VolumeHealthMetrics, error) {
 	var metrics []*VolumeHealthMetrics
-	var volumeSizeLimitMB uint64

 	glog.V(1).Infof("Collecting volume health metrics from master")
 	err := ms.adminClient.WithMasterClient(func(client master_pb.SeaweedClient) error {
-		// First, get volume size limit from master configuration
-		configResp, err := client.GetMasterConfiguration(context.Background(), &master_pb.GetMasterConfigurationRequest{})
-		if err != nil {
-			glog.Warningf("Failed to get volume size limit from master: %v", err)
-			volumeSizeLimitMB = 30000 // Default to 30GB if we can't get from master
-		} else {
-			volumeSizeLimitMB = uint64(configResp.VolumeSizeLimitMB)
-		}

-		// Now get volume list
 		resp, err := client.VolumeList(context.Background(), &master_pb.VolumeListRequest{})
 		if err != nil {
 			return err
@@ -97,7 +87,7 @@ func (ms *MaintenanceScanner) getVolumeHealthMetrics() ([]*VolumeHealthMetrics,
 			return nil
 		}

-		volumeSizeLimitBytes := volumeSizeLimitMB * 1024 * 1024 // Convert MB to bytes
+		volumeSizeLimitBytes := uint64(resp.VolumeSizeLimitMb) * 1024 * 1024 // Convert MB to bytes

 		// Track all nodes discovered in topology
 		var allNodesInTopology []string
@@ -166,7 +156,6 @@ func (ms *MaintenanceScanner) getVolumeHealthMetrics() ([]*VolumeHealthMetrics,
 		glog.Infof("  - Total volume servers in topology: %d (%v)", len(allNodesInTopology), allNodesInTopology)
 		glog.Infof("  - Volume servers with volumes: %d (%v)", len(nodesWithVolumes), nodesWithVolumes)
 		glog.Infof("  - Volume servers without volumes: %d (%v)", len(nodesWithoutVolumes), nodesWithoutVolumes)
-		glog.Infof("Note: Maintenance system will track empty servers separately from volume metrics.")

 		// Store topology info for volume shard tracker
 		ms.lastTopologyInfo = resp.TopologyInfo
@@ -187,11 +176,6 @@ func (ms *MaintenanceScanner) getVolumeHealthMetrics() ([]*VolumeHealthMetrics,
 	return metrics, nil
 }

-// getTopologyInfo returns the last collected topology information
-func (ms *MaintenanceScanner) getTopologyInfo() *master_pb.TopologyInfo {
-	return ms.lastTopologyInfo
-}
-
 // enrichVolumeMetrics adds additional information like replica counts
 func (ms *MaintenanceScanner) enrichVolumeMetrics(metrics []*VolumeHealthMetrics) {
 	// Group volumes by ID to count replicas