admin: Refactor task destination planning (#7063)

* refactor planning into task detection

* refactoring worker tasks

* refactor

* compiles, but only balance task is registered

* compiles, but has nil exception

* avoid nil logger

* add back ec task

* setting ec log directory

* implement balance and vacuum tasks

* EC tasks will no longer fail with "file not found" errors

* Use ReceiveFile API to send locally generated shards

* distributing shard files and ecx,ecj,vif files

* generate .ecx files correctly

* do not mount all possible EC shards (0-13) on every destination

* use constants

* delete all replicas

* rename files

* pass in volume size to tasks
This commit is contained in:
Chris Lu
2025-08-01 11:18:32 -07:00
committed by GitHub
parent 1cba609bfa
commit 0975968e71
43 changed files with 2910 additions and 2385 deletions

View File

@@ -332,307 +332,6 @@ type MultiDestinationPlan struct {
SuccessfulDCs int `json:"successful_dcs"`
}
// PlanBalanceDestination chooses one target disk for moving a volume away
// from sourceNode as part of a balance task.
//
// Candidates come from getAvailableDisksForPlanning for TaskTypeBalance with
// sourceNode excluded, and the winner is picked by
// selectBestBalanceDestination. volumeID is accepted but not consulted here.
// The read lock on at.mutex is held for the whole call.
//
// It returns an error when there are no candidates, or when no candidate is
// selected.
func (at *ActiveTopology) PlanBalanceDestination(volumeID uint32, sourceNode string, sourceRack string, sourceDC string, volumeSize uint64) (*DestinationPlan, error) {
	at.mutex.RLock()
	defer at.mutex.RUnlock()

	// The source node is never a valid target for its own volume.
	candidates := at.getAvailableDisksForPlanning(TaskTypeBalance, sourceNode)
	if len(candidates) == 0 {
		return nil, fmt.Errorf("no available disks for balance operation")
	}

	target := at.selectBestBalanceDestination(candidates, sourceRack, sourceDC, volumeSize)
	if target == nil {
		return nil, fmt.Errorf("no suitable destination found for balance operation")
	}

	plan := &DestinationPlan{
		TargetNode:   target.NodeID,
		TargetDisk:   target.DiskID,
		TargetRack:   target.Rack,
		TargetDC:     target.DataCenter,
		ExpectedSize: volumeSize,
	}
	plan.PlacementScore = at.calculatePlacementScore(target, sourceRack, sourceDC)
	plan.Conflicts = at.checkPlacementConflicts(target, TaskTypeBalance)
	return plan, nil
}
// PlanECDestinations chooses shardsNeeded target disks for distributing the
// EC shards of a volume.
//
// Candidates come from getAvailableDisksForPlanning for
// TaskTypeErasureCoding with no node excluded, so disks on the source node
// may be chosen. volumeID and sourceNode are accepted but not consulted
// here. The read lock on at.mutex is held for the whole call.
//
// The returned plan carries one DestinationPlan per chosen disk, together
// with the number of distinct "dc:rack" pairs and distinct data centers
// covered. It returns an error when fewer than shardsNeeded candidates are
// available, or when fewer than shardsNeeded are selected.
func (at *ActiveTopology) PlanECDestinations(volumeID uint32, sourceNode string, sourceRack string, sourceDC string, shardsNeeded int) (*MultiDestinationPlan, error) {
	at.mutex.RLock()
	defer at.mutex.RUnlock()

	candidates := at.getAvailableDisksForPlanning(TaskTypeErasureCoding, "")
	if have := len(candidates); have < shardsNeeded {
		return nil, fmt.Errorf("insufficient disks for EC placement: need %d, have %d", shardsNeeded, have)
	}

	chosen := at.selectBestECDestinations(candidates, sourceRack, sourceDC, shardsNeeded)
	if len(chosen) < shardsNeeded {
		return nil, fmt.Errorf("could not find %d suitable destinations for EC placement", shardsNeeded)
	}

	result := &MultiDestinationPlan{}
	racksSeen := make(map[string]struct{})
	dcsSeen := make(map[string]struct{})
	for _, target := range chosen {
		result.Plans = append(result.Plans, &DestinationPlan{
			TargetNode:     target.NodeID,
			TargetDisk:     target.DiskID,
			TargetRack:     target.Rack,
			TargetDC:       target.DataCenter,
			ExpectedSize:   0, // EC shards don't have a predetermined size
			PlacementScore: at.calculatePlacementScore(target, sourceRack, sourceDC),
			Conflicts:      at.checkPlacementConflicts(target, TaskTypeErasureCoding),
		})

		// Racks are keyed together with their data center so that racks
		// sharing a name in different DCs count separately.
		racksSeen[fmt.Sprintf("%s:%s", target.DataCenter, target.Rack)] = struct{}{}
		dcsSeen[target.DataCenter] = struct{}{}
	}

	result.TotalShards = len(result.Plans)
	result.SuccessfulRack = len(racksSeen)
	result.SuccessfulDCs = len(dcsSeen)
	return result, nil
}
// getAvailableDisksForPlanning collects the disks that may serve as a
// destination for a task of the given type.
//
// A disk is kept when isDiskAvailable accepts it for taskType and, if
// excludeNodeID is non-empty, its NodeID differs from excludeNodeID. The
// result is nil when nothing qualifies. The method takes no lock itself;
// PlanBalanceDestination and PlanECDestinations acquire the read lock on
// at.mutex before calling it.
func (at *ActiveTopology) getAvailableDisksForPlanning(taskType TaskType, excludeNodeID string) []*activeDisk {
	var candidates []*activeDisk
	for _, d := range at.disks {
		// Disks on the excluded node are dropped before the availability check.
		onExcludedNode := excludeNodeID != "" && d.NodeID == excludeNodeID
		if onExcludedNode || !at.isDiskAvailable(d, taskType) {
			continue
		}
		candidates = append(candidates, d)
	}
	return candidates
}
// selectBestBalanceDestination returns the candidate with the highest
// calculateBalanceScore, or nil when disks is empty.
//
// When several candidates share the top score, the one that appears first in
// disks wins. The comparison is seeded from the first candidate instead of a
// fixed sentinel because balance scores can be negative (the load term is
// (2 - activeLoad) * 40); with a -1.0 sentinel a non-empty list of busy disks
// produced nil and the caller reported that no destination exists.
func (at *ActiveTopology) selectBestBalanceDestination(disks []*activeDisk, sourceRack string, sourceDC string, volumeSize uint64) *activeDisk {
	var bestDisk *activeDisk
	var bestScore float64
	for _, disk := range disks {
		score := at.calculateBalanceScore(disk, sourceRack, sourceDC, volumeSize)
		// Strict ">" keeps the earliest candidate on ties.
		if bestDisk == nil || score > bestScore {
			bestScore = score
			bestDisk = disk
		}
	}
	return bestDisk
}
// selectBestECDestinations picks up to shardsNeeded disks for EC shard
// placement, spreading them over as many racks as possible.
//
// Disks are grouped by "dataCenter:rack". The first pass takes the disk that
// selectBestFromRack prefers from each rack; the second pass fills any
// remaining slots from disks in input order, skipping disks already chosen
// (matched by NodeID and DiskID) and disks that isDiskAvailable rejects.
//
// Racks are visited in the order they first appear in disks. Ranging over
// the grouping map directly would visit them in an unspecified order, so the
// racks chosen (when there are more racks than shards) and the order of the
// result would differ between calls with the same input.
//
// It returns nil when disks is empty or shardsNeeded is not positive, and may
// return fewer than shardsNeeded disks when not enough qualify.
func (at *ActiveTopology) selectBestECDestinations(disks []*activeDisk, sourceRack string, sourceDC string, shardsNeeded int) []*activeDisk {
	if len(disks) == 0 {
		return nil
	}

	// Group disks by rack (qualified by DC) and remember first-seen rack order.
	rackGroups := make(map[string][]*activeDisk)
	var rackOrder []string
	for _, disk := range disks {
		rackKey := fmt.Sprintf("%s:%s", disk.DataCenter, disk.Rack)
		if _, seen := rackGroups[rackKey]; !seen {
			rackOrder = append(rackOrder, rackKey)
		}
		rackGroups[rackKey] = append(rackGroups[rackKey], disk)
	}

	var selected []*activeDisk

	// First pass: one disk per rack for maximum diversity.
	for _, rackKey := range rackOrder {
		if len(selected) >= shardsNeeded {
			break
		}
		if bestDisk := at.selectBestFromRack(rackGroups[rackKey], sourceRack, sourceDC); bestDisk != nil {
			selected = append(selected, bestDisk)
		}
	}

	// Second pass: fill the remaining slots from racks that are already used.
candidates:
	for _, disk := range disks {
		if len(selected) >= shardsNeeded {
			break
		}
		for _, sel := range selected {
			if sel.NodeID == disk.NodeID && sel.DiskID == disk.DiskID {
				continue candidates // already selected
			}
		}
		if at.isDiskAvailable(disk, TaskTypeErasureCoding) {
			selected = append(selected, disk)
		}
	}

	return selected
}
// selectBestFromRack returns the disk in one rack that has the highest
// calculateECScore among those isDiskAvailable accepts for
// TaskTypeErasureCoding, or nil when none is available.
//
// When several disks share the top score, the one that appears first in
// disks wins. The comparison is seeded from the first available disk instead
// of a fixed sentinel because EC scores can be negative (the load term is
// (2 - activeLoad) * 30); with a -1.0 sentinel an available but busy disk
// could be dropped and the rack skipped.
func (at *ActiveTopology) selectBestFromRack(disks []*activeDisk, sourceRack string, sourceDC string) *activeDisk {
	var bestDisk *activeDisk
	var bestScore float64
	for _, disk := range disks {
		if !at.isDiskAvailable(disk, TaskTypeErasureCoding) {
			continue
		}
		score := at.calculateECScore(disk, sourceRack, sourceDC)
		// Strict ">" keeps the earliest candidate on ties.
		if bestDisk == nil || score > bestScore {
			bestScore = score
			bestDisk = disk
		}
	}
	return bestDisk
}
// calculateBalanceScore rates a disk as the destination of a balance move;
// higher is better.
//
// The score is the sum of:
//   - load: (2 - pending - assigned tasks) * 40, which is 80 for an idle disk
//     and negative once more than two tasks are queued;
//   - free space: up to 20 points, proportional to the share of volume slots
//     still unused (skipped when MaxVolumeCount is not positive);
//   - 10 points when the disk's rack differs from sourceRack;
//   - 5 points when the disk's data center differs from sourceDC.
//
// volumeSize is accepted but does not influence the score.
func (at *ActiveTopology) calculateBalanceScore(disk *activeDisk, sourceRack string, sourceDC string, volumeSize uint64) float64 {
	queued := len(disk.pendingTasks) + len(disk.assignedTasks)
	total := (2.0 - float64(queued)) * 40.0

	if slots := disk.DiskInfo.DiskInfo.MaxVolumeCount; slots > 0 {
		unused := slots - disk.DiskInfo.DiskInfo.VolumeCount
		total += float64(unused) / float64(slots) * 20.0
	}

	// Moving to another rack or data center is rewarded.
	if disk.Rack != sourceRack {
		total += 10.0
	}
	if disk.DataCenter != sourceDC {
		total += 5.0
	}
	return total
}
// calculateECScore rates a disk as the destination of an EC shard; higher is
// better.
//
// The score is the sum of:
//   - load: (2 - pending - assigned tasks) * 30, which is 60 for an idle disk
//     and negative once more than two tasks are queued;
//   - free space: up to 20 points, proportional to the share of volume slots
//     still unused (skipped when MaxVolumeCount is not positive);
//   - 20 points when the disk's rack differs from sourceRack;
//   - 15 points when the disk's data center differs from sourceDC.
func (at *ActiveTopology) calculateECScore(disk *activeDisk, sourceRack string, sourceDC string) float64 {
	queued := len(disk.pendingTasks) + len(disk.assignedTasks)
	total := (2.0 - float64(queued)) * 30.0

	if slots := disk.DiskInfo.DiskInfo.MaxVolumeCount; slots > 0 {
		unused := slots - disk.DiskInfo.DiskInfo.VolumeCount
		total += float64(unused) / float64(slots) * 20.0
	}

	// EC weighs rack and data-center diversity more heavily than balance does.
	if disk.Rack != sourceRack {
		total += 20.0
	}
	if disk.DataCenter != sourceDC {
		total += 15.0
	}
	return total
}
// calculatePlacementScore summarises how good a chosen destination is as a
// value in the range [0.0, 1.0].
//
// The score is a weighted sum of three factors, each limited to [0, 1]:
//   - load (weight 0.4): (2 - pending - assigned tasks) / 2;
//   - free capacity (weight 0.3): share of volume slots still unused,
//     skipped when MaxVolumeCount is not positive;
//   - diversity (weight 0.3): 0.5 for a rack different from sourceRack plus
//     0.5 for a data center different from sourceDC.
//
// The load and capacity factors are clamped because they fall outside [0, 1]
// for a disk with more than two queued tasks or with VolumeCount above
// MaxVolumeCount; unclamped, such a disk produced a negative score.
func (at *ActiveTopology) calculatePlacementScore(disk *activeDisk, sourceRack string, sourceDC string) float64 {
	score := 0.0

	// Load factor
	activeLoad := len(disk.pendingTasks) + len(disk.assignedTasks)
	loadScore := (2.0 - float64(activeLoad)) / 2.0 // 1.0 when idle, 0.0 at two tasks
	if loadScore < 0 {
		loadScore = 0
	}
	score += loadScore * 0.4

	// Capacity factor
	if disk.DiskInfo.DiskInfo.MaxVolumeCount > 0 {
		freeRatio := float64(disk.DiskInfo.DiskInfo.MaxVolumeCount-disk.DiskInfo.DiskInfo.VolumeCount) / float64(disk.DiskInfo.DiskInfo.MaxVolumeCount)
		if freeRatio < 0 {
			freeRatio = 0
		} else if freeRatio > 1 {
			freeRatio = 1
		}
		score += freeRatio * 0.3
	}

	// Diversity factor
	diversityScore := 0.0
	if disk.Rack != sourceRack {
		diversityScore += 0.5
	}
	if disk.DataCenter != sourceDC {
		diversityScore += 0.5
	}
	score += diversityScore * 0.3

	// Guard against floating-point rounding pushing the sum just above 1.
	if score > 1.0 {
		score = 1.0
	}
	return score
}
// checkPlacementConflicts lists the reasons a disk is a questionable target
// for a task of type taskType. The result is nil when nothing is flagged.
//
// Possible entries, in the order they are checked:
//   - "disk_load_high_<n>" when pending plus assigned tasks total n >= 2;
//   - "disk_capacity_high" when more than 90% of the volume slots are used
//     (skipped when MaxVolumeCount is not positive);
//   - "task_conflict_<type>" for every assigned task whose type
//     areTaskTypesConflicting reports as conflicting with taskType. Pending
//     tasks are not examined for type conflicts.
func (at *ActiveTopology) checkPlacementConflicts(disk *activeDisk, taskType TaskType) []string {
	var found []string

	if load := len(disk.pendingTasks) + len(disk.assignedTasks); load >= 2 {
		found = append(found, fmt.Sprintf("disk_load_high_%d", load))
	}

	slots := disk.DiskInfo.DiskInfo.MaxVolumeCount
	if slots > 0 && float64(disk.DiskInfo.DiskInfo.VolumeCount)/float64(slots) > 0.9 {
		found = append(found, "disk_capacity_high")
	}

	for _, running := range disk.assignedTasks {
		if !at.areTaskTypesConflicting(running.TaskType, taskType) {
			continue
		}
		found = append(found, fmt.Sprintf("task_conflict_%s", running.TaskType))
	}

	return found
}
// Private methods
// reassignTaskStates assigns tasks to the appropriate disks

View File

@@ -4,7 +4,6 @@ import (
"testing"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@@ -478,177 +477,31 @@ func createTopologyWithConflicts() *ActiveTopology {
return topology
}
// TestDestinationPlanning tests destination planning functionality
// TestDestinationPlanning tests that the public interface works correctly
// NOTE: Destination planning is now done in task detection phase, not in ActiveTopology
func TestDestinationPlanning(t *testing.T) {
topology := NewActiveTopology(10)
topology.UpdateTopology(createSampleTopology())
// Test balance destination planning
t.Run("Balance destination planning", func(t *testing.T) {
plan, err := topology.PlanBalanceDestination(1001, "10.0.0.1:8080", "rack1", "dc1", 1024*1024) // 1MB
require.NoError(t, err)
require.NotNil(t, plan)
// Test that GetAvailableDisks works for destination planning
t.Run("GetAvailableDisks functionality", func(t *testing.T) {
availableDisks := topology.GetAvailableDisks(TaskTypeBalance, "10.0.0.1:8080")
assert.Greater(t, len(availableDisks), 0)
// Should not target the source node
assert.NotEqual(t, "10.0.0.1:8080", plan.TargetNode)
assert.Equal(t, "10.0.0.2:8080", plan.TargetNode)
assert.NotEmpty(t, plan.TargetRack)
assert.NotEmpty(t, plan.TargetDC)
assert.Greater(t, plan.PlacementScore, 0.0)
})
// Test EC destination planning
t.Run("EC destination planning", func(t *testing.T) {
multiPlan, err := topology.PlanECDestinations(1002, "10.0.0.1:8080", "rack1", "dc1", 3) // Ask for 3 shards - source node can be included
require.NoError(t, err)
require.NotNil(t, multiPlan)
assert.Greater(t, len(multiPlan.Plans), 0)
assert.LessOrEqual(t, len(multiPlan.Plans), 3) // Should get at most 3 shards
assert.Equal(t, len(multiPlan.Plans), multiPlan.TotalShards)
// Check that all plans have valid target nodes
for _, plan := range multiPlan.Plans {
assert.NotEmpty(t, plan.TargetNode)
assert.NotEmpty(t, plan.TargetRack)
assert.NotEmpty(t, plan.TargetDC)
assert.GreaterOrEqual(t, plan.PlacementScore, 0.0)
// Should exclude the source node
for _, disk := range availableDisks {
assert.NotEqual(t, "10.0.0.1:8080", disk.NodeID)
}
// Check diversity metrics
assert.GreaterOrEqual(t, multiPlan.SuccessfulRack, 1)
assert.GreaterOrEqual(t, multiPlan.SuccessfulDCs, 1)
})
// Test destination planning with load
t.Run("Destination planning considers load", func(t *testing.T) {
// Add load to one disk
topology.AddPendingTask("task1", TaskTypeBalance, 2001,
"10.0.0.2:8080", 0, "", 0)
// Test that topology state can be used for planning
t.Run("Topology provides planning information", func(t *testing.T) {
topologyInfo := topology.GetTopologyInfo()
assert.NotNil(t, topologyInfo)
assert.Greater(t, len(topologyInfo.DataCenterInfos), 0)
plan, err := topology.PlanBalanceDestination(1003, "10.0.0.1:8080", "rack1", "dc1", 1024*1024)
require.NoError(t, err)
require.NotNil(t, plan)
// Should prefer less loaded disk (disk 1 over disk 0 on node2)
assert.Equal(t, "10.0.0.2:8080", plan.TargetNode)
assert.Equal(t, uint32(1), plan.TargetDisk) // Should prefer SSD (disk 1) which has no load
})
// Test insufficient destinations
t.Run("Handle insufficient destinations", func(t *testing.T) {
// Try to plan for more EC shards than available disks
multiPlan, err := topology.PlanECDestinations(1004, "10.0.0.1:8080", "rack1", "dc1", 100)
// Should get an error for insufficient disks
assert.Error(t, err)
assert.Nil(t, multiPlan)
})
}
// TestDestinationPlanningWithActiveTopology exercises PlanBalanceDestination
// against the topology built by createUnbalancedTopology, the way a balance
// detector would call it.
func TestDestinationPlanningWithActiveTopology(t *testing.T) {
	const (
		overloadedNode = "10.0.0.1:8080"
		lightNode      = "10.0.0.2:8080"
		oneMiB         = 1024 * 1024
	)

	topology := NewActiveTopology(10)
	topology.UpdateTopology(createUnbalancedTopology())

	// A planned balance move must leave the source node and land on the
	// lightly loaded one, with a score inside (0, 1].
	t.Run("Balance task with destination", func(t *testing.T) {
		plan, err := topology.PlanBalanceDestination(uint32(1001), overloadedNode, "rack1", "dc1", oneMiB)
		require.NoError(t, err)
		require.NotNil(t, plan)

		assert.NotEqual(t, overloadedNode, plan.TargetNode)
		assert.Equal(t, lightNode, plan.TargetNode)

		assert.Greater(t, plan.PlacementScore, 0.0)
		assert.LessOrEqual(t, plan.PlacementScore, 1.0)
	})

	// Planning must still succeed after a pending task is registered. The two
	// scores are only logged, not compared, because the expected relation
	// depends on the scoring algorithm.
	t.Run("Task state affects future planning", func(t *testing.T) {
		volumeID := uint32(1002)

		before, err := topology.PlanBalanceDestination(volumeID, overloadedNode, "rack1", "dc1", oneMiB)
		require.NoError(t, err)
		require.NotNil(t, before)

		topology.AddPendingTask("task1", TaskTypeBalance, volumeID, overloadedNode, 0, lightNode, 0)

		after, err := topology.PlanBalanceDestination(1003, overloadedNode, "rack1", "dc1", oneMiB)
		require.NoError(t, err)
		require.NotNil(t, after)

		glog.V(1).Infof("Plan1 score: %.3f, Plan2 score: %.3f", before.PlacementScore, after.PlacementScore)
	})
}
// TestECDestinationPlanningDetailed tests the EC destination planning with multiple shards.
// It covers two scenarios against the topology from createSampleTopology:
// planning three shards on an idle topology, and planning two shards after
// tasks have been assigned to some disks.
func TestECDestinationPlanningDetailed(t *testing.T) {
topology := NewActiveTopology(10)
topology.UpdateTopology(createSampleTopology())
t.Run("EC multiple destinations", func(t *testing.T) {
// Plan for 3 EC shards (now including source node, we have 4 disks total)
multiPlan, err := topology.PlanECDestinations(1005, "10.0.0.1:8080", "rack1", "dc1", 3)
require.NoError(t, err)
require.NotNil(t, multiPlan)
// Should get 3 destinations (can include source node's disks)
assert.Equal(t, 3, len(multiPlan.Plans))
assert.Equal(t, 3, multiPlan.TotalShards)
// Count node distribution - source node can now be included
nodeCount := make(map[string]int)
for _, plan := range multiPlan.Plans {
nodeCount[plan.TargetNode]++
}
// Should distribute across available nodes (both nodes can be used)
assert.GreaterOrEqual(t, len(nodeCount), 1, "Should use at least 1 node")
assert.LessOrEqual(t, len(nodeCount), 2, "Should use at most 2 nodes")
// The distribution and diversity counts are logged for inspection only.
glog.V(1).Infof("EC destinations node distribution: %v", nodeCount)
glog.V(1).Infof("EC destinations: %d plans across %d racks, %d DCs",
multiPlan.TotalShards, multiPlan.SuccessfulRack, multiPlan.SuccessfulDCs)
})
t.Run("EC destination planning with task conflicts", func(t *testing.T) {
// Create a fresh topology for this test to avoid conflicts from previous test
freshTopology := NewActiveTopology(10)
freshTopology.UpdateTopology(createSampleTopology())
// Add tasks to create conflicts on some disks
// (one task on disk 0 of each node; both are assigned below)
freshTopology.AddPendingTask("conflict1", TaskTypeVacuum, 2001, "10.0.0.2:8080", 0, "", 0)
freshTopology.AddPendingTask("conflict2", TaskTypeBalance, 2002, "10.0.0.1:8080", 0, "", 0)
freshTopology.AssignTask("conflict1")
freshTopology.AssignTask("conflict2")
// Plan EC destinations - should still succeed using available disks
multiPlan, err := freshTopology.PlanECDestinations(1006, "10.0.0.1:8080", "rack1", "dc1", 2)
require.NoError(t, err)
require.NotNil(t, multiPlan)
// Should get destinations (using disks that don't have conflicts)
assert.GreaterOrEqual(t, len(multiPlan.Plans), 1)
assert.LessOrEqual(t, len(multiPlan.Plans), 2)
// Available disks should be: node1/disk1 and node2/disk1 (since disk0 on both nodes have conflicts)
for _, plan := range multiPlan.Plans {
assert.Equal(t, uint32(1), plan.TargetDisk, "Should prefer disk 1 which has no conflicts")
}
glog.V(1).Infof("EC destination planning with conflicts: found %d destinations", len(multiPlan.Plans))
// NOTE(review): the three lines below query the outer `topology`, not the
// `freshTopology` this subtest builds, and are unrelated to EC planning.
// They look like lines from a different hunk of the diff - confirm they
// belong in this subtest.
// Test getting node disks
disks := topology.GetNodeDisks("10.0.0.1:8080")
assert.Greater(t, len(disks), 0)
})
}