admin: Refactor task destination planning (#7063)
* refactor planning into task detection * refactoring worker tasks * refactor * compiles, but only balance task is registered * compiles, but has nil exception * avoid nil logger * add back ec task * setting ec log directory * implement balance and vacuum tasks * EC tasks will no longer fail with "file not found" errors * Use ReceiveFile API to send locally generated shards * distributing shard files and ecx,ecj,vif files * generate .ecx files correctly * do not mount all possible EC shards (0-13) on every destination * use constants * delete all replicas * rename files * pass in volume size to tasks
This commit is contained in:
@@ -4,7 +4,6 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/seaweedfs/seaweedfs/weed/glog"
|
||||
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
@@ -478,177 +477,31 @@ func createTopologyWithConflicts() *ActiveTopology {
|
||||
return topology
|
||||
}
|
||||
|
||||
// TestDestinationPlanning tests destination planning functionality
|
||||
// TestDestinationPlanning tests that the public interface works correctly
|
||||
// NOTE: Destination planning is now done in task detection phase, not in ActiveTopology
|
||||
func TestDestinationPlanning(t *testing.T) {
|
||||
topology := NewActiveTopology(10)
|
||||
topology.UpdateTopology(createSampleTopology())
|
||||
|
||||
// Test balance destination planning
|
||||
t.Run("Balance destination planning", func(t *testing.T) {
|
||||
plan, err := topology.PlanBalanceDestination(1001, "10.0.0.1:8080", "rack1", "dc1", 1024*1024) // 1MB
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, plan)
|
||||
// Test that GetAvailableDisks works for destination planning
|
||||
t.Run("GetAvailableDisks functionality", func(t *testing.T) {
|
||||
availableDisks := topology.GetAvailableDisks(TaskTypeBalance, "10.0.0.1:8080")
|
||||
assert.Greater(t, len(availableDisks), 0)
|
||||
|
||||
// Should not target the source node
|
||||
assert.NotEqual(t, "10.0.0.1:8080", plan.TargetNode)
|
||||
assert.Equal(t, "10.0.0.2:8080", plan.TargetNode)
|
||||
assert.NotEmpty(t, plan.TargetRack)
|
||||
assert.NotEmpty(t, plan.TargetDC)
|
||||
assert.Greater(t, plan.PlacementScore, 0.0)
|
||||
})
|
||||
|
||||
// Test EC destination planning
|
||||
t.Run("EC destination planning", func(t *testing.T) {
|
||||
multiPlan, err := topology.PlanECDestinations(1002, "10.0.0.1:8080", "rack1", "dc1", 3) // Ask for 3 shards - source node can be included
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, multiPlan)
|
||||
assert.Greater(t, len(multiPlan.Plans), 0)
|
||||
assert.LessOrEqual(t, len(multiPlan.Plans), 3) // Should get at most 3 shards
|
||||
assert.Equal(t, len(multiPlan.Plans), multiPlan.TotalShards)
|
||||
|
||||
// Check that all plans have valid target nodes
|
||||
for _, plan := range multiPlan.Plans {
|
||||
assert.NotEmpty(t, plan.TargetNode)
|
||||
assert.NotEmpty(t, plan.TargetRack)
|
||||
assert.NotEmpty(t, plan.TargetDC)
|
||||
assert.GreaterOrEqual(t, plan.PlacementScore, 0.0)
|
||||
// Should exclude the source node
|
||||
for _, disk := range availableDisks {
|
||||
assert.NotEqual(t, "10.0.0.1:8080", disk.NodeID)
|
||||
}
|
||||
|
||||
// Check diversity metrics
|
||||
assert.GreaterOrEqual(t, multiPlan.SuccessfulRack, 1)
|
||||
assert.GreaterOrEqual(t, multiPlan.SuccessfulDCs, 1)
|
||||
})
|
||||
|
||||
// Test destination planning with load
|
||||
t.Run("Destination planning considers load", func(t *testing.T) {
|
||||
// Add load to one disk
|
||||
topology.AddPendingTask("task1", TaskTypeBalance, 2001,
|
||||
"10.0.0.2:8080", 0, "", 0)
|
||||
// Test that topology state can be used for planning
|
||||
t.Run("Topology provides planning information", func(t *testing.T) {
|
||||
topologyInfo := topology.GetTopologyInfo()
|
||||
assert.NotNil(t, topologyInfo)
|
||||
assert.Greater(t, len(topologyInfo.DataCenterInfos), 0)
|
||||
|
||||
plan, err := topology.PlanBalanceDestination(1003, "10.0.0.1:8080", "rack1", "dc1", 1024*1024)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, plan)
|
||||
|
||||
// Should prefer less loaded disk (disk 1 over disk 0 on node2)
|
||||
assert.Equal(t, "10.0.0.2:8080", plan.TargetNode)
|
||||
assert.Equal(t, uint32(1), plan.TargetDisk) // Should prefer SSD (disk 1) which has no load
|
||||
})
|
||||
|
||||
// Test insufficient destinations
|
||||
t.Run("Handle insufficient destinations", func(t *testing.T) {
|
||||
// Try to plan for more EC shards than available disks
|
||||
multiPlan, err := topology.PlanECDestinations(1004, "10.0.0.1:8080", "rack1", "dc1", 100)
|
||||
|
||||
// Should get an error for insufficient disks
|
||||
assert.Error(t, err)
|
||||
assert.Nil(t, multiPlan)
|
||||
})
|
||||
}
|
||||
|
||||
// TestDestinationPlanningWithActiveTopology tests the integration between task detection and destination planning
|
||||
func TestDestinationPlanningWithActiveTopology(t *testing.T) {
|
||||
topology := NewActiveTopology(10)
|
||||
topology.UpdateTopology(createUnbalancedTopology())
|
||||
|
||||
// Test that tasks are created with destinations
|
||||
t.Run("Balance task with destination", func(t *testing.T) {
|
||||
// Simulate what the balance detector would create
|
||||
sourceNode := "10.0.0.1:8080" // Overloaded node
|
||||
volumeID := uint32(1001)
|
||||
|
||||
plan, err := topology.PlanBalanceDestination(volumeID, sourceNode, "rack1", "dc1", 1024*1024)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, plan)
|
||||
|
||||
// Verify the destination is different from source
|
||||
assert.NotEqual(t, sourceNode, plan.TargetNode)
|
||||
assert.Equal(t, "10.0.0.2:8080", plan.TargetNode) // Should be the lightly loaded node
|
||||
|
||||
// Verify placement quality
|
||||
assert.Greater(t, plan.PlacementScore, 0.0)
|
||||
assert.LessOrEqual(t, plan.PlacementScore, 1.0)
|
||||
})
|
||||
|
||||
// Test task state integration
|
||||
t.Run("Task state affects future planning", func(t *testing.T) {
|
||||
volumeID := uint32(1002)
|
||||
sourceNode := "10.0.0.1:8080"
|
||||
targetNode := "10.0.0.2:8080"
|
||||
|
||||
// Plan first destination
|
||||
plan1, err := topology.PlanBalanceDestination(volumeID, sourceNode, "rack1", "dc1", 1024*1024)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, plan1)
|
||||
|
||||
// Add a pending task to the target
|
||||
topology.AddPendingTask("task1", TaskTypeBalance, volumeID, sourceNode, 0, targetNode, 0)
|
||||
|
||||
// Plan another destination - should consider the pending task load
|
||||
plan2, err := topology.PlanBalanceDestination(1003, sourceNode, "rack1", "dc1", 1024*1024)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, plan2)
|
||||
|
||||
// The placement score should reflect the increased load
|
||||
// (This test might need adjustment based on the actual scoring algorithm)
|
||||
glog.V(1).Infof("Plan1 score: %.3f, Plan2 score: %.3f", plan1.PlacementScore, plan2.PlacementScore)
|
||||
})
|
||||
}
|
||||
|
||||
// TestECDestinationPlanningDetailed tests the EC destination planning with multiple shards
|
||||
func TestECDestinationPlanningDetailed(t *testing.T) {
|
||||
topology := NewActiveTopology(10)
|
||||
topology.UpdateTopology(createSampleTopology())
|
||||
|
||||
t.Run("EC multiple destinations", func(t *testing.T) {
|
||||
// Plan for 3 EC shards (now including source node, we have 4 disks total)
|
||||
multiPlan, err := topology.PlanECDestinations(1005, "10.0.0.1:8080", "rack1", "dc1", 3)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, multiPlan)
|
||||
|
||||
// Should get 3 destinations (can include source node's disks)
|
||||
assert.Equal(t, 3, len(multiPlan.Plans))
|
||||
assert.Equal(t, 3, multiPlan.TotalShards)
|
||||
|
||||
// Count node distribution - source node can now be included
|
||||
nodeCount := make(map[string]int)
|
||||
for _, plan := range multiPlan.Plans {
|
||||
nodeCount[plan.TargetNode]++
|
||||
}
|
||||
|
||||
// Should distribute across available nodes (both nodes can be used)
|
||||
assert.GreaterOrEqual(t, len(nodeCount), 1, "Should use at least 1 node")
|
||||
assert.LessOrEqual(t, len(nodeCount), 2, "Should use at most 2 nodes")
|
||||
glog.V(1).Infof("EC destinations node distribution: %v", nodeCount)
|
||||
|
||||
glog.V(1).Infof("EC destinations: %d plans across %d racks, %d DCs",
|
||||
multiPlan.TotalShards, multiPlan.SuccessfulRack, multiPlan.SuccessfulDCs)
|
||||
})
|
||||
|
||||
t.Run("EC destination planning with task conflicts", func(t *testing.T) {
|
||||
// Create a fresh topology for this test to avoid conflicts from previous test
|
||||
freshTopology := NewActiveTopology(10)
|
||||
freshTopology.UpdateTopology(createSampleTopology())
|
||||
|
||||
// Add tasks to create conflicts on some disks
|
||||
freshTopology.AddPendingTask("conflict1", TaskTypeVacuum, 2001, "10.0.0.2:8080", 0, "", 0)
|
||||
freshTopology.AddPendingTask("conflict2", TaskTypeBalance, 2002, "10.0.0.1:8080", 0, "", 0)
|
||||
freshTopology.AssignTask("conflict1")
|
||||
freshTopology.AssignTask("conflict2")
|
||||
|
||||
// Plan EC destinations - should still succeed using available disks
|
||||
multiPlan, err := freshTopology.PlanECDestinations(1006, "10.0.0.1:8080", "rack1", "dc1", 2)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, multiPlan)
|
||||
|
||||
// Should get destinations (using disks that don't have conflicts)
|
||||
assert.GreaterOrEqual(t, len(multiPlan.Plans), 1)
|
||||
assert.LessOrEqual(t, len(multiPlan.Plans), 2)
|
||||
|
||||
// Available disks should be: node1/disk1 and node2/disk1 (since disk0 on both nodes have conflicts)
|
||||
for _, plan := range multiPlan.Plans {
|
||||
assert.Equal(t, uint32(1), plan.TargetDisk, "Should prefer disk 1 which has no conflicts")
|
||||
}
|
||||
|
||||
glog.V(1).Infof("EC destination planning with conflicts: found %d destinations", len(multiPlan.Plans))
|
||||
// Test getting node disks
|
||||
disks := topology.GetNodeDisks("10.0.0.1:8080")
|
||||
assert.Greater(t, len(disks), 0)
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user