chore: remove ~50k lines of unreachable dead code (#8913)

* chore: remove unreachable dead code across the codebase

Remove ~50,000 lines of unreachable code identified by static analysis.

Major removals:
- weed/filer/redis_lua: entire unused Redis Lua filer store implementation
- weed/wdclient/net2, resource_pool: unused connection/resource pool packages
- weed/plugin/worker/lifecycle: unused lifecycle plugin worker
- weed/s3api: unused S3 policy templates, presigned URL IAM, streaming copy,
  multipart IAM, key rotation, and various SSE helper functions
- weed/mq/kafka: unused partition mapping, compression, schema, and protocol functions
- weed/mq/offset: unused SQL storage and migration code
- weed/worker: unused registry, task, and monitoring functions
- weed/query: unused SQL engine, parquet scanner, and type functions
- weed/shell: unused EC proportional rebalance functions
- weed/storage/erasure_coding/distribution: unused distribution analysis functions
- Individual unreachable functions removed from 150+ files across admin,
  credential, filer, iam, kms, mount, mq, operation, pb, s3api, server,
  shell, storage, topology, and util packages

* fix(s3): reset shared memory store in IAM test to prevent flaky failure

TestLoadIAMManagerFromConfig_EmptyConfigWithFallbackKey was flaky because
the MemoryStore credential backend is a singleton registered via init().
Earlier tests that create anonymous identities pollute the shared store,
causing LookupAnonymous() to unexpectedly return true.

Fix by calling Reset() on the memory store before the test runs.

* style: run gofmt on changed files

* fix: restore KMS functions used by integration tests

* fix(plugin): prevent panic on send to closed worker session channel

The Plugin.sendToWorker method could panic with "send on closed channel"
when a worker disconnected while a message was being sent. The race was
between streamSession.close() closing the outgoing channel and sendToWorker
writing to it concurrently.

Add a done channel to streamSession that is closed before the outgoing
channel, and check it in sendToWorker's select to safely detect closed
sessions without panicking.
This commit is contained in:
Chris Lu
2026-04-03 16:04:27 -07:00
committed by GitHub
parent 8fad85aed7
commit 995dfc4d5d
264 changed files with 62 additions and 46027 deletions

View File

@@ -1,10 +1,5 @@
package distribution
import (
"fmt"
"slices"
)
// ShardLocation represents where a shard is located in the topology
type ShardLocation struct {
ShardID int
@@ -47,101 +42,6 @@ type TopologyAnalysis struct {
TotalDCs int
}
// NewTopologyAnalysis returns an analysis whose lookup maps are all allocated
// and empty; the Total* counters start at zero.
func NewTopologyAnalysis() *TopologyAnalysis {
	analysis := new(TopologyAnalysis)

	// Shard counters per topology level.
	analysis.ShardsByDC = map[string]int{}
	analysis.ShardsByRack = map[string]int{}
	analysis.ShardsByNode = map[string]int{}

	// Shard ID lists per topology level.
	analysis.DCToShards = map[string][]int{}
	analysis.RackToShards = map[string][]int{}
	analysis.NodeToShards = map[string][]int{}

	// Topology structure.
	analysis.DCToRacks = map[string][]string{}
	analysis.RackToNodes = map[string][]*TopologyNode{}
	analysis.AllNodes = map[string]*TopologyNode{}

	return analysis
}
// AddShardLocation records one shard placement. For the location's data
// center, rack and node it bumps the shard counter and appends the shard ID
// to the corresponding list, then increments TotalShards.
func (a *TopologyAnalysis) AddShardLocation(loc ShardLocation) {
	dc, rack, node, shard := loc.DataCenter, loc.Rack, loc.NodeID, loc.ShardID

	// Data center level.
	a.ShardsByDC[dc]++
	a.DCToShards[dc] = append(a.DCToShards[dc], shard)

	// Rack level.
	a.ShardsByRack[rack]++
	a.RackToShards[rack] = append(a.RackToShards[rack], shard)

	// Node level.
	a.ShardsByNode[node]++
	a.NodeToShards[node] = append(a.NodeToShards[node], shard)

	a.TotalShards++
}
// AddNode registers a node in the topology, even if it holds no shards.
// Adding a node whose NodeID is already known is a no-op.
//
// TotalDCs and TotalRacks are incremented only the first time a data center
// or rack is seen among the added nodes. Whether a DC/rack is "new" is decided
// from the topology maps (DCToRacks / RackToNodes), not from the shard
// counters: consulting the shard counters made every node of a shard-less DC
// or rack bump the total again.
func (a *TopologyAnalysis) AddNode(node *TopologyNode) {
	if _, exists := a.AllNodes[node.NodeID]; exists {
		return // Already added
	}

	// Capture "seen before" state prior to mutating the topology maps.
	_, dcKnown := a.DCToRacks[node.DataCenter]
	_, rackKnown := a.RackToNodes[node.Rack]

	a.AllNodes[node.NodeID] = node
	a.TotalNodes++

	// Update topology structure.
	if !slices.Contains(a.DCToRacks[node.DataCenter], node.Rack) {
		a.DCToRacks[node.DataCenter] = append(a.DCToRacks[node.DataCenter], node.Rack)
	}
	a.RackToNodes[node.Rack] = append(a.RackToNodes[node.Rack], node)

	// Update distinct DC / rack counts.
	if !dcKnown {
		a.TotalDCs++
	}
	if !rackKnown {
		a.TotalRacks++
	}
}
// Finalize recomputes TotalDCs, TotalRacks and TotalNodes from AllNodes, so
// the totals reflect the distinct data centers and racks of the added nodes.
func (a *TopologyAnalysis) Finalize() {
	distinctDCs := map[string]struct{}{}
	distinctRacks := map[string]struct{}{}

	for _, n := range a.AllNodes {
		distinctDCs[n.DataCenter] = struct{}{}
		distinctRacks[n.Rack] = struct{}{}
	}

	a.TotalNodes = len(a.AllNodes)
	a.TotalRacks = len(distinctRacks)
	a.TotalDCs = len(distinctDCs)
}
// String renders the four totals (shards, nodes, racks, DCs) on one line.
func (a *TopologyAnalysis) String() string {
	const layout = "TopologyAnalysis{shards:%d, nodes:%d, racks:%d, dcs:%d}"
	return fmt.Sprintf(layout, a.TotalShards, a.TotalNodes, a.TotalRacks, a.TotalDCs)
}
// DetailedString returns a multi-line summary: the totals, followed by the
// per-DC, per-rack and per-node shard counts.
//
// Entries within each section are listed in ascending key order so the output
// is stable between calls (ranging over the maps directly yields an
// unspecified order). Nodes whose shard count is not positive are omitted.
func (a *TopologyAnalysis) DetailedString() string {
	// sortedKeys returns the keys of counts in ascending order.
	sortedKeys := func(counts map[string]int) []string {
		keys := make([]string, 0, len(counts))
		for k := range counts {
			keys = append(keys, k)
		}
		slices.Sort(keys)
		return keys
	}

	// Accumulate into one buffer instead of re-allocating a string per line.
	var buf []byte
	buf = append(buf, "Topology Analysis:\n"...)
	buf = fmt.Appendf(buf, " Total Shards: %d\n", a.TotalShards)

	buf = fmt.Appendf(buf, " Data Centers: %d\n", a.TotalDCs)
	for _, dc := range sortedKeys(a.ShardsByDC) {
		buf = fmt.Appendf(buf, " %s: %d shards\n", dc, a.ShardsByDC[dc])
	}

	buf = fmt.Appendf(buf, " Racks: %d\n", a.TotalRacks)
	for _, rack := range sortedKeys(a.ShardsByRack) {
		buf = fmt.Appendf(buf, " %s: %d shards\n", rack, a.ShardsByRack[rack])
	}

	buf = fmt.Appendf(buf, " Nodes: %d\n", a.TotalNodes)
	for _, nodeID := range sortedKeys(a.ShardsByNode) {
		if count := a.ShardsByNode[nodeID]; count > 0 {
			buf = fmt.Appendf(buf, " %s: %d shards\n", nodeID, count)
		}
	}
	return string(buf)
}
// TopologyExcess represents a topology level (DC/rack/node) with excess shards
type TopologyExcess struct {
ID string // DC/rack/node ID
@@ -150,91 +50,3 @@ type TopologyExcess struct {
Shards []int // Shard IDs at this level
Nodes []*TopologyNode // Nodes at this level (for finding sources)
}
// CalculateDCExcess returns one TopologyExcess entry for every data center
// holding more shards than dist.TargetShardsPerDC. Each entry carries the
// DC's shard IDs and all nodes found in the DC's racks.
//
// The result is ordered by excess, largest first; DCs with equal excess are
// ordered by ID so the result does not depend on map iteration order.
func CalculateDCExcess(analysis *TopologyAnalysis, dist *ECDistribution) []TopologyExcess {
	var excess []TopologyExcess

	for dc, count := range analysis.ShardsByDC {
		if count <= dist.TargetShardsPerDC {
			continue
		}

		// Collect nodes in this DC.
		var nodes []*TopologyNode
		for _, rack := range analysis.DCToRacks[dc] {
			nodes = append(nodes, analysis.RackToNodes[rack]...)
		}

		excess = append(excess, TopologyExcess{
			ID:     dc,
			Level:  "dc",
			Excess: count - dist.TargetShardsPerDC,
			Shards: analysis.DCToShards[dc],
			Nodes:  nodes,
		})
	}

	// Most excess first; explicit comparisons rather than subtraction, with
	// the ID as a deterministic tie-break.
	slices.SortFunc(excess, func(x, y TopologyExcess) int {
		switch {
		case x.Excess > y.Excess:
			return -1
		case x.Excess < y.Excess:
			return 1
		case x.ID < y.ID:
			return -1
		case x.ID > y.ID:
			return 1
		}
		return 0
	})

	return excess
}
// CalculateRackExcess lists the racks of data center dc that hold more than
// targetPerRack shards, ordered by how far they exceed the target (largest
// excess first). Each entry carries the rack's shard IDs and nodes.
func CalculateRackExcess(analysis *TopologyAnalysis, dc string, targetPerRack int) []TopologyExcess {
	var overloaded []TopologyExcess

	for _, rackID := range analysis.DCToRacks[dc] {
		held := analysis.ShardsByRack[rackID]
		if held <= targetPerRack {
			continue
		}
		entry := TopologyExcess{
			ID:     rackID,
			Level:  "rack",
			Excess: held - targetPerRack,
			Shards: analysis.RackToShards[rackID],
			Nodes:  analysis.RackToNodes[rackID],
		}
		overloaded = append(overloaded, entry)
	}

	byExcessDesc := func(a, b TopologyExcess) int {
		return b.Excess - a.Excess
	}
	slices.SortFunc(overloaded, byExcessDesc)

	return overloaded
}
// CalculateUnderservedDCs returns the data centers that hold fewer shards
// than dist.TargetShardsPerDC. That covers DCs with a recorded shard count
// below the target and DCs that appear in the topology (DCToRacks) but have
// no entry in ShardsByDC.
//
// The result is sorted by DC ID so it does not depend on map iteration order.
func CalculateUnderservedDCs(analysis *TopologyAnalysis, dist *ECDistribution) []string {
	var underserved []string

	// DCs that have shards, but fewer than the target.
	for dc, count := range analysis.ShardsByDC {
		if count < dist.TargetShardsPerDC {
			underserved = append(underserved, dc)
		}
	}

	// DCs with nodes but no shards.
	for dc := range analysis.DCToRacks {
		if _, exists := analysis.ShardsByDC[dc]; !exists {
			underserved = append(underserved, dc)
		}
	}

	slices.Sort(underserved)
	return underserved
}
// CalculateUnderservedRacks lists, in DCToRacks order, the racks of data
// center dc whose shard count is below targetPerRack. Racks without an entry
// in ShardsByRack count as holding zero shards.
func CalculateUnderservedRacks(analysis *TopologyAnalysis, dc string, targetPerRack int) []string {
	var lacking []string
	racks := analysis.DCToRacks[dc]
	for i := range racks {
		if analysis.ShardsByRack[racks[i]] >= targetPerRack {
			continue
		}
		lacking = append(lacking, racks[i])
	}
	return lacking
}

View File

@@ -1,12 +1,6 @@
// Package distribution provides EC shard distribution algorithms with configurable EC ratios.
package distribution
import (
"fmt"
"github.com/seaweedfs/seaweedfs/weed/storage/super_block"
)
// ECConfig holds erasure coding configuration parameters.
// This replaces hard-coded constants like DataShardsCount=10, ParityShardsCount=4.
type ECConfig struct {
@@ -14,113 +8,6 @@ type ECConfig struct {
ParityShards int // Number of parity shards (e.g., 4)
}
// DefaultECConfig returns the standard layout of 10 data shards plus
// 4 parity shards.
func DefaultECConfig() ECConfig {
	var cfg ECConfig
	cfg.DataShards = 10
	cfg.ParityShards = 4
	return cfg
}
// NewECConfig builds an ECConfig after validating its inputs. Both shard
// counts must be positive and their sum must not exceed 32; otherwise the
// zero ECConfig and a descriptive error are returned.
func NewECConfig(dataShards, parityShards int) (ECConfig, error) {
	total := dataShards + parityShards
	switch {
	case dataShards <= 0:
		return ECConfig{}, fmt.Errorf("dataShards must be positive, got %d", dataShards)
	case parityShards <= 0:
		return ECConfig{}, fmt.Errorf("parityShards must be positive, got %d", parityShards)
	case total > 32:
		return ECConfig{}, fmt.Errorf("total shards (%d+%d=%d) exceeds maximum of 32",
			dataShards, parityShards, total)
	}
	cfg := ECConfig{DataShards: dataShards, ParityShards: parityShards}
	return cfg, nil
}
// TotalShards reports how many shards the configuration has in total:
// data shards plus parity shards.
func (c ECConfig) TotalShards() int {
	total := c.DataShards
	total += c.ParityShards
	return total
}
// MaxTolerableLoss reports how many shards may be lost while the data can
// still be reconstructed; this equals the number of parity shards.
func (c ECConfig) MaxTolerableLoss() int {
	tolerable := c.ParityShards
	return tolerable
}
// MinShardsForReconstruction reports the smallest number of shards from
// which the original data can be rebuilt; this equals the number of data
// shards.
func (c ECConfig) MinShardsForReconstruction() int {
	required := c.DataShards
	return required
}
// String formats the configuration as "D+P (total: T, can lose: P)", where
// D and P are the data and parity shard counts and T is their sum.
func (c ECConfig) String() string {
	data, parity := c.DataShards, c.ParityShards
	return fmt.Sprintf("%d+%d (total: %d, can lose: %d)", data, parity, data+parity, parity)
}
// IsDataShard reports whether shardID lies in the data-shard range
// [0, DataShards).
func (c ECConfig) IsDataShard(shardID int) bool {
	if shardID < 0 {
		return false
	}
	return shardID < c.DataShards
}
// IsParityShard reports whether shardID lies in the parity-shard range
// [DataShards, DataShards+ParityShards).
func (c ECConfig) IsParityShard(shardID int) bool {
	if shardID < c.DataShards {
		return false
	}
	return shardID < c.DataShards+c.ParityShards
}
// SortShardsDataFirst returns a new slice containing the given shard IDs
// with all data shards ahead of all other shards. The input is not modified.
// This is useful for initial placement where data shards should be spread
// out first.
//
// The partition is stable: within each group the IDs keep the relative order
// they had in the input. (Filling the second group from the back of the
// result reversed its order.)
func (c ECConfig) SortShardsDataFirst(shards []int) []int {
	sorted := make([]int, 0, len(shards))
	var rest []int // everything that is not a data shard, in input order

	for _, s := range shards {
		if c.IsDataShard(s) {
			sorted = append(sorted, s)
		} else {
			rest = append(rest, s)
		}
	}
	return append(sorted, rest...)
}
// SortShardsParityFirst returns a new slice containing the given shard IDs
// with all parity shards ahead of all other shards. The input is not
// modified. This is useful for rebalancing where moving parity shards is
// preferred.
//
// The partition is stable: within each group the IDs keep the relative order
// they had in the input. (Filling the second group from the back of the
// result reversed its order.)
func (c ECConfig) SortShardsParityFirst(shards []int) []int {
	sorted := make([]int, 0, len(shards))
	var rest []int // everything that is not a parity shard, in input order

	for _, s := range shards {
		if c.IsParityShard(s) {
			sorted = append(sorted, s)
		} else {
			rest = append(rest, s)
		}
	}
	return append(sorted, rest...)
}
// ReplicationConfig holds the parsed replication policy
type ReplicationConfig struct {
MinDataCenters int // X+1 from XYZ replication (minimum DCs to use)
@@ -130,42 +17,3 @@ type ReplicationConfig struct {
// Original replication string (for logging/debugging)
Original string
}
// NewReplicationConfig derives a ReplicationConfig from a ReplicaPlacement.
// Each minimum is the corresponding placement count plus one, and Original
// is the placement's string form. A nil placement yields minimums of 1 and
// Original "000".
func NewReplicationConfig(rp *super_block.ReplicaPlacement) ReplicationConfig {
	cfg := ReplicationConfig{
		MinDataCenters:  1,
		MinRacksPerDC:   1,
		MinNodesPerRack: 1,
		Original:        "000",
	}
	if rp != nil {
		cfg.MinDataCenters = rp.DiffDataCenterCount + 1
		cfg.MinRacksPerDC = rp.DiffRackCount + 1
		cfg.MinNodesPerRack = rp.SameRackCount + 1
		cfg.Original = rp.String()
	}
	return cfg
}
// NewReplicationConfigFromString parses a replication string with
// super_block.NewReplicaPlacementFromString and converts the result with
// NewReplicationConfig. A parse error is returned unchanged together with
// the zero ReplicationConfig.
func NewReplicationConfigFromString(replication string) (ReplicationConfig, error) {
	placement, parseErr := super_block.NewReplicaPlacementFromString(replication)
	if parseErr == nil {
		return NewReplicationConfig(placement), nil
	}
	return ReplicationConfig{}, parseErr
}
// TotalPlacementSlots returns the product of the three minimums
// (data centers x racks per DC x nodes per rack), i.e. the minimum number of
// distinct placement locations the replication policy calls for.
func (r ReplicationConfig) TotalPlacementSlots() int {
	slots := r.MinDataCenters
	slots *= r.MinRacksPerDC
	slots *= r.MinNodesPerRack
	return slots
}
// String formats the original replication string followed by the three
// derived minimums.
func (r ReplicationConfig) String() string {
	const layout = "replication=%s (DCs:%d, Racks/DC:%d, Nodes/Rack:%d)"
	return fmt.Sprintf(layout, r.Original, r.MinDataCenters, r.MinRacksPerDC, r.MinNodesPerRack)
}

View File

@@ -1,9 +1,5 @@
package distribution
import (
"fmt"
)
// ECDistribution represents the target distribution of EC shards
// based on EC configuration and replication policy.
type ECDistribution struct {
@@ -24,137 +20,3 @@ type ECDistribution struct {
MaxShardsPerRack int
MaxShardsPerNode int
}
// CalculateDistribution computes the target EC shard distribution for an EC
// configuration under a replication policy.
//
// Targets: the total shard count is divided (rounding up) by the minimum
// number of DCs, the per-DC target by the minimum racks per DC, and the
// per-rack target by the minimum nodes per rack.
//
// Maximums: each level's maximum is its target plus 2, capped at
// totalShards - parityShards + 1.
func CalculateDistribution(ec ECConfig, rep ReplicationConfig) *ECDistribution {
	total := ec.TotalShards()

	// Balanced targets, rounded up so that every shard has a place.
	perDC := ceilDivide(total, rep.MinDataCenters)
	perRack := ceilDivide(perDC, rep.MinRacksPerDC)
	perNode := ceilDivide(perRack, rep.MinNodesPerRack)

	// Upper bound shared by all levels.
	ceiling := total - ec.ParityShards + 1
	// capped allows a slack of two shards over the target, within ceiling.
	capped := func(target int) int {
		return min(ceiling, target+2)
	}

	dist := &ECDistribution{ECConfig: ec, ReplicationConfig: rep}
	dist.TargetShardsPerDC, dist.MaxShardsPerDC = perDC, capped(perDC)
	dist.TargetShardsPerRack, dist.MaxShardsPerRack = perRack, capped(perRack)
	dist.TargetShardsPerNode, dist.MaxShardsPerNode = perNode, capped(perNode)
	return dist
}
// String renders the distribution on one line: the EC configuration, then
// for each level (DC, rack, node) the policy minimum with the target and
// maximum shard counts.
func (d *ECDistribution) String() string {
	const layout = "ECDistribution{EC:%s, DCs:%d (target:%d/max:%d), Racks/DC:%d (target:%d/max:%d), Nodes/Rack:%d (target:%d/max:%d)}"
	rep := d.ReplicationConfig
	return fmt.Sprintf(layout,
		d.ECConfig.String(),
		rep.MinDataCenters, d.TargetShardsPerDC, d.MaxShardsPerDC,
		rep.MinRacksPerDC, d.TargetShardsPerRack, d.MaxShardsPerRack,
		rep.MinNodesPerRack, d.TargetShardsPerNode, d.MaxShardsPerNode,
	)
}
// Summary returns a multi-line description of the distribution plan: the EC
// configuration, the replication policy, and for each topology level the
// policy minimum with the target and maximum shard counts.
//
// The text is produced by a single format call; the previous version
// concatenated six strings and called fmt.Sprintf on a constant with no
// arguments.
func (d *ECDistribution) Summary() string {
	return fmt.Sprintf("EC Configuration: %s\n"+
		"Replication: %s\n"+
		"Distribution Plan:\n"+
		" Data Centers: %d (target %d shards each, max %d)\n"+
		" Racks per DC: %d (target %d shards each, max %d)\n"+
		" Nodes per Rack: %d (target %d shards each, max %d)\n",
		d.ECConfig.String(),
		d.ReplicationConfig.String(),
		d.ReplicationConfig.MinDataCenters, d.TargetShardsPerDC, d.MaxShardsPerDC,
		d.ReplicationConfig.MinRacksPerDC, d.TargetShardsPerRack, d.MaxShardsPerRack,
		d.ReplicationConfig.MinNodesPerRack, d.TargetShardsPerNode, d.MaxShardsPerNode,
	)
}
// CanSurviveDCFailure reports whether the data stays reconstructible after
// losing one data center that holds the per-DC target number of shards.
func (d *ECDistribution) CanSurviveDCFailure() bool {
	survivors := d.ECConfig.TotalShards() - d.TargetShardsPerDC
	required := d.ECConfig.MinShardsForReconstruction()
	return survivors >= required
}
// CanSurviveRackFailure reports whether the data stays reconstructible after
// losing one rack that holds the per-rack target number of shards.
func (d *ECDistribution) CanSurviveRackFailure() bool {
	survivors := d.ECConfig.TotalShards() - d.TargetShardsPerRack
	required := d.ECConfig.MinShardsForReconstruction()
	return survivors >= required
}
// MinDCsForDCFaultTolerance returns how many data centers are needed so that
// no single DC has to hold more shards than the configuration can afford to
// lose: total shards divided by the tolerable loss, rounded up. When the
// tolerable loss is zero, one DC per shard is reported.
func (d *ECDistribution) MinDCsForDCFaultTolerance() int {
	total := d.ECConfig.TotalShards()
	perDCLimit := d.ECConfig.MaxTolerableLoss()
	if perDCLimit != 0 {
		return ceilDivide(total, perDCLimit)
	}
	return total // Would need one DC per shard
}
// FaultToleranceAnalysis returns a multi-line report covering the loss of one
// DC, one rack and one node. For each case it states whether the loss is
// survivable, how many shards go missing (the level's target) and how many
// remain versus the number of data shards needed. When a DC loss is not
// survivable, the minimum DC count for DC fault tolerance is added.
func (d *ECDistribution) FaultToleranceAnalysis() string {
	total := d.ECConfig.TotalShards()
	needed := d.ECConfig.DataShards

	report := fmt.Sprintf("Fault Tolerance Analysis for %s:\n", d.ECConfig.String())

	// Data center loss.
	dcOK := d.CanSurviveDCFailure()
	report += fmt.Sprintf(" DC Failure: %s\n", boolToResult(dcOK))
	report += fmt.Sprintf(" - Losing one DC loses ~%d shards\n", d.TargetShardsPerDC)
	report += fmt.Sprintf(" - Remaining: %d shards (need %d)\n", total-d.TargetShardsPerDC, needed)
	if !dcOK {
		report += fmt.Sprintf(" - Need at least %d DCs for DC fault tolerance\n", d.MinDCsForDCFaultTolerance())
	}

	// Rack loss.
	rackOK := d.CanSurviveRackFailure()
	report += fmt.Sprintf(" Rack Failure: %s\n", boolToResult(rackOK))
	report += fmt.Sprintf(" - Losing one rack loses ~%d shards\n", d.TargetShardsPerRack)
	report += fmt.Sprintf(" - Remaining: %d shards (need %d)\n", total-d.TargetShardsPerRack, needed)

	// Node loss (usually survivable).
	afterNode := total - d.TargetShardsPerNode
	nodeOK := afterNode >= needed
	report += fmt.Sprintf(" Node Failure: %s\n", boolToResult(nodeOK))
	report += fmt.Sprintf(" - Losing one node loses ~%d shards\n", d.TargetShardsPerNode)
	report += fmt.Sprintf(" - Remaining: %d shards (need %d)\n", afterNode, needed)

	return report
}
// boolToResult maps a survivability flag to its display label.
func boolToResult(b bool) string {
	label := "NOT SURVIVABLE ✗"
	if b {
		label = "SURVIVABLE ✓"
	}
	return label
}
// ceilDivide returns a divided by b, rounded up, computed as (a+b-1)/b.
// When b is zero or negative the division is skipped and a is returned
// unchanged.
func ceilDivide(a, b int) int {
	if b > 0 {
		return (a + b - 1) / b
	}
	return a
}

View File

@@ -1,565 +0,0 @@
package distribution
import (
"testing"
)
// TestNewECConfig checks that NewECConfig accepts valid data/parity
// combinations and rejects zero counts and totals above 32.
func TestNewECConfig(t *testing.T) {
	type testCase struct {
		name         string
		dataShards   int
		parityShards int
		wantErr      bool
	}
	cases := []testCase{
		{name: "valid 10+4", dataShards: 10, parityShards: 4},
		{name: "valid 8+4", dataShards: 8, parityShards: 4},
		{name: "valid 6+3", dataShards: 6, parityShards: 3},
		{name: "valid 4+2", dataShards: 4, parityShards: 2},
		{name: "invalid data=0", dataShards: 0, parityShards: 4, wantErr: true},
		{name: "invalid parity=0", dataShards: 10, parityShards: 0, wantErr: true},
		{name: "invalid total>32", dataShards: 20, parityShards: 15, wantErr: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			cfg, err := NewECConfig(tc.dataShards, tc.parityShards)
			if gotErr := err != nil; gotErr != tc.wantErr {
				t.Errorf("NewECConfig() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if tc.wantErr {
				return // nothing further to verify on the error path
			}

			if cfg.DataShards != tc.dataShards {
				t.Errorf("DataShards = %d, want %d", cfg.DataShards, tc.dataShards)
			}
			if cfg.ParityShards != tc.parityShards {
				t.Errorf("ParityShards = %d, want %d", cfg.ParityShards, tc.parityShards)
			}
			wantTotal := tc.dataShards + tc.parityShards
			if cfg.TotalShards() != wantTotal {
				t.Errorf("TotalShards() = %d, want %d", cfg.TotalShards(), wantTotal)
			}
		})
	}
}
// TestCalculateDistribution checks, for several EC ratios and replication
// strings, the parsed replication minimums and the per-DC, per-rack and
// per-node shard targets produced by CalculateDistribution.
func TestCalculateDistribution(t *testing.T) {
	type expectation struct {
		minDCs, minRacksPerDC, minNodesPerRack int
		perDC, perRack, perNode                int
	}
	cases := []struct {
		name        string
		ecConfig    ECConfig
		replication string
		exp         expectation
	}{
		{"10+4 with 000", DefaultECConfig(), "000", expectation{1, 1, 1, 14, 14, 14}},
		{"10+4 with 100", DefaultECConfig(), "100", expectation{2, 1, 1, 7, 7, 7}},
		{"10+4 with 110", DefaultECConfig(), "110", expectation{2, 2, 1, 7, 4, 4}},
		{"10+4 with 200", DefaultECConfig(), "200", expectation{3, 1, 1, 5, 5, 5}},
		// 12/2 = 6 per DC, 6/2 = 3 per rack
		{"8+4 with 110", ECConfig{DataShards: 8, ParityShards: 4}, "110", expectation{2, 2, 1, 6, 3, 3}},
		// ceil(9/2) = 5 per DC
		{"6+3 with 100", ECConfig{DataShards: 6, ParityShards: 3}, "100", expectation{2, 1, 1, 5, 5, 5}},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			rep, err := NewReplicationConfigFromString(tc.replication)
			if err != nil {
				t.Fatalf("Failed to parse replication %s: %v", tc.replication, err)
			}

			dist := CalculateDistribution(tc.ecConfig, rep)
			parsed := dist.ReplicationConfig

			if got := parsed.MinDataCenters; got != tc.exp.minDCs {
				t.Errorf("MinDataCenters = %d, want %d", got, tc.exp.minDCs)
			}
			if got := parsed.MinRacksPerDC; got != tc.exp.minRacksPerDC {
				t.Errorf("MinRacksPerDC = %d, want %d", got, tc.exp.minRacksPerDC)
			}
			if got := parsed.MinNodesPerRack; got != tc.exp.minNodesPerRack {
				t.Errorf("MinNodesPerRack = %d, want %d", got, tc.exp.minNodesPerRack)
			}
			if got := dist.TargetShardsPerDC; got != tc.exp.perDC {
				t.Errorf("TargetShardsPerDC = %d, want %d", got, tc.exp.perDC)
			}
			if got := dist.TargetShardsPerRack; got != tc.exp.perRack {
				t.Errorf("TargetShardsPerRack = %d, want %d", got, tc.exp.perRack)
			}
			if got := dist.TargetShardsPerNode; got != tc.exp.perNode {
				t.Errorf("TargetShardsPerNode = %d, want %d", got, tc.exp.perNode)
			}

			t.Logf("Distribution for %s: %s", tc.name, dist.String())
		})
	}
}
// TestFaultToleranceAnalysis checks CanSurviveDCFailure and
// CanSurviveRackFailure for several EC ratios and replication strings.
// A replication string that fails to parse aborts the subtest instead of
// silently continuing with a zero-value ReplicationConfig.
func TestFaultToleranceAnalysis(t *testing.T) {
	tests := []struct {
		name           string
		ecConfig       ECConfig
		replication    string
		canSurviveDC   bool
		canSurviveRack bool
	}{
		// 10+4 = 14 shards, need 10 to reconstruct, can lose 4
		{"10+4 000", DefaultECConfig(), "000", false, false}, // All in one, any failure is fatal
		{"10+4 100", DefaultECConfig(), "100", false, false}, // 7 per DC/rack, 7 remaining < 10
		{"10+4 200", DefaultECConfig(), "200", false, false}, // 5 per DC/rack, 9 remaining < 10
		{"10+4 110", DefaultECConfig(), "110", false, true},  // 4 per rack, 10 remaining = enough for rack
		// 8+4 = 12 shards, need 8 to reconstruct, can lose 4
		{"8+4 100", ECConfig{8, 4}, "100", false, false}, // 6 per DC/rack, 6 remaining < 8
		{"8+4 200", ECConfig{8, 4}, "200", true, true},   // 4 per DC/rack, 8 remaining = enough!
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			rep, err := NewReplicationConfigFromString(tt.replication)
			if err != nil {
				t.Fatalf("Failed to parse replication %s: %v", tt.replication, err)
			}
			dist := CalculateDistribution(tt.ecConfig, rep)

			if dist.CanSurviveDCFailure() != tt.canSurviveDC {
				t.Errorf("CanSurviveDCFailure() = %v, want %v", dist.CanSurviveDCFailure(), tt.canSurviveDC)
			}
			if dist.CanSurviveRackFailure() != tt.canSurviveRack {
				t.Errorf("CanSurviveRackFailure() = %v, want %v", dist.CanSurviveRackFailure(), tt.canSurviveRack)
			}

			t.Log(dist.FaultToleranceAnalysis())
		})
	}
}
// TestMinDCsForDCFaultTolerance checks the minimum DC count needed to
// survive a DC failure for several EC ratios. The replication parse error is
// checked rather than discarded.
func TestMinDCsForDCFaultTolerance(t *testing.T) {
	tests := []struct {
		name     string
		ecConfig ECConfig
		minDCs   int
	}{
		// 10+4: can lose 4, so max 4 per DC, ceil(14/4) = 4 DCs needed
		{"10+4", DefaultECConfig(), 4},
		// 8+4: can lose 4, so max 4 per DC, 12/4 = 3 DCs needed
		{"8+4", ECConfig{8, 4}, 3},
		// 6+3: can lose 3, so max 3 per DC, 9/3 = 3 DCs needed
		{"6+3", ECConfig{6, 3}, 3},
		// 4+2: can lose 2, so max 2 per DC, 6/2 = 3 DCs needed
		{"4+2", ECConfig{4, 2}, 3},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			rep, err := NewReplicationConfigFromString("000")
			if err != nil {
				t.Fatalf("Failed to parse replication %s: %v", "000", err)
			}
			dist := CalculateDistribution(tt.ecConfig, rep)

			if dist.MinDCsForDCFaultTolerance() != tt.minDCs {
				t.Errorf("MinDCsForDCFaultTolerance() = %d, want %d",
					dist.MinDCsForDCFaultTolerance(), tt.minDCs)
			}

			t.Logf("%s: needs %d DCs for DC fault tolerance", tt.name, dist.MinDCsForDCFaultTolerance())
		})
	}
}
// TestTopologyAnalysis builds a three-node topology, places all 14 shards on
// node1 and verifies the total and the per-DC, per-rack and per-node shard
// counts.
func TestTopologyAnalysis(t *testing.T) {
	analysis := NewTopologyAnalysis()

	// Register the nodes.
	topology := []*TopologyNode{
		{NodeID: "node1", DataCenter: "dc1", Rack: "rack1", FreeSlots: 5},
		{NodeID: "node2", DataCenter: "dc1", Rack: "rack2", FreeSlots: 10},
		{NodeID: "node3", DataCenter: "dc2", Rack: "rack3", FreeSlots: 10},
	}
	for _, n := range topology {
		analysis.AddNode(n)
	}

	// Place every shard on node1.
	for shardID := 0; shardID < 14; shardID++ {
		loc := ShardLocation{
			ShardID:    shardID,
			NodeID:     "node1",
			DataCenter: "dc1",
			Rack:       "rack1",
		}
		analysis.AddShardLocation(loc)
	}

	analysis.Finalize()

	// Verify counts.
	if got := analysis.TotalShards; got != 14 {
		t.Errorf("TotalShards = %d, want 14", got)
	}
	if got := analysis.ShardsByDC["dc1"]; got != 14 {
		t.Errorf("ShardsByDC[dc1] = %d, want 14", got)
	}
	if got := analysis.ShardsByRack["rack1"]; got != 14 {
		t.Errorf("ShardsByRack[rack1] = %d, want 14", got)
	}
	if got := analysis.ShardsByNode["node1"]; got != 14 {
		t.Errorf("ShardsByNode[node1] = %d, want 14", got)
	}

	t.Log(analysis.DetailedString())
}
// TestRebalancer places all 14 shards on a single node of a 2-DC, 4-rack
// topology and checks that a rebalance plan under "110" replication moves
// shards into dc2, with at least 7 cross-DC moves. The replication parse
// error is checked rather than discarded.
func TestRebalancer(t *testing.T) {
	// Build topology: 2 DCs, 2 racks each, all shards on one node
	analysis := NewTopologyAnalysis()

	// Add nodes
	nodes := []*TopologyNode{
		{NodeID: "dc1-rack1-node1", DataCenter: "dc1", Rack: "dc1-rack1", FreeSlots: 0},
		{NodeID: "dc1-rack2-node1", DataCenter: "dc1", Rack: "dc1-rack2", FreeSlots: 10},
		{NodeID: "dc2-rack1-node1", DataCenter: "dc2", Rack: "dc2-rack1", FreeSlots: 10},
		{NodeID: "dc2-rack2-node1", DataCenter: "dc2", Rack: "dc2-rack2", FreeSlots: 10},
	}
	for _, node := range nodes {
		analysis.AddNode(node)
	}

	// Add all 14 shards to first node
	for i := 0; i < 14; i++ {
		analysis.AddShardLocation(ShardLocation{
			ShardID:    i,
			NodeID:     "dc1-rack1-node1",
			DataCenter: "dc1",
			Rack:       "dc1-rack1",
		})
	}
	analysis.Finalize()

	// Create rebalancer with 110 replication (2 DCs, 2 racks each)
	ec := DefaultECConfig()
	rep, err := NewReplicationConfigFromString("110")
	if err != nil {
		t.Fatalf("Failed to parse replication %s: %v", "110", err)
	}
	rebalancer := NewRebalancer(ec, rep)

	plan, err := rebalancer.PlanRebalance(analysis)
	if err != nil {
		t.Fatalf("PlanRebalance failed: %v", err)
	}

	t.Logf("Planned %d moves", plan.TotalMoves)
	t.Log(plan.DetailedString())

	// Verify we're moving shards to dc2
	movedToDC2 := 0
	for _, move := range plan.Moves {
		if move.DestNode.DataCenter == "dc2" {
			movedToDC2++
		}
	}
	if movedToDC2 == 0 {
		t.Error("Expected some moves to dc2")
	}

	// With "110" replication, target is 7 shards per DC
	// Starting with 14 in dc1, should plan to move 7 to dc2
	if plan.MovesAcrossDC < 7 {
		t.Errorf("Expected at least 7 cross-DC moves for 110 replication, got %d", plan.MovesAcrossDC)
	}
}
// TestCustomECRatios runs CalculateDistribution for a range of EC ratios
// under "110" replication, logs the resulting targets and checks that two
// DCs at the per-DC target can hold every shard. The replication parse error
// is checked rather than discarded.
func TestCustomECRatios(t *testing.T) {
	// Test various custom EC ratios that seaweed-enterprise might use
	ratios := []struct {
		name   string
		data   int
		parity int
	}{
		{"4+2", 4, 2},
		{"6+3", 6, 3},
		{"8+2", 8, 2},
		{"8+4", 8, 4},
		{"10+4", 10, 4},
		{"12+4", 12, 4},
		{"16+4", 16, 4},
	}

	for _, ratio := range ratios {
		t.Run(ratio.name, func(t *testing.T) {
			ec, err := NewECConfig(ratio.data, ratio.parity)
			if err != nil {
				t.Fatalf("Failed to create EC config: %v", err)
			}

			rep, err := NewReplicationConfigFromString("110")
			if err != nil {
				t.Fatalf("Failed to parse replication %s: %v", "110", err)
			}
			dist := CalculateDistribution(ec, rep)

			t.Logf("EC %s with replication 110:", ratio.name)
			t.Logf(" Total shards: %d", ec.TotalShards())
			t.Logf(" Can lose: %d shards", ec.MaxTolerableLoss())
			t.Logf(" Target per DC: %d", dist.TargetShardsPerDC)
			t.Logf(" Target per rack: %d", dist.TargetShardsPerRack)
			t.Logf(" Min DCs for DC fault tolerance: %d", dist.MinDCsForDCFaultTolerance())

			// Verify basic sanity
			if dist.TargetShardsPerDC*2 < ec.TotalShards() {
				t.Errorf("Target per DC (%d) * 2 should be >= total (%d)",
					dist.TargetShardsPerDC, ec.TotalShards())
			}
		})
	}
}
// TestShardClassification checks IsDataShard and IsParityShard for the
// default 10+4 layout and for a custom 8+4 layout. The error from
// NewECConfig is checked rather than discarded.
func TestShardClassification(t *testing.T) {
	ec := DefaultECConfig() // 10+4

	// Test IsDataShard
	for i := 0; i < 10; i++ {
		if !ec.IsDataShard(i) {
			t.Errorf("Shard %d should be a data shard", i)
		}
		if ec.IsParityShard(i) {
			t.Errorf("Shard %d should not be a parity shard", i)
		}
	}

	// Test IsParityShard
	for i := 10; i < 14; i++ {
		if ec.IsDataShard(i) {
			t.Errorf("Shard %d should not be a data shard", i)
		}
		if !ec.IsParityShard(i) {
			t.Errorf("Shard %d should be a parity shard", i)
		}
	}

	// Test with custom 8+4 EC
	ec84, err := NewECConfig(8, 4)
	if err != nil {
		t.Fatalf("Failed to create EC config: %v", err)
	}
	for i := 0; i < 8; i++ {
		if !ec84.IsDataShard(i) {
			t.Errorf("8+4 EC: Shard %d should be a data shard", i)
		}
	}
	for i := 8; i < 12; i++ {
		if !ec84.IsParityShard(i) {
			t.Errorf("8+4 EC: Shard %d should be a parity shard", i)
		}
	}
}
// TestSortShardsDataFirst checks that, for a mixed input of four data and
// four parity shards, the first half of the result holds only data shards
// and the second half only parity shards.
func TestSortShardsDataFirst(t *testing.T) {
	ec := DefaultECConfig() // 10+4

	// Data and parity shards interleaved.
	shards := []int{0, 10, 5, 11, 2, 12, 7, 13}
	sorted := ec.SortShardsDataFirst(shards)

	t.Logf("Original: %v", shards)
	t.Logf("Sorted (data first): %v", sorted)

	for pos, id := range sorted[:8] {
		switch {
		case pos < 4 && !ec.IsDataShard(id):
			// Positions 0-3 must hold the data shards (0, 5, 2, 7).
			t.Errorf("Position %d should be a data shard, got %d", pos, id)
		case pos >= 4 && !ec.IsParityShard(id):
			// Positions 4-7 must hold the parity shards (10, 11, 12, 13).
			t.Errorf("Position %d should be a parity shard, got %d", pos, id)
		}
	}
}
// TestSortShardsParityFirst checks that, for a mixed input of four data and
// four parity shards, the first half of the result holds only parity shards
// and the second half only data shards.
func TestSortShardsParityFirst(t *testing.T) {
	ec := DefaultECConfig() // 10+4

	// Data and parity shards interleaved.
	shards := []int{0, 10, 5, 11, 2, 12, 7, 13}
	sorted := ec.SortShardsParityFirst(shards)

	t.Logf("Original: %v", shards)
	t.Logf("Sorted (parity first): %v", sorted)

	for pos, id := range sorted[:8] {
		switch {
		case pos < 4 && !ec.IsParityShard(id):
			// Positions 0-3 must hold the parity shards (10, 11, 12, 13).
			t.Errorf("Position %d should be a parity shard, got %d", pos, id)
		case pos >= 4 && !ec.IsDataShard(id):
			// Positions 4-7 must hold the data shards (0, 5, 2, 7).
			t.Errorf("Position %d should be a data shard, got %d", pos, id)
		}
	}
}
// TestRebalancerPrefersMovingParityShards plans a rebalance for two nodes in
// one rack, where node1 holds all 14 shards, and logs how many parity shards
// are moved before the first data shard.
//
// The ordering preference is only reported via t.Logf; the test fails only
// if planning itself fails. The replication parse error is checked rather
// than discarded.
func TestRebalancerPrefersMovingParityShards(t *testing.T) {
	// Build topology where one node has all shards including mix of data and parity
	analysis := NewTopologyAnalysis()

	// Node 1: Has all 14 shards (mixed data and parity)
	node1 := &TopologyNode{
		NodeID:     "node1",
		DataCenter: "dc1",
		Rack:       "rack1",
		FreeSlots:  0,
	}
	analysis.AddNode(node1)

	// Node 2: Empty, ready to receive
	node2 := &TopologyNode{
		NodeID:     "node2",
		DataCenter: "dc1",
		Rack:       "rack1",
		FreeSlots:  10,
	}
	analysis.AddNode(node2)

	// Add all 14 shards to node1
	for i := 0; i < 14; i++ {
		analysis.AddShardLocation(ShardLocation{
			ShardID:    i,
			NodeID:     "node1",
			DataCenter: "dc1",
			Rack:       "rack1",
		})
	}
	analysis.Finalize()

	// Create rebalancer
	ec := DefaultECConfig()
	rep, err := NewReplicationConfigFromString("000")
	if err != nil {
		t.Fatalf("Failed to parse replication %s: %v", "000", err)
	}
	rebalancer := NewRebalancer(ec, rep)

	plan, err := rebalancer.PlanRebalance(analysis)
	if err != nil {
		t.Fatalf("PlanRebalance failed: %v", err)
	}

	t.Logf("Planned %d moves", len(plan.Moves))

	// Count parity moves that precede the first data move, and all data moves.
	parityMovesFirst := 0
	dataMoves := 0
	seenDataMove := false
	for _, move := range plan.Moves {
		isParity := ec.IsParityShard(move.ShardID)
		t.Logf("Move shard %d (parity=%v): %s -> %s",
			move.ShardID, isParity, move.SourceNode.NodeID, move.DestNode.NodeID)

		if isParity && !seenDataMove {
			parityMovesFirst++
		} else if !isParity {
			seenDataMove = true
			dataMoves++
		}
	}

	t.Logf("Parity moves before first data move: %d", parityMovesFirst)
	t.Logf("Data moves: %d", dataMoves)

	// With 10+4 EC, there are 4 parity shards
	// They should be moved before data shards when possible
	if parityMovesFirst < 4 && len(plan.Moves) >= 4 {
		t.Logf("Note: Expected parity shards to be moved first, but got %d parity moves before data moves", parityMovesFirst)
	}
}
// TestDistributionSummary checks that both Summary and FaultToleranceAnalysis
// return non-empty text for the default EC configuration with replication
// string "110".
func TestDistributionSummary(t *testing.T) {
	ec := DefaultECConfig()
	// Fail fast on a bad replication string instead of discarding the error.
	rep, err := NewReplicationConfigFromString("110")
	if err != nil {
		t.Fatalf("NewReplicationConfigFromString failed: %v", err)
	}
	dist := CalculateDistribution(ec, rep)

	summary := dist.Summary()
	t.Log(summary)
	if len(summary) == 0 {
		t.Error("Summary should not be empty")
	}

	analysis := dist.FaultToleranceAnalysis()
	t.Log(analysis)
	if len(analysis) == 0 {
		t.Error("Fault tolerance analysis should not be empty")
	}
}

View File

@@ -1,10 +1,5 @@
package distribution
import (
"fmt"
"slices"
)
// ShardMove represents a planned shard move
type ShardMove struct {
ShardID int
@@ -13,12 +8,6 @@ type ShardMove struct {
Reason string
}
// String renders the move as "shard <id>: <source node> -> <dest node> (<reason>)".
func (m ShardMove) String() string {
	from, to := m.SourceNode.NodeID, m.DestNode.NodeID
	return fmt.Sprintf("shard %d: %s -> %s (%s)", m.ShardID, from, to, m.Reason)
}
// RebalancePlan contains the complete plan for rebalancing EC shards
type RebalancePlan struct {
Moves []ShardMove
@@ -32,346 +21,8 @@ type RebalancePlan struct {
MovesWithinRack int
}
// String gives a one-line summary of the plan: the total move count followed
// by the per-scope counters (across DC, across rack, within rack).
func (p *RebalancePlan) String() string {
	const layout = "RebalancePlan{moves:%d, acrossDC:%d, acrossRack:%d, withinRack:%d}"
	return fmt.Sprintf(layout, p.TotalMoves, p.MovesAcrossDC, p.MovesAcrossRack, p.MovesWithinRack)
}
// DetailedString returns a detailed multi-line summary: a header with the four
// move counters followed by a numbered list of every planned move.
//
// The text is accumulated in a single byte buffer rather than by repeated
// string concatenation, so cost stays linear in the number of moves.
func (p *RebalancePlan) DetailedString() string {
	buf := []byte("Rebalance Plan:\n")
	buf = fmt.Appendf(buf, " Total Moves: %d\n", p.TotalMoves)
	buf = fmt.Appendf(buf, " Across DC: %d\n", p.MovesAcrossDC)
	buf = fmt.Appendf(buf, " Across Rack: %d\n", p.MovesAcrossRack)
	buf = fmt.Appendf(buf, " Within Rack: %d\n", p.MovesWithinRack)
	buf = append(buf, "\nMoves:\n"...)
	for i, move := range p.Moves {
		// Moves are numbered from 1 for readability.
		buf = fmt.Appendf(buf, " %d. %s\n", i+1, move.String())
	}
	return string(buf)
}
// Rebalancer plans shard moves to achieve proportional distribution.
// Both fields are set once by NewRebalancer.
type Rebalancer struct {
	// ecConfig is consulted to tell parity shards from data shards when
	// choosing which shard to move, and is passed to CalculateDistribution.
	ecConfig ECConfig
	// repConfig is passed to CalculateDistribution together with ecConfig.
	repConfig ReplicationConfig
}
// NewRebalancer returns a Rebalancer that plans with the supplied erasure
// coding and replication configuration.
func NewRebalancer(ec ECConfig, rep ReplicationConfig) *Rebalancer {
	rb := new(Rebalancer)
	rb.ecConfig = ec
	rb.repConfig = rep
	return rb
}
// PlanRebalance builds a rebalancing plan from the given topology analysis.
//
// Planning runs in three phases - across data centers, across racks, then
// across nodes within a rack - and the moves of each phase are appended to the
// plan in that order. The analysis passed in is modified by the planners as
// they go. The returned error is always nil.
func (r *Rebalancer) PlanRebalance(analysis *TopologyAnalysis) (*RebalancePlan, error) {
	target := CalculateDistribution(r.ecConfig, r.repConfig)
	result := &RebalancePlan{Distribution: target, Analysis: analysis}

	// Phase 1: data centers.
	acrossDC := r.planDCMoves(analysis, target)
	result.Moves = append(result.Moves, acrossDC...)
	result.MovesAcrossDC += len(acrossDC)
	r.applyMovesToAnalysis(analysis, acrossDC)

	// Phase 2: racks within each data center.
	acrossRack := r.planRackMoves(analysis, target)
	result.Moves = append(result.Moves, acrossRack...)
	result.MovesAcrossRack += len(acrossRack)
	r.applyMovesToAnalysis(analysis, acrossRack)

	// Phase 3: nodes within each rack.
	withinRack := r.planNodeMoves(analysis, target)
	result.Moves = append(result.Moves, withinRack...)
	result.MovesWithinRack += len(withinRack)

	result.TotalMoves = len(result.Moves)
	return result, nil
}
// planDCMoves plans moves that shift shards from data centers holding more
// than their share to data centers holding less.
//
// Every planned move is applied to the analysis bookkeeping immediately: the
// per-DC, per-rack and per-node counts and the per-node shard lists. Without
// the per-node updates pickShardToMove would keep returning the shard that was
// just scheduled, and pickBestDestination would keep ranking nodes by counts
// that no longer reflect the plan.
func (r *Rebalancer) planDCMoves(analysis *TopologyAnalysis, dist *ECDistribution) []ShardMove {
	var moves []ShardMove
	overDCs := CalculateDCExcess(analysis, dist)
	underDCs := CalculateUnderservedDCs(analysis, dist)
	underIdx := 0
	for _, over := range overDCs {
		for over.Excess > 0 && underIdx < len(underDCs) {
			destDC := underDCs[underIdx]

			// Find a shard and source node
			shardID, srcNode := r.pickShardToMove(analysis, over.Nodes)
			if srcNode == nil {
				break
			}

			// Find destination node in target DC
			destNode := r.pickBestDestination(analysis, destDC, "", dist)
			if destNode == nil {
				// No usable node in this DC; try the next under-served one.
				underIdx++
				continue
			}

			moves = append(moves, ShardMove{
				ShardID:    shardID,
				SourceNode: srcNode,
				DestNode:   destNode,
				Reason:     fmt.Sprintf("balance DC: %s -> %s", srcNode.DataCenter, destDC),
			})
			over.Excess--

			// Reflect the move in every counter later planning steps read.
			analysis.ShardsByDC[srcNode.DataCenter]--
			analysis.ShardsByDC[destDC]++
			analysis.ShardsByRack[srcNode.Rack]--
			analysis.ShardsByRack[destNode.Rack]++
			analysis.ShardsByNode[srcNode.NodeID]--
			analysis.ShardsByNode[destNode.NodeID]++

			// Hand the shard over in the per-node lists so it cannot be picked again.
			srcShards := analysis.NodeToShards[srcNode.NodeID]
			if at := slices.Index(srcShards, shardID); at >= 0 {
				analysis.NodeToShards[srcNode.NodeID] = append(srcShards[:at], srcShards[at+1:]...)
			}
			analysis.NodeToShards[destNode.NodeID] = append(
				analysis.NodeToShards[destNode.NodeID], shardID)

			// Check if destDC reached target
			if analysis.ShardsByDC[destDC] >= dist.TargetShardsPerDC {
				underIdx++
			}
		}
	}
	return moves
}
// planRackMoves plans moves that even out shard counts across the racks of
// each data center.
//
// The per-rack target is the DC's shard count divided (rounding up) by the
// larger of the DC's rack count and the configured MinRacksPerDC. Every planned
// move is applied to the per-rack and per-node counts and to the per-node
// shard lists immediately. Without the per-node updates pickShardToMove would
// keep returning the shard that was just scheduled, and pickBestDestination
// would rank nodes by stale counts.
func (r *Rebalancer) planRackMoves(analysis *TopologyAnalysis, dist *ECDistribution) []ShardMove {
	var moves []ShardMove
	for dc := range analysis.DCToRacks {
		dcShards := analysis.ShardsByDC[dc]
		numRacks := len(analysis.DCToRacks[dc])
		if numRacks == 0 {
			continue
		}
		targetPerRack := ceilDivide(dcShards, max(numRacks, dist.ReplicationConfig.MinRacksPerDC))
		overRacks := CalculateRackExcess(analysis, dc, targetPerRack)
		underRacks := CalculateUnderservedRacks(analysis, dc, targetPerRack)
		underIdx := 0
		for _, over := range overRacks {
			for over.Excess > 0 && underIdx < len(underRacks) {
				destRack := underRacks[underIdx]

				// Find shard and source node
				shardID, srcNode := r.pickShardToMove(analysis, over.Nodes)
				if srcNode == nil {
					break
				}

				// Find destination node in target rack
				destNode := r.pickBestDestination(analysis, dc, destRack, dist)
				if destNode == nil {
					// No usable node in this rack; try the next under-served one.
					underIdx++
					continue
				}

				moves = append(moves, ShardMove{
					ShardID:    shardID,
					SourceNode: srcNode,
					DestNode:   destNode,
					Reason:     fmt.Sprintf("balance rack: %s -> %s", srcNode.Rack, destRack),
				})
				over.Excess--

				// Reflect the move in the counters later planning steps read.
				analysis.ShardsByRack[srcNode.Rack]--
				analysis.ShardsByRack[destRack]++
				analysis.ShardsByNode[srcNode.NodeID]--
				analysis.ShardsByNode[destNode.NodeID]++

				// Hand the shard over in the per-node lists so it cannot be picked again.
				srcShards := analysis.NodeToShards[srcNode.NodeID]
				if at := slices.Index(srcShards, shardID); at >= 0 {
					analysis.NodeToShards[srcNode.NodeID] = append(srcShards[:at], srcShards[at+1:]...)
				}
				analysis.NodeToShards[destNode.NodeID] = append(
					analysis.NodeToShards[destNode.NodeID], shardID)

				if analysis.ShardsByRack[destRack] >= targetPerRack {
					underIdx++
				}
			}
		}
	}
	return moves
}
// planNodeMoves plans moves that even out shard counts across the nodes of
// each rack.
//
// Racks with fewer than two nodes are skipped. The per-node quota is the rack's
// shard count divided (rounding up) by the larger of the rack's node count and
// the configured MinNodesPerRack. Nodes above the quota donate shards, most
// loaded first, to nodes below it; a parity shard is donated when the donor
// has one, otherwise the first shard in its list. Per-node counts and shard
// lists in the analysis are updated as each move is planned.
func (r *Rebalancer) planNodeMoves(analysis *TopologyAnalysis, dist *ECDistribution) []ShardMove {
	var planned []ShardMove
	for rack, members := range analysis.RackToNodes {
		if len(members) < 2 {
			continue
		}
		divisor := max(len(members), dist.ReplicationConfig.MinNodesPerRack)
		quota := ceilDivide(analysis.ShardsByRack[rack], divisor)

		// Split the rack into donors (above quota) and takers (below quota).
		var donors, takers []*TopologyNode
		for _, member := range members {
			switch held := analysis.ShardsByNode[member.NodeID]; {
			case held > quota:
				donors = append(donors, member)
			case held < quota:
				takers = append(takers, member)
			}
		}

		// Most loaded donors go first.
		slices.SortFunc(donors, func(x, y *TopologyNode) int {
			return analysis.ShardsByNode[y.NodeID] - analysis.ShardsByNode[x.NodeID]
		})

		takerIdx := 0
		for _, donor := range donors {
			surplus := analysis.ShardsByNode[donor.NodeID] - quota
			for surplus > 0 && takerIdx < len(takers) {
				held := analysis.NodeToShards[donor.NodeID]
				if len(held) == 0 {
					break
				}
				taker := takers[takerIdx]

				// Prefer the first parity shard; otherwise take the first shard.
				pick := slices.IndexFunc(held, func(id int) bool {
					return r.ecConfig.IsParityShard(id)
				})
				if pick < 0 {
					pick = 0
				}
				shardID := held[pick]

				planned = append(planned, ShardMove{
					ShardID:    shardID,
					SourceNode: donor,
					DestNode:   taker,
					Reason:     fmt.Sprintf("balance node: %s -> %s", donor.NodeID, taker.NodeID),
				})
				surplus--
				analysis.ShardsByNode[donor.NodeID]--
				analysis.ShardsByNode[taker.NodeID]++

				// Move the chosen shard between the per-node shard lists.
				analysis.NodeToShards[donor.NodeID] = append(held[:pick], held[pick+1:]...)
				analysis.NodeToShards[taker.NodeID] = append(
					analysis.NodeToShards[taker.NodeID], shardID)

				if analysis.ShardsByNode[taker.NodeID] >= quota {
					takerIdx++
				}
			}
		}
	}
	return planned
}
// pickShardToMove chooses one shard, and the node currently holding it, from
// the given nodes.
//
// The nodes slice is sorted in place, most shards first. The first parity
// shard found in that order is returned; when no node holds a parity shard,
// the first shard of the first non-empty node is returned instead. Parity
// shards are preferred so that data shards, which serve reads, stay spread
// out. Returns (-1, nil) when none of the nodes holds any shard.
func (r *Rebalancer) pickShardToMove(analysis *TopologyAnalysis, nodes []*TopologyNode) (int, *TopologyNode) {
	slices.SortFunc(nodes, func(x, y *TopologyNode) int {
		return analysis.ShardsByNode[y.NodeID] - analysis.ShardsByNode[x.NodeID]
	})

	isParity := func(id int) bool { return r.ecConfig.IsParityShard(id) }

	// Pass 1: the first parity shard on the busiest node that has one.
	for _, candidate := range nodes {
		held := analysis.NodeToShards[candidate.NodeID]
		if at := slices.IndexFunc(held, isParity); at >= 0 {
			return held[at], candidate
		}
	}

	// Pass 2: no parity shard anywhere, so fall back to any shard.
	for _, candidate := range nodes {
		if held := analysis.NodeToShards[candidate.NodeID]; len(held) != 0 {
			return held[0], candidate
		}
	}
	return -1, nil
}
// pickBestDestination returns the preferred node to receive a shard, or nil
// when no node qualifies.
//
// A node qualifies when it matches targetDC and targetRack (an empty string
// disables that filter), has at least one free slot, and holds fewer shards
// than dist.MaxShardsPerNode. Among qualifying nodes the one with the fewest
// shards wins; ties go to the node with more free slots.
func (r *Rebalancer) pickBestDestination(analysis *TopologyAnalysis, targetDC, targetRack string, dist *ECDistribution) *TopologyNode {
	eligible := func(n *TopologyNode) bool {
		switch {
		case targetDC != "" && n.DataCenter != targetDC:
			return false
		case targetRack != "" && n.Rack != targetRack:
			return false
		case n.FreeSlots <= 0:
			return false
		case analysis.ShardsByNode[n.NodeID] >= dist.MaxShardsPerNode:
			return false
		}
		return true
	}

	var pool []*TopologyNode
	for _, n := range analysis.AllNodes {
		if eligible(n) {
			pool = append(pool, n)
		}
	}
	if len(pool) == 0 {
		return nil
	}

	// Order by ascending shard count, then by descending free slots.
	slices.SortFunc(pool, func(x, y *TopologyNode) int {
		if diff := analysis.ShardsByNode[x.NodeID] - analysis.ShardsByNode[y.NodeID]; diff != 0 {
			return diff
		}
		return y.FreeSlots - x.FreeSlots
	})
	return pool[0]
}
// applyMovesToAnalysis is intentionally a no-op; it ignores both arguments.
//
// Each planner adjusts the analysis counters it works with while it plans
// (planDCMoves: ShardsByDC, planRackMoves: ShardsByRack, planNodeMoves:
// ShardsByNode and NodeToShards), so repeating those adjustments here would
// count the same move twice.
func (r *Rebalancer) applyMovesToAnalysis(analysis *TopologyAnalysis, moves []ShardMove) {
	// Bookkeeping is done inline by the individual planners.
	// The method is kept so PlanRebalance has a single hook between phases.
}

View File

@@ -53,19 +53,6 @@ func NewShardsInfo() *ShardsInfo {
}
}
// ShardsInfoFromVolume builds a ShardsInfo describing every shard of ev,
// recording each shard's ID and size and marking its ID in the shard bitmap.
func ShardsInfoFromVolume(ev *EcVolume) *ShardsInfo {
	info := &ShardsInfo{shards: make([]ShardInfo, len(ev.Shards))}
	// Fill the fields directly instead of going through Set(): info has not
	// been shared yet, so no locking is needed.
	for idx := range ev.Shards {
		shard := ev.Shards[idx]
		info.shards[idx] = NewShardInfo(shard.ShardId, ShardSize(shard.Size()))
		info.shardBits = info.shardBits.Set(shard.ShardId)
	}
	return info
}
// Initializes a ShardsInfo from a VolumeEcShardInformationMessage proto.
func ShardsInfoFromVolumeEcShardInformationMessage(vi *master_pb.VolumeEcShardInformationMessage) *ShardsInfo {
res := NewShardsInfo()

View File

@@ -64,18 +64,6 @@ type PlacementRequest struct {
PreferDifferentRacks bool
}
// DefaultPlacementRequest returns the default placement configuration:
// 14 shards, both per-server and per-rack shard limits set to 0, a maximum
// task load of 5, and a preference for distinct servers and racks.
func DefaultPlacementRequest() PlacementRequest {
	var req PlacementRequest
	req.ShardsNeeded = 14
	req.MaxShardsPerServer = 0
	req.MaxShardsPerRack = 0
	req.MaxTaskLoad = 5
	req.PreferDifferentServers = true
	req.PreferDifferentRacks = true
	return req
}
// PlacementResult contains the selected destinations for EC shards
type PlacementResult struct {
SelectedDisks []*DiskCandidate
@@ -270,15 +258,6 @@ func groupDisksByRack(disks []*DiskCandidate) map[string][]*DiskCandidate {
return result
}
// groupDisksByServer buckets the disks by NodeID, keeping each bucket in
// input order.
func groupDisksByServer(disks []*DiskCandidate) map[string][]*DiskCandidate {
	byServer := map[string][]*DiskCandidate{}
	for i := range disks {
		server := disks[i].NodeID
		byServer[server] = append(byServer[server], disks[i])
	}
	return byServer
}
// getRackKey returns the unique key for a rack (dc:rack)
func getRackKey(disk *DiskCandidate) string {
return fmt.Sprintf("%s:%s", disk.DataCenter, disk.Rack)
@@ -393,28 +372,3 @@ func addDiskToResult(result *PlacementResult, disk *DiskCandidate,
result.ShardsPerRack[rackKey]++
result.ShardsPerDC[disk.DataCenter]++
}
// VerifySpread reports whether a placement result uses at least minServers
// servers and at least minRacks racks. The server requirement is checked
// first; the returned error describes the first requirement that is not met,
// and nil means both are satisfied.
func VerifySpread(result *PlacementResult, minServers, minRacks int) error {
	switch {
	case result.ServersUsed < minServers:
		return fmt.Errorf("only %d servers used, need at least %d", result.ServersUsed, minServers)
	case result.RacksUsed < minRacks:
		return fmt.Errorf("only %d racks used, need at least %d", result.RacksUsed, minRacks)
	default:
		return nil
	}
}
// CalculateIdealDistribution returns the smallest and largest number of shards
// a server holds when totalShards are spread as evenly as possible over
// numServers servers.
//
// min is the integer quotient; max equals min when the division is exact and
// min+1 otherwise. When numServers is zero or negative it returns
// (0, totalShards).
func CalculateIdealDistribution(totalShards, numServers int) (min, max int) {
	if numServers <= 0 {
		return 0, totalShards
	}
	perServer, leftover := totalShards/numServers, totalShards%numServers
	if leftover == 0 {
		return perServer, perServer
	}
	return perServer, perServer + 1
}

View File

@@ -1,517 +0,0 @@
package placement
import (
"testing"
)
// makeDisk is a test helper that builds a DiskCandidate with the given
// identity, location and free slots. Volume, shard and load counts start at
// zero and MaxVolumeCount is fixed at 100.
func makeDisk(nodeID string, diskID uint32, dc, rack string, freeSlots int) *DiskCandidate {
	disk := new(DiskCandidate)
	disk.NodeID = nodeID
	disk.DiskID = diskID
	disk.DataCenter = dc
	disk.Rack = rack
	disk.VolumeCount = 0
	disk.MaxVolumeCount = 100
	disk.ShardCount = 0
	disk.FreeSlots = freeSlots
	disk.LoadCount = 0
	return disk
}
// TestSelectDestinations_SingleRack places 6 shards on 3 servers with 2 disks
// each, all in one rack, and expects every disk to be selected exactly once.
func TestSelectDestinations_SingleRack(t *testing.T) {
	var disks []*DiskCandidate
	for _, server := range []string{"server1", "server2", "server3"} {
		for diskID := uint32(0); diskID < 2; diskID++ {
			disks = append(disks, makeDisk(server, diskID, "dc1", "rack1", 10))
		}
	}
	request := PlacementRequest{
		ShardsNeeded:           6,
		PreferDifferentServers: true,
		PreferDifferentRacks:   true,
	}

	result, err := SelectDestinations(disks, request)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got := len(result.SelectedDisks); got != 6 {
		t.Errorf("expected 6 selected disks, got %d", got)
	}
	// All three servers must contribute.
	if used := result.ServersUsed; used != 3 {
		t.Errorf("expected 3 servers used, got %d", used)
	}
	// No disk may be chosen twice.
	seen := make(map[string]bool)
	for _, chosen := range result.SelectedDisks {
		key := getDiskKey(chosen)
		if seen[key] {
			t.Errorf("disk %s selected multiple times", key)
		}
		seen[key] = true
	}
}
// TestSelectDestinations_MultipleRacks places 8 shards on 2 racks with 2
// servers each and 2 disks per server, and expects all 8 disks, all 4 servers
// and both racks to be used.
func TestSelectDestinations_MultipleRacks(t *testing.T) {
	layout := []struct{ server, rack string }{
		{"server1", "rack1"},
		{"server2", "rack1"},
		{"server3", "rack2"},
		{"server4", "rack2"},
	}
	var disks []*DiskCandidate
	for _, host := range layout {
		for diskID := uint32(0); diskID < 2; diskID++ {
			disks = append(disks, makeDisk(host.server, diskID, "dc1", host.rack, 10))
		}
	}
	request := PlacementRequest{
		ShardsNeeded:           8,
		PreferDifferentServers: true,
		PreferDifferentRacks:   true,
	}

	result, err := SelectDestinations(disks, request)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got := len(result.SelectedDisks); got != 8 {
		t.Errorf("expected 8 selected disks, got %d", got)
	}
	if used := result.ServersUsed; used != 4 {
		t.Errorf("expected 4 servers used, got %d", used)
	}
	if used := result.RacksUsed; used != 2 {
		t.Errorf("expected 2 racks used, got %d", used)
	}
}
// TestSelectDestinations_PrefersDifferentServers places 4 shards on 4 servers
// with 4 disks each and expects exactly one shard per server.
func TestSelectDestinations_PrefersDifferentServers(t *testing.T) {
	var disks []*DiskCandidate
	for _, server := range []string{"server1", "server2", "server3", "server4"} {
		for diskID := uint32(0); diskID < 4; diskID++ {
			disks = append(disks, makeDisk(server, diskID, "dc1", "rack1", 10))
		}
	}
	request := PlacementRequest{
		ShardsNeeded:           4,
		PreferDifferentServers: true,
		PreferDifferentRacks:   true,
	}

	result, err := SelectDestinations(disks, request)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got := len(result.SelectedDisks); got != 4 {
		t.Errorf("expected 4 selected disks, got %d", got)
	}
	// One shard per server means all four servers are in use.
	if used := result.ServersUsed; used != 4 {
		t.Errorf("expected 4 servers used, got %d", used)
	}
	for server, shards := range result.ShardsPerServer {
		if shards == 1 {
			continue
		}
		t.Errorf("server %s has %d shards, expected 1", server, shards)
	}
}
// TestSelectDestinations_SpilloverToMultipleDisksPerServer places 6 shards on
// 2 servers with 4 disks each and expects the selection to spill over onto
// several disks per server while staying balanced at 3 shards per server.
func TestSelectDestinations_SpilloverToMultipleDisksPerServer(t *testing.T) {
	var disks []*DiskCandidate
	for _, server := range []string{"server1", "server2"} {
		for diskID := uint32(0); diskID < 4; diskID++ {
			disks = append(disks, makeDisk(server, diskID, "dc1", "rack1", 10))
		}
	}
	request := PlacementRequest{
		ShardsNeeded:           6,
		PreferDifferentServers: true,
		PreferDifferentRacks:   true,
	}

	result, err := SelectDestinations(disks, request)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got := len(result.SelectedDisks); got != 6 {
		t.Errorf("expected 6 selected disks, got %d", got)
	}
	if used := result.ServersUsed; used != 2 {
		t.Errorf("expected 2 servers used, got %d", used)
	}
	// Balanced spillover: three shards on each server.
	for server, shards := range result.ShardsPerServer {
		if shards == 3 {
			continue
		}
		t.Errorf("server %s has %d shards, expected 3", server, shards)
	}
}
// TestSelectDestinations_MaxShardsPerServer asks for 6 shards on 2 servers
// with a limit of 2 shards per server and expects only 4 disks to be selected,
// with no server above the limit.
func TestSelectDestinations_MaxShardsPerServer(t *testing.T) {
	var disks []*DiskCandidate
	for _, server := range []string{"server1", "server2"} {
		for diskID := uint32(0); diskID < 4; diskID++ {
			disks = append(disks, makeDisk(server, diskID, "dc1", "rack1", 10))
		}
	}
	request := PlacementRequest{
		ShardsNeeded:           6,
		MaxShardsPerServer:     2,
		PreferDifferentServers: true,
		PreferDifferentRacks:   true,
	}

	result, err := SelectDestinations(disks, request)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	// The per-server cap limits the selection to 2 x 2 disks.
	if got := len(result.SelectedDisks); got != 4 {
		t.Errorf("expected 4 selected disks (limit 2 per server), got %d", got)
	}
	for server, shards := range result.ShardsPerServer {
		if shards <= 2 {
			continue
		}
		t.Errorf("server %s has %d shards, exceeds limit of 2", server, shards)
	}
}
// TestSelectDestinations_14ShardsAcross7Servers places 14 shards on 7 servers
// with 2 disks each and expects an even spread of 2 shards per server.
func TestSelectDestinations_14ShardsAcross7Servers(t *testing.T) {
	var disks []*DiskCandidate
	for n := 1; n <= 7; n++ {
		// Single-digit suffix: "server1" .. "server7".
		name := "server" + string(rune('0'+n))
		for diskID := uint32(0); diskID < 2; diskID++ {
			disks = append(disks, makeDisk(name, diskID, "dc1", "rack1", 10))
		}
	}
	request := PlacementRequest{
		ShardsNeeded:           14,
		PreferDifferentServers: true,
		PreferDifferentRacks:   true,
	}

	result, err := SelectDestinations(disks, request)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got := len(result.SelectedDisks); got != 14 {
		t.Errorf("expected 14 selected disks, got %d", got)
	}
	if used := result.ServersUsed; used != 7 {
		t.Errorf("expected 7 servers used, got %d", used)
	}
	for server, shards := range result.ShardsPerServer {
		if shards == 2 {
			continue
		}
		t.Errorf("server %s has %d shards, expected 2", server, shards)
	}
}
// TestSelectDestinations_FewerServersThanShards places 6 shards on only 3
// servers (3 disks each) and expects an even spread of 2 shards per server.
func TestSelectDestinations_FewerServersThanShards(t *testing.T) {
	var disks []*DiskCandidate
	for _, server := range []string{"server1", "server2", "server3"} {
		for diskID := uint32(0); diskID < 3; diskID++ {
			disks = append(disks, makeDisk(server, diskID, "dc1", "rack1", 10))
		}
	}
	request := PlacementRequest{
		ShardsNeeded:           6,
		PreferDifferentServers: true,
		PreferDifferentRacks:   true,
	}

	result, err := SelectDestinations(disks, request)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got := len(result.SelectedDisks); got != 6 {
		t.Errorf("expected 6 selected disks, got %d", got)
	}
	if used := result.ServersUsed; used != 3 {
		t.Errorf("expected 3 servers used, got %d", used)
	}
	for server, shards := range result.ShardsPerServer {
		if shards == 2 {
			continue
		}
		t.Errorf("server %s has %d shards, expected 2", server, shards)
	}
}
// TestSelectDestinations_NoSuitableDisks expects an error when every candidate
// disk has zero free slots.
func TestSelectDestinations_NoSuitableDisks(t *testing.T) {
	fullDisks := []*DiskCandidate{
		{NodeID: "server1", DiskID: 0, DataCenter: "dc1", Rack: "rack1", FreeSlots: 0},
		{NodeID: "server2", DiskID: 0, DataCenter: "dc1", Rack: "rack1", FreeSlots: 0},
	}
	request := PlacementRequest{
		ShardsNeeded:           4,
		PreferDifferentServers: true,
		PreferDifferentRacks:   true,
	}

	if _, err := SelectDestinations(fullDisks, request); err == nil {
		t.Error("expected error for no suitable disks, got nil")
	}
}
// TestSelectDestinations_EmptyInput expects an error when the default request
// is run against an empty candidate list.
func TestSelectDestinations_EmptyInput(t *testing.T) {
	request := DefaultPlacementRequest()
	if _, err := SelectDestinations([]*DiskCandidate{}, request); err == nil {
		t.Error("expected error for empty input, got nil")
	}
}
// TestSelectDestinations_FiltersByLoad expects a disk whose LoadCount (10) is
// above MaxTaskLoad (5) never to be selected.
func TestSelectDestinations_FiltersByLoad(t *testing.T) {
	candidates := []*DiskCandidate{
		{NodeID: "server1", DiskID: 0, DataCenter: "dc1", Rack: "rack1", FreeSlots: 10, LoadCount: 10},
		{NodeID: "server2", DiskID: 0, DataCenter: "dc1", Rack: "rack1", FreeSlots: 10, LoadCount: 2},
		{NodeID: "server3", DiskID: 0, DataCenter: "dc1", Rack: "rack1", FreeSlots: 10, LoadCount: 1},
	}
	request := PlacementRequest{
		ShardsNeeded:           2,
		MaxTaskLoad:            5,
		PreferDifferentServers: true,
		PreferDifferentRacks:   true,
	}

	result, err := SelectDestinations(candidates, request)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	// Only server2 and server3 are within the load limit.
	for _, chosen := range result.SelectedDisks {
		if chosen.NodeID != "server1" {
			continue
		}
		t.Errorf("disk from server1 should not be selected (load too high)")
	}
}
// TestCalculateDiskScore expects a lightly used disk to score strictly higher
// than a heavily used one.
func TestCalculateDiskScore(t *testing.T) {
	idle := &DiskCandidate{VolumeCount: 10, MaxVolumeCount: 100, ShardCount: 0, LoadCount: 0}
	busy := &DiskCandidate{VolumeCount: 90, MaxVolumeCount: 100, ShardCount: 5, LoadCount: 5}

	idleScore, busyScore := calculateDiskScore(idle), calculateDiskScore(busy)
	if idleScore <= busyScore {
		t.Errorf("low utilization disk should have higher score: low=%f, high=%f", idleScore, busyScore)
	}
}
// TestCalculateIdealDistribution is a table-driven check of the (min, max)
// shards-per-server pair for even splits, uneven splits, zero servers and
// zero shards.
func TestCalculateIdealDistribution(t *testing.T) {
	type testCase struct {
		totalShards int
		numServers  int
		expectedMin int
		expectedMax int
	}
	cases := []testCase{
		{totalShards: 14, numServers: 7, expectedMin: 2, expectedMax: 2},  // Even distribution
		{totalShards: 14, numServers: 4, expectedMin: 3, expectedMax: 4},  // Uneven: 14/4 = 3 remainder 2
		{totalShards: 6, numServers: 3, expectedMin: 2, expectedMax: 2},   // Even distribution
		{totalShards: 7, numServers: 3, expectedMin: 2, expectedMax: 3},   // Uneven: 7/3 = 2 remainder 1
		{totalShards: 10, numServers: 0, expectedMin: 0, expectedMax: 10}, // Edge case: no servers
		{totalShards: 0, numServers: 5, expectedMin: 0, expectedMax: 0},   // Edge case: no shards
	}
	for _, tc := range cases {
		lo, hi := CalculateIdealDistribution(tc.totalShards, tc.numServers)
		if lo == tc.expectedMin && hi == tc.expectedMax {
			continue
		}
		t.Errorf("CalculateIdealDistribution(%d, %d) = (%d, %d), want (%d, %d)",
			tc.totalShards, tc.numServers, lo, hi, tc.expectedMin, tc.expectedMax)
	}
}
// TestVerifySpread checks VerifySpread against a result using 3 servers and
// 2 racks: matching minimums pass, and a higher server or rack minimum fails.
func TestVerifySpread(t *testing.T) {
	placed := &PlacementResult{ServersUsed: 3, RacksUsed: 2}

	// Exactly meets both minimums.
	err := VerifySpread(placed, 3, 2)
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}

	// One server short.
	err = VerifySpread(placed, 4, 2)
	if err == nil {
		t.Error("expected error for insufficient servers")
	}

	// One rack short.
	err = VerifySpread(placed, 3, 3)
	if err == nil {
		t.Error("expected error for insufficient racks")
	}
}
// TestSelectDestinations_MultiDC places 8 shards on a topology of 2 DCs x
// 2 racks x 2 servers x 2 disks and expects all 4 racks and both DCs to be
// used.
func TestSelectDestinations_MultiDC(t *testing.T) {
	racks := []struct{ short, name string }{
		{"r1", "rack1"},
		{"r2", "rack2"},
	}
	var disks []*DiskCandidate
	for _, dc := range []string{"dc1", "dc2"} {
		for _, rack := range racks {
			for _, server := range []string{"s1", "s2"} {
				// Node IDs follow the pattern "<dc>-<rack short name>-<server>".
				nodeID := dc + "-" + rack.short + "-" + server
				for diskID := uint32(0); diskID < 2; diskID++ {
					disks = append(disks, makeDisk(nodeID, diskID, dc, rack.name, 10))
				}
			}
		}
	}
	request := PlacementRequest{
		ShardsNeeded:           8,
		PreferDifferentServers: true,
		PreferDifferentRacks:   true,
	}

	result, err := SelectDestinations(disks, request)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got := len(result.SelectedDisks); got != 8 {
		t.Errorf("expected 8 selected disks, got %d", got)
	}
	if used := result.RacksUsed; used != 4 {
		t.Errorf("expected 4 racks used, got %d", used)
	}
	if used := result.DCsUsed; used != 2 {
		t.Errorf("expected 2 DCs used, got %d", used)
	}
}
// TestSelectDestinations_SameRackDifferentDC expects racks that share a name
// but live in different data centers to be counted as two distinct racks.
func TestSelectDestinations_SameRackDifferentDC(t *testing.T) {
	var disks []*DiskCandidate
	for _, dc := range []string{"dc1", "dc2"} {
		disks = append(disks, makeDisk(dc+"-s1", 0, dc, "rack1", 10))
	}
	request := PlacementRequest{
		ShardsNeeded:           2,
		PreferDifferentServers: true,
		PreferDifferentRacks:   true,
	}

	result, err := SelectDestinations(disks, request)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	// dc1:rack1 and dc2:rack1 are different racks.
	if used := result.RacksUsed; used != 2 {
		t.Errorf("expected 2 racks used (different DCs), got %d", used)
	}
}