SeaweedFS tracks EC shard sizes in its topology data structures, but this information is never
relayed to master servers. As a result, commands reporting disk usage, such
as `volume.list` and `cluster.status`, yield incorrect figures when EC shards are present.
As an example for a simple 5-node test cluster, before...
```
> volume.list
Topology volumeSizeLimit:30000 MB hdd(volume:6/40 active:6 free:33 remote:0)
DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
DataNode 192.168.10.111:9001 hdd(volume:1/8 active:1 free:7 remote:0)
Disk hdd(volume:1/8 active:1 free:7 remote:0) id:0
volume id:3 size:88967096 file_count:172 replica_placement:2 version:3 modified_at_second:1766349617
ec volume id:1 collection: shards:[1 5]
Disk hdd total size:88967096 file_count:172
DataNode 192.168.10.111:9001 total size:88967096 file_count:172
DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
DataNode 192.168.10.111:9002 hdd(volume:2/8 active:2 free:6 remote:0)
Disk hdd(volume:2/8 active:2 free:6 remote:0) id:0
volume id:2 size:77267536 file_count:166 replica_placement:2 version:3 modified_at_second:1766349617
volume id:3 size:88967096 file_count:172 replica_placement:2 version:3 modified_at_second:1766349617
ec volume id:1 collection: shards:[0 4]
Disk hdd total size:166234632 file_count:338
DataNode 192.168.10.111:9002 total size:166234632 file_count:338
DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
DataNode 192.168.10.111:9003 hdd(volume:1/8 active:1 free:7 remote:0)
Disk hdd(volume:1/8 active:1 free:7 remote:0) id:0
volume id:2 size:77267536 file_count:166 replica_placement:2 version:3 modified_at_second:1766349617
ec volume id:1 collection: shards:[2 6]
Disk hdd total size:77267536 file_count:166
DataNode 192.168.10.111:9003 total size:77267536 file_count:166
DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
DataNode 192.168.10.111:9004 hdd(volume:2/8 active:2 free:6 remote:0)
Disk hdd(volume:2/8 active:2 free:6 remote:0) id:0
volume id:2 size:77267536 file_count:166 replica_placement:2 version:3 modified_at_second:1766349617
volume id:3 size:88967096 file_count:172 replica_placement:2 version:3 modified_at_second:1766349617
ec volume id:1 collection: shards:[3 7]
Disk hdd total size:166234632 file_count:338
DataNode 192.168.10.111:9004 total size:166234632 file_count:338
DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
DataNode 192.168.10.111:9005 hdd(volume:0/8 active:0 free:8 remote:0)
Disk hdd(volume:0/8 active:0 free:8 remote:0) id:0
ec volume id:1 collection: shards:[8 9 10 11 12 13]
Disk hdd total size:0 file_count:0
Rack DefaultRack total size:498703896 file_count:1014
DataCenter DefaultDataCenter total size:498703896 file_count:1014
total size:498703896 file_count:1014
```
...and after:
```
> volume.list
Topology volumeSizeLimit:30000 MB hdd(volume:6/40 active:6 free:33 remote:0)
DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
DataNode 192.168.10.111:9001 hdd(volume:1/8 active:1 free:7 remote:0)
Disk hdd(volume:1/8 active:1 free:7 remote:0) id:0
volume id:2 size:81761800 file_count:161 replica_placement:2 version:3 modified_at_second:1766349495
ec volume id:1 collection: shards:[1 5 9] sizes:[1:8.00 MiB 5:8.00 MiB 9:8.00 MiB] total:24.00 MiB
Disk hdd total size:81761800 file_count:161
DataNode 192.168.10.111:9001 total size:81761800 file_count:161
DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
DataNode 192.168.10.111:9002 hdd(volume:1/8 active:1 free:7 remote:0)
Disk hdd(volume:1/8 active:1 free:7 remote:0) id:0
volume id:3 size:88678712 file_count:170 replica_placement:2 version:3 modified_at_second:1766349495
ec volume id:1 collection: shards:[11 12 13] sizes:[11:8.00 MiB 12:8.00 MiB 13:8.00 MiB] total:24.00 MiB
Disk hdd total size:88678712 file_count:170
DataNode 192.168.10.111:9002 total size:88678712 file_count:170
DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
DataNode 192.168.10.111:9003 hdd(volume:2/8 active:2 free:6 remote:0)
Disk hdd(volume:2/8 active:2 free:6 remote:0) id:0
volume id:2 size:81761800 file_count:161 replica_placement:2 version:3 modified_at_second:1766349495
volume id:3 size:88678712 file_count:170 replica_placement:2 version:3 modified_at_second:1766349495
ec volume id:1 collection: shards:[0 4 8] sizes:[0:8.00 MiB 4:8.00 MiB 8:8.00 MiB] total:24.00 MiB
Disk hdd total size:170440512 file_count:331
DataNode 192.168.10.111:9003 total size:170440512 file_count:331
DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
DataNode 192.168.10.111:9004 hdd(volume:2/8 active:2 free:6 remote:0)
Disk hdd(volume:2/8 active:2 free:6 remote:0) id:0
volume id:2 size:81761800 file_count:161 replica_placement:2 version:3 modified_at_second:1766349495
volume id:3 size:88678712 file_count:170 replica_placement:2 version:3 modified_at_second:1766349495
ec volume id:1 collection: shards:[2 6 10] sizes:[2:8.00 MiB 6:8.00 MiB 10:8.00 MiB] total:24.00 MiB
Disk hdd total size:170440512 file_count:331
DataNode 192.168.10.111:9004 total size:170440512 file_count:331
DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
DataNode 192.168.10.111:9005 hdd(volume:0/8 active:0 free:8 remote:0)
Disk hdd(volume:0/8 active:0 free:8 remote:0) id:0
ec volume id:1 collection: shards:[3 7] sizes:[3:8.00 MiB 7:8.00 MiB] total:16.00 MiB
Disk hdd total size:0 file_count:0
Rack DefaultRack total size:511321536 file_count:993
DataCenter DefaultDataCenter total size:511321536 file_count:993
total size:511321536 file_count:993
```
282 lines
8.6 KiB
Go
282 lines
8.6 KiB
Go
package shell
|
|
|
|
import (
|
|
"fmt"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding/distribution"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/super_block"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/types"
|
|
)
|
|
|
|
// ECDistribution is an alias to the distribution package type for backward compatibility
|
|
type ECDistribution = distribution.ECDistribution
|
|
|
|
// CalculateECDistribution computes the target EC shard distribution based on replication policy.
|
|
// This is a convenience wrapper that uses the default 10+4 EC configuration.
|
|
// For custom EC ratios, use the distribution package directly.
|
|
func CalculateECDistribution(totalShards, parityShards int, rp *super_block.ReplicaPlacement) *ECDistribution {
|
|
ec := distribution.ECConfig{
|
|
DataShards: totalShards - parityShards,
|
|
ParityShards: parityShards,
|
|
}
|
|
rep := distribution.NewReplicationConfig(rp)
|
|
return distribution.CalculateDistribution(ec, rep)
|
|
}
|
|
|
|
// TopologyDistributionAnalysis holds the current shard distribution analysis
|
|
// This wraps the distribution package's TopologyAnalysis with shell-specific EcNode handling
|
|
type TopologyDistributionAnalysis struct {
|
|
inner *distribution.TopologyAnalysis
|
|
|
|
// Shell-specific mappings
|
|
nodeMap map[string]*EcNode // nodeID -> EcNode
|
|
}
|
|
|
|
// NewTopologyDistributionAnalysis creates a new analysis structure
|
|
func NewTopologyDistributionAnalysis() *TopologyDistributionAnalysis {
|
|
return &TopologyDistributionAnalysis{
|
|
inner: distribution.NewTopologyAnalysis(),
|
|
nodeMap: make(map[string]*EcNode),
|
|
}
|
|
}
|
|
|
|
// AddNode adds a node and its shards to the analysis
|
|
func (a *TopologyDistributionAnalysis) AddNode(node *EcNode, shardsInfo *erasure_coding.ShardsInfo) {
|
|
nodeId := node.info.Id
|
|
|
|
// Create distribution.TopologyNode from EcNode
|
|
topoNode := &distribution.TopologyNode{
|
|
NodeID: nodeId,
|
|
DataCenter: string(node.dc),
|
|
Rack: string(node.rack),
|
|
FreeSlots: node.freeEcSlot,
|
|
TotalShards: shardsInfo.Count(),
|
|
ShardIDs: shardsInfo.IdsInt(),
|
|
}
|
|
|
|
a.inner.AddNode(topoNode)
|
|
a.nodeMap[nodeId] = node
|
|
|
|
// Add shard locations
|
|
for _, shardId := range shardsInfo.Ids() {
|
|
a.inner.AddShardLocation(distribution.ShardLocation{
|
|
ShardID: int(shardId),
|
|
NodeID: nodeId,
|
|
DataCenter: string(node.dc),
|
|
Rack: string(node.rack),
|
|
})
|
|
}
|
|
}
|
|
|
|
// Finalize completes the analysis
|
|
func (a *TopologyDistributionAnalysis) Finalize() {
|
|
a.inner.Finalize()
|
|
}
|
|
|
|
// String returns a summary
|
|
func (a *TopologyDistributionAnalysis) String() string {
|
|
return a.inner.String()
|
|
}
|
|
|
|
// DetailedString returns detailed analysis
|
|
func (a *TopologyDistributionAnalysis) DetailedString() string {
|
|
return a.inner.DetailedString()
|
|
}
|
|
|
|
// GetShardsByDC returns shard counts by DC
|
|
func (a *TopologyDistributionAnalysis) GetShardsByDC() map[DataCenterId]int {
|
|
result := make(map[DataCenterId]int)
|
|
for dc, count := range a.inner.ShardsByDC {
|
|
result[DataCenterId(dc)] = count
|
|
}
|
|
return result
|
|
}
|
|
|
|
// GetShardsByRack returns shard counts by rack
|
|
func (a *TopologyDistributionAnalysis) GetShardsByRack() map[RackId]int {
|
|
result := make(map[RackId]int)
|
|
for rack, count := range a.inner.ShardsByRack {
|
|
result[RackId(rack)] = count
|
|
}
|
|
return result
|
|
}
|
|
|
|
// GetShardsByNode returns shard counts by node
|
|
func (a *TopologyDistributionAnalysis) GetShardsByNode() map[EcNodeId]int {
|
|
result := make(map[EcNodeId]int)
|
|
for nodeId, count := range a.inner.ShardsByNode {
|
|
result[EcNodeId(nodeId)] = count
|
|
}
|
|
return result
|
|
}
|
|
|
|
// AnalyzeVolumeDistribution creates an analysis of current shard distribution for a volume
|
|
func AnalyzeVolumeDistribution(volumeId needle.VolumeId, locations []*EcNode, diskType types.DiskType) *TopologyDistributionAnalysis {
|
|
analysis := NewTopologyDistributionAnalysis()
|
|
|
|
for _, node := range locations {
|
|
si := findEcVolumeShardsInfo(node, volumeId, diskType)
|
|
if si.Count() > 0 {
|
|
analysis.AddNode(node, si)
|
|
}
|
|
}
|
|
|
|
analysis.Finalize()
|
|
return analysis
|
|
}
|
|
|
|
// ECShardMove represents a planned shard move (shell-specific with EcNode references)
|
|
type ECShardMove struct {
|
|
VolumeId needle.VolumeId
|
|
ShardId erasure_coding.ShardId
|
|
SourceNode *EcNode
|
|
DestNode *EcNode
|
|
Reason string
|
|
}
|
|
|
|
// String returns a human-readable description
|
|
func (m ECShardMove) String() string {
|
|
return fmt.Sprintf("volume %d shard %d: %s -> %s (%s)",
|
|
m.VolumeId, m.ShardId, m.SourceNode.info.Id, m.DestNode.info.Id, m.Reason)
|
|
}
|
|
|
|
// ProportionalECRebalancer implements proportional shard distribution for shell commands
|
|
type ProportionalECRebalancer struct {
|
|
ecNodes []*EcNode
|
|
replicaPlacement *super_block.ReplicaPlacement
|
|
diskType types.DiskType
|
|
ecConfig distribution.ECConfig
|
|
}
|
|
|
|
// NewProportionalECRebalancer creates a new proportional rebalancer with default EC config
|
|
func NewProportionalECRebalancer(
|
|
ecNodes []*EcNode,
|
|
rp *super_block.ReplicaPlacement,
|
|
diskType types.DiskType,
|
|
) *ProportionalECRebalancer {
|
|
return NewProportionalECRebalancerWithConfig(
|
|
ecNodes,
|
|
rp,
|
|
diskType,
|
|
distribution.DefaultECConfig(),
|
|
)
|
|
}
|
|
|
|
// NewProportionalECRebalancerWithConfig creates a rebalancer with custom EC configuration
|
|
func NewProportionalECRebalancerWithConfig(
|
|
ecNodes []*EcNode,
|
|
rp *super_block.ReplicaPlacement,
|
|
diskType types.DiskType,
|
|
ecConfig distribution.ECConfig,
|
|
) *ProportionalECRebalancer {
|
|
return &ProportionalECRebalancer{
|
|
ecNodes: ecNodes,
|
|
replicaPlacement: rp,
|
|
diskType: diskType,
|
|
ecConfig: ecConfig,
|
|
}
|
|
}
|
|
|
|
// PlanMoves generates a plan for moving shards to achieve proportional distribution
|
|
func (r *ProportionalECRebalancer) PlanMoves(
|
|
volumeId needle.VolumeId,
|
|
locations []*EcNode,
|
|
) ([]ECShardMove, error) {
|
|
// Build topology analysis
|
|
analysis := distribution.NewTopologyAnalysis()
|
|
nodeMap := make(map[string]*EcNode)
|
|
|
|
// Add all EC nodes to the analysis (even those without shards)
|
|
for _, node := range r.ecNodes {
|
|
nodeId := node.info.Id
|
|
topoNode := &distribution.TopologyNode{
|
|
NodeID: nodeId,
|
|
DataCenter: string(node.dc),
|
|
Rack: string(node.rack),
|
|
FreeSlots: node.freeEcSlot,
|
|
}
|
|
analysis.AddNode(topoNode)
|
|
nodeMap[nodeId] = node
|
|
}
|
|
|
|
// Add shard locations from nodes that have shards
|
|
for _, node := range locations {
|
|
nodeId := node.info.Id
|
|
si := findEcVolumeShardsInfo(node, volumeId, r.diskType)
|
|
for _, shardId := range si.Ids() {
|
|
analysis.AddShardLocation(distribution.ShardLocation{
|
|
ShardID: int(shardId),
|
|
NodeID: nodeId,
|
|
DataCenter: string(node.dc),
|
|
Rack: string(node.rack),
|
|
})
|
|
}
|
|
if _, exists := nodeMap[nodeId]; !exists {
|
|
nodeMap[nodeId] = node
|
|
}
|
|
}
|
|
|
|
analysis.Finalize()
|
|
|
|
// Create rebalancer and plan moves
|
|
rep := distribution.NewReplicationConfig(r.replicaPlacement)
|
|
rebalancer := distribution.NewRebalancer(r.ecConfig, rep)
|
|
|
|
plan, err := rebalancer.PlanRebalance(analysis)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Convert distribution moves to shell moves
|
|
var moves []ECShardMove
|
|
for _, move := range plan.Moves {
|
|
srcNode := nodeMap[move.SourceNode.NodeID]
|
|
destNode := nodeMap[move.DestNode.NodeID]
|
|
if srcNode == nil || destNode == nil {
|
|
continue
|
|
}
|
|
|
|
moves = append(moves, ECShardMove{
|
|
VolumeId: volumeId,
|
|
ShardId: erasure_coding.ShardId(move.ShardID),
|
|
SourceNode: srcNode,
|
|
DestNode: destNode,
|
|
Reason: move.Reason,
|
|
})
|
|
}
|
|
|
|
return moves, nil
|
|
}
|
|
|
|
// GetDistributionSummary returns a summary of the planned distribution
|
|
func GetDistributionSummary(rp *super_block.ReplicaPlacement) string {
|
|
ec := distribution.DefaultECConfig()
|
|
rep := distribution.NewReplicationConfig(rp)
|
|
dist := distribution.CalculateDistribution(ec, rep)
|
|
return dist.Summary()
|
|
}
|
|
|
|
// GetDistributionSummaryWithConfig returns a summary with custom EC configuration
|
|
func GetDistributionSummaryWithConfig(rp *super_block.ReplicaPlacement, ecConfig distribution.ECConfig) string {
|
|
rep := distribution.NewReplicationConfig(rp)
|
|
dist := distribution.CalculateDistribution(ecConfig, rep)
|
|
return dist.Summary()
|
|
}
|
|
|
|
// GetFaultToleranceAnalysis returns fault tolerance analysis for the given configuration
|
|
func GetFaultToleranceAnalysis(rp *super_block.ReplicaPlacement) string {
|
|
ec := distribution.DefaultECConfig()
|
|
rep := distribution.NewReplicationConfig(rp)
|
|
dist := distribution.CalculateDistribution(ec, rep)
|
|
return dist.FaultToleranceAnalysis()
|
|
}
|
|
|
|
// GetFaultToleranceAnalysisWithConfig returns fault tolerance analysis with custom EC configuration
|
|
func GetFaultToleranceAnalysisWithConfig(rp *super_block.ReplicaPlacement, ecConfig distribution.ECConfig) string {
|
|
rep := distribution.NewReplicationConfig(rp)
|
|
dist := distribution.CalculateDistribution(ecConfig, rep)
|
|
return dist.FaultToleranceAnalysis()
|
|
}
|