Fix reporting of EC shard sizes from nodes to masters. (#7835)

SeaweedFS tracks EC shard sizes on topology data stuctures, but this information is never
relayed to master servers :( The end result is that commands reporting disk usage, such
as `volume.list` and `cluster.status`, yield incorrect figures when EC shards are present.

As an example for a simple 5-node test cluster, before...

```
> volume.list
Topology volumeSizeLimit:30000 MB hdd(volume:6/40 active:6 free:33 remote:0)
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9001 hdd(volume:1/8 active:1 free:7 remote:0)
        Disk hdd(volume:1/8 active:1 free:7 remote:0) id:0
          volume id:3  size:88967096  file_count:172  replica_placement:2  version:3  modified_at_second:1766349617
          ec volume id:1 collection: shards:[1 5]
        Disk hdd total size:88967096 file_count:172
      DataNode 192.168.10.111:9001 total size:88967096 file_count:172
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9002 hdd(volume:2/8 active:2 free:6 remote:0)
        Disk hdd(volume:2/8 active:2 free:6 remote:0) id:0
          volume id:2  size:77267536  file_count:166  replica_placement:2  version:3  modified_at_second:1766349617
          volume id:3  size:88967096  file_count:172  replica_placement:2  version:3  modified_at_second:1766349617
          ec volume id:1 collection: shards:[0 4]
        Disk hdd total size:166234632 file_count:338
      DataNode 192.168.10.111:9002 total size:166234632 file_count:338
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9003 hdd(volume:1/8 active:1 free:7 remote:0)
        Disk hdd(volume:1/8 active:1 free:7 remote:0) id:0
          volume id:2  size:77267536  file_count:166  replica_placement:2  version:3  modified_at_second:1766349617
          ec volume id:1 collection: shards:[2 6]
        Disk hdd total size:77267536 file_count:166
      DataNode 192.168.10.111:9003 total size:77267536 file_count:166
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9004 hdd(volume:2/8 active:2 free:6 remote:0)
        Disk hdd(volume:2/8 active:2 free:6 remote:0) id:0
          volume id:2  size:77267536  file_count:166  replica_placement:2  version:3  modified_at_second:1766349617
          volume id:3  size:88967096  file_count:172  replica_placement:2  version:3  modified_at_second:1766349617
          ec volume id:1 collection: shards:[3 7]
        Disk hdd total size:166234632 file_count:338
      DataNode 192.168.10.111:9004 total size:166234632 file_count:338
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9005 hdd(volume:0/8 active:0 free:8 remote:0)
        Disk hdd(volume:0/8 active:0 free:8 remote:0) id:0
          ec volume id:1 collection: shards:[8 9 10 11 12 13]
        Disk hdd total size:0 file_count:0
    Rack DefaultRack total size:498703896 file_count:1014
  DataCenter DefaultDataCenter total size:498703896 file_count:1014
total size:498703896 file_count:1014
```

...and after:

```
> volume.list
Topology volumeSizeLimit:30000 MB hdd(volume:6/40 active:6 free:33 remote:0)
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9001 hdd(volume:1/8 active:1 free:7 remote:0)
        Disk hdd(volume:1/8 active:1 free:7 remote:0) id:0
          volume id:2  size:81761800  file_count:161  replica_placement:2  version:3  modified_at_second:1766349495
          ec volume id:1 collection: shards:[1 5 9] sizes:[1:8.00 MiB 5:8.00 MiB 9:8.00 MiB] total:24.00 MiB
        Disk hdd total size:81761800 file_count:161
      DataNode 192.168.10.111:9001 total size:81761800 file_count:161
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9002 hdd(volume:1/8 active:1 free:7 remote:0)
        Disk hdd(volume:1/8 active:1 free:7 remote:0) id:0
          volume id:3  size:88678712  file_count:170  replica_placement:2  version:3  modified_at_second:1766349495
          ec volume id:1 collection: shards:[11 12 13] sizes:[11:8.00 MiB 12:8.00 MiB 13:8.00 MiB] total:24.00 MiB
        Disk hdd total size:88678712 file_count:170
      DataNode 192.168.10.111:9002 total size:88678712 file_count:170
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9003 hdd(volume:2/8 active:2 free:6 remote:0)
        Disk hdd(volume:2/8 active:2 free:6 remote:0) id:0
          volume id:2  size:81761800  file_count:161  replica_placement:2  version:3  modified_at_second:1766349495
          volume id:3  size:88678712  file_count:170  replica_placement:2  version:3  modified_at_second:1766349495
          ec volume id:1 collection: shards:[0 4 8] sizes:[0:8.00 MiB 4:8.00 MiB 8:8.00 MiB] total:24.00 MiB
        Disk hdd total size:170440512 file_count:331
      DataNode 192.168.10.111:9003 total size:170440512 file_count:331
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9004 hdd(volume:2/8 active:2 free:6 remote:0)
        Disk hdd(volume:2/8 active:2 free:6 remote:0) id:0
          volume id:2  size:81761800  file_count:161  replica_placement:2  version:3  modified_at_second:1766349495
          volume id:3  size:88678712  file_count:170  replica_placement:2  version:3  modified_at_second:1766349495
          ec volume id:1 collection: shards:[2 6 10] sizes:[2:8.00 MiB 6:8.00 MiB 10:8.00 MiB] total:24.00 MiB
        Disk hdd total size:170440512 file_count:331
      DataNode 192.168.10.111:9004 total size:170440512 file_count:331
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9005 hdd(volume:0/8 active:0 free:8 remote:0)
        Disk hdd(volume:0/8 active:0 free:8 remote:0) id:0
          ec volume id:1 collection: shards:[3 7] sizes:[3:8.00 MiB 7:8.00 MiB] total:16.00 MiB
        Disk hdd total size:0 file_count:0
    Rack DefaultRack total size:511321536 file_count:993
  DataCenter DefaultDataCenter total size:511321536 file_count:993
total size:511321536 file_count:993
```
This commit is contained in:
Lisandro Pin
2025-12-29 04:30:42 +01:00
committed by GitHub
parent 2b529e310d
commit 6b98b52acc
28 changed files with 801 additions and 773 deletions

View File

@@ -1,260 +1,294 @@
package erasure_coding
import (
"math/bits"
"fmt"
"sort"
"github.com/dustin/go-humanize"
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
)
// ShardsInfo encapsulates information for EC shards
type ShardSize int64
type ShardInfo struct {
Id ShardId
Size ShardSize
}
type ShardsInfo struct {
shards map[ShardId]*ShardInfo
}
func NewShardsInfo() *ShardsInfo {
return &ShardsInfo{
shards: map[ShardId]*ShardInfo{},
}
}
// Initializes a ShardsInfo from a ECVolume.
func ShardsInfoFromVolume(ev *EcVolume) *ShardsInfo {
res := &ShardsInfo{
shards: map[ShardId]*ShardInfo{},
}
for _, s := range ev.Shards {
res.Set(s.ShardId, ShardSize(s.Size()))
}
return res
}
// Initializes a ShardsInfo from a VolumeEcShardInformationMessage proto.
func ShardsInfoFromVolumeEcShardInformationMessage(vi *master_pb.VolumeEcShardInformationMessage) *ShardsInfo {
res := NewShardsInfo()
if vi == nil {
return res
}
var id ShardId
var j int
for bitmap := vi.EcIndexBits; bitmap != 0; bitmap >>= 1 {
if bitmap&1 != 0 {
var size ShardSize
if j < len(vi.ShardSizes) {
size = ShardSize(vi.ShardSizes[j])
}
j++
res.shards[id] = &ShardInfo{
Id: id,
Size: size,
}
}
id++
}
return res
}
// Returns a count of shards from a VolumeEcShardInformationMessage proto.
func ShardsCountFromVolumeEcShardInformationMessage(vi *master_pb.VolumeEcShardInformationMessage) int {
if vi == nil {
return 0
}
return ShardsInfoFromVolumeEcShardInformationMessage(vi).Count()
}
// Returns a string representation for a ShardsInfo.
func (sp *ShardsInfo) String() string {
var res string
ids := sp.Ids()
for i, id := range sp.Ids() {
res += fmt.Sprintf("%d:%s", id, humanize.Bytes(uint64(sp.shards[id].Size)))
if i < len(ids)-1 {
res += " "
}
}
return res
}
// AsSlice converts a ShardsInfo to a slice of ShardInfo structs, ordered by shard ID.
func (si *ShardsInfo) AsSlice() []*ShardInfo {
res := make([]*ShardInfo, len(si.shards))
i := 0
for _, id := range si.Ids() {
res[i] = si.shards[id]
i++
}
return res
}
// Count returns the number of EC shards.
func (si *ShardsInfo) Count() int {
return len(si.shards)
}
// Has verifies if a shard ID is present.
func (si *ShardsInfo) Has(id ShardId) bool {
_, ok := si.shards[id]
return ok
}
// Ids returns a list of shard IDs, in ascending order.
func (si *ShardsInfo) Ids() []ShardId {
ids := []ShardId{}
for id := range si.shards {
ids = append(ids, id)
}
sort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] })
return ids
}
// IdsInt returns a list of shards ID as int, in ascending order.
func (si *ShardsInfo) IdsInt() []int {
ids := si.Ids()
res := make([]int, len(ids))
for i, id := range ids {
res[i] = int(id)
}
return res
}
// Ids returns a list of shards ID as uint32, in ascending order.
func (si *ShardsInfo) IdsUint32() []uint32 {
return ShardIdsToUint32(si.Ids())
}
// Set sets the size for a given shard ID.
func (si *ShardsInfo) Set(id ShardId, size ShardSize) {
if id >= MaxShardCount {
return
}
si.shards[id] = &ShardInfo{
Id: id,
Size: size,
}
}
// Delete deletes a shard by ID.
func (si *ShardsInfo) Delete(id ShardId) {
if id >= MaxShardCount {
return
}
if _, ok := si.shards[id]; ok {
delete(si.shards, id)
}
}
// Bitmap returns a bitmap for all existing shard IDs (bit 0 = shard #0... bit 31 = shard #31), in little endian.
func (si *ShardsInfo) Bitmap() uint32 {
var bits uint32
for id := range si.shards {
bits |= (1 << id)
}
return bits
}
// Size returns the size of a given shard ID, if present.
func (si *ShardsInfo) Size(id ShardId) ShardSize {
if s, ok := si.shards[id]; ok {
return s.Size
}
return 0
}
// TotalSize returns the size for all shards.
func (si *ShardsInfo) TotalSize() ShardSize {
var total ShardSize
for _, s := range si.shards {
total += s.Size
}
return total
}
// Sizes returns a compact slice of present shard sizes, from first to last.
func (si *ShardsInfo) Sizes() []ShardSize {
ids := si.Ids()
res := make([]ShardSize, len(ids))
if len(res) != 0 {
var i int
for _, id := range ids {
res[i] = si.shards[id].Size
i++
}
}
return res
}
// SizesInt64 returns a compact slice of present shard sizes, from first to last, as int64.
func (si *ShardsInfo) SizesInt64() []int64 {
res := make([]int64, si.Count())
for i, s := range si.Sizes() {
res[i] = int64(s)
}
return res
}
// Copy creates a copy of a ShardInfo.
func (si *ShardsInfo) Copy() *ShardsInfo {
new := NewShardsInfo()
for _, s := range si.shards {
new.Set(s.Id, s.Size)
}
return new
}
// DeleteParityShards removes party shards from a ShardInfo.
// Assumes default 10+4 EC layout where parity shards are IDs 10-13.
func (si *ShardsInfo) DeleteParityShards() {
for id := DataShardsCount; id < TotalShardsCount; id++ {
si.Delete(ShardId(id))
}
}
// MinusParityShards creates a ShardInfo copy, but with parity shards removed.
func (si *ShardsInfo) MinusParityShards() *ShardsInfo {
new := si.Copy()
new.DeleteParityShards()
return new
}
// Add merges all shards from another ShardInfo into this one.
func (si *ShardsInfo) Add(other *ShardsInfo) {
for _, s := range other.shards {
si.Set(s.Id, s.Size)
}
}
// Subtract removes all shards present on another ShardInfo.
func (si *ShardsInfo) Subtract(other *ShardsInfo) {
for _, s := range other.shards {
si.Delete(s.Id)
}
}
// Plus returns a new ShardInfo consisting of (this + other).
func (si *ShardsInfo) Plus(other *ShardsInfo) *ShardsInfo {
new := si.Copy()
new.Add(other)
return new
}
// Minus returns a new ShardInfo consisting of (this - other).
func (si *ShardsInfo) Minus(other *ShardsInfo) *ShardsInfo {
new := si.Copy()
new.Subtract(other)
return new
}
// data structure used in master
type EcVolumeInfo struct {
VolumeId needle.VolumeId
Collection string
ShardBits ShardBits
DiskType string
DiskId uint32 // ID of the disk this EC volume is on
ExpireAtSec uint64 // ec volume destroy time, calculated from the ec volume was created
ShardSizes []int64 // optimized: sizes for shards in order of set bits in ShardBits
}
func (ecInfo *EcVolumeInfo) AddShardId(id ShardId) {
oldBits := ecInfo.ShardBits
ecInfo.ShardBits = ecInfo.ShardBits.AddShardId(id)
// If shard was actually added, resize ShardSizes array
if oldBits != ecInfo.ShardBits {
ecInfo.resizeShardSizes(oldBits)
}
}
func (ecInfo *EcVolumeInfo) RemoveShardId(id ShardId) {
oldBits := ecInfo.ShardBits
ecInfo.ShardBits = ecInfo.ShardBits.RemoveShardId(id)
// If shard was actually removed, resize ShardSizes array
if oldBits != ecInfo.ShardBits {
ecInfo.resizeShardSizes(oldBits)
}
}
func (ecInfo *EcVolumeInfo) SetShardSize(id ShardId, size int64) {
ecInfo.ensureShardSizesInitialized()
if index, found := ecInfo.ShardBits.ShardIdToIndex(id); found && index < len(ecInfo.ShardSizes) {
ecInfo.ShardSizes[index] = size
}
}
func (ecInfo *EcVolumeInfo) GetShardSize(id ShardId) (int64, bool) {
if index, found := ecInfo.ShardBits.ShardIdToIndex(id); found && index < len(ecInfo.ShardSizes) {
return ecInfo.ShardSizes[index], true
}
return 0, false
}
func (ecInfo *EcVolumeInfo) GetTotalSize() int64 {
var total int64
for _, size := range ecInfo.ShardSizes {
total += size
}
return total
}
func (ecInfo *EcVolumeInfo) HasShardId(id ShardId) bool {
return ecInfo.ShardBits.HasShardId(id)
}
func (ecInfo *EcVolumeInfo) ShardIds() (ret []ShardId) {
return ecInfo.ShardBits.ShardIds()
}
func (ecInfo *EcVolumeInfo) ShardIdCount() (count int) {
return ecInfo.ShardBits.ShardIdCount()
DiskId uint32 // ID of the disk this EC volume is on
ExpireAtSec uint64 // ec volume destroy time, calculated from the ec volume was created
ShardsInfo *ShardsInfo
}
func (ecInfo *EcVolumeInfo) Minus(other *EcVolumeInfo) *EcVolumeInfo {
ret := &EcVolumeInfo{
return &EcVolumeInfo{
VolumeId: ecInfo.VolumeId,
Collection: ecInfo.Collection,
ShardBits: ecInfo.ShardBits.Minus(other.ShardBits),
ShardsInfo: ecInfo.ShardsInfo.Minus(other.ShardsInfo),
DiskType: ecInfo.DiskType,
DiskId: ecInfo.DiskId,
ExpireAtSec: ecInfo.ExpireAtSec,
}
// Initialize optimized ShardSizes for the result
ret.ensureShardSizesInitialized()
// Copy shard sizes for remaining shards
retIndex := 0
for shardId := ShardId(0); shardId < ShardId(MaxShardCount) && retIndex < len(ret.ShardSizes); shardId++ {
if ret.ShardBits.HasShardId(shardId) {
if size, exists := ecInfo.GetShardSize(shardId); exists {
ret.ShardSizes[retIndex] = size
}
retIndex++
}
}
return ret
}
func (ecInfo *EcVolumeInfo) ToVolumeEcShardInformationMessage() (ret *master_pb.VolumeEcShardInformationMessage) {
t := &master_pb.VolumeEcShardInformationMessage{
Id: uint32(ecInfo.VolumeId),
EcIndexBits: uint32(ecInfo.ShardBits),
Collection: ecInfo.Collection,
DiskType: ecInfo.DiskType,
ExpireAtSec: ecInfo.ExpireAtSec,
DiskId: ecInfo.DiskId,
}
// Directly set the optimized ShardSizes
t.ShardSizes = make([]int64, len(ecInfo.ShardSizes))
copy(t.ShardSizes, ecInfo.ShardSizes)
return t
}
type ShardBits uint32 // use bits to indicate the shard id, use 32 bits just for possible future extension
func (b ShardBits) AddShardId(id ShardId) ShardBits {
if id >= MaxShardCount {
return b // Reject out-of-range shard IDs
}
return b | (1 << id)
}
func (b ShardBits) RemoveShardId(id ShardId) ShardBits {
if id >= MaxShardCount {
return b // Reject out-of-range shard IDs
}
return b &^ (1 << id)
}
func (b ShardBits) HasShardId(id ShardId) bool {
if id >= MaxShardCount {
return false // Out-of-range shard IDs are never present
}
return b&(1<<id) > 0
}
func (b ShardBits) ShardIds() (ret []ShardId) {
for i := ShardId(0); i < ShardId(MaxShardCount); i++ {
if b.HasShardId(i) {
ret = append(ret, i)
}
}
return
}
func (b ShardBits) ToUint32Slice() (ret []uint32) {
for i := uint32(0); i < uint32(MaxShardCount); i++ {
if b.HasShardId(ShardId(i)) {
ret = append(ret, i)
}
}
return
}
func (b ShardBits) ShardIdCount() (count int) {
for count = 0; b > 0; count++ {
b &= b - 1
}
return
}
func (b ShardBits) Minus(other ShardBits) ShardBits {
return b &^ other
}
func (b ShardBits) Plus(other ShardBits) ShardBits {
return b | other
}
func (b ShardBits) MinusParityShards() ShardBits {
// Removes parity shards from the bit mask
// Assumes default 10+4 EC layout where parity shards are IDs 10-13
for i := DataShardsCount; i < TotalShardsCount; i++ {
b = b.RemoveShardId(ShardId(i))
}
return b
}
// ShardIdToIndex converts a shard ID to its index position in the ShardSizes slice
// Returns the index and true if the shard is present, -1 and false if not present
func (b ShardBits) ShardIdToIndex(shardId ShardId) (index int, found bool) {
if !b.HasShardId(shardId) {
return -1, false
}
// Create a mask for bits before the shardId
mask := uint32((1 << shardId) - 1)
// Count set bits before the shardId using efficient bit manipulation
index = bits.OnesCount32(uint32(b) & mask)
return index, true
}
// EachSetIndex iterates over all set shard IDs and calls the provided function for each
// This is highly efficient using bit manipulation - only iterates over actual set bits
func (b ShardBits) EachSetIndex(fn func(shardId ShardId)) {
bitsValue := uint32(b)
for bitsValue != 0 {
// Find the position of the least significant set bit
shardId := ShardId(bits.TrailingZeros32(bitsValue))
fn(shardId)
// Clear the least significant set bit
bitsValue &= bitsValue - 1
func (evi *EcVolumeInfo) ToVolumeEcShardInformationMessage() (ret *master_pb.VolumeEcShardInformationMessage) {
return &master_pb.VolumeEcShardInformationMessage{
Id: uint32(evi.VolumeId),
EcIndexBits: evi.ShardsInfo.Bitmap(),
ShardSizes: evi.ShardsInfo.SizesInt64(),
Collection: evi.Collection,
DiskType: evi.DiskType,
ExpireAtSec: evi.ExpireAtSec,
DiskId: evi.DiskId,
}
}
// IndexToShardId converts an index position in ShardSizes slice to the corresponding shard ID
// Returns the shard ID and true if valid index, -1 and false if invalid index
func (b ShardBits) IndexToShardId(index int) (shardId ShardId, found bool) {
if index < 0 {
return 0, false
}
currentIndex := 0
for i := ShardId(0); i < ShardId(MaxShardCount); i++ {
if b.HasShardId(i) {
if currentIndex == index {
return i, true
}
currentIndex++
}
}
return 0, false // index out of range
}
// Helper methods for EcVolumeInfo to manage the optimized ShardSizes slice
func (ecInfo *EcVolumeInfo) ensureShardSizesInitialized() {
expectedLength := ecInfo.ShardBits.ShardIdCount()
if ecInfo.ShardSizes == nil {
ecInfo.ShardSizes = make([]int64, expectedLength)
} else if len(ecInfo.ShardSizes) != expectedLength {
// Resize and preserve existing data
ecInfo.resizeShardSizes(ecInfo.ShardBits)
}
}
func (ecInfo *EcVolumeInfo) resizeShardSizes(prevShardBits ShardBits) {
expectedLength := ecInfo.ShardBits.ShardIdCount()
newSizes := make([]int64, expectedLength)
// Copy existing sizes to new positions based on current ShardBits
if len(ecInfo.ShardSizes) > 0 {
newIndex := 0
for shardId := ShardId(0); shardId < ShardId(MaxShardCount) && newIndex < expectedLength; shardId++ {
if ecInfo.ShardBits.HasShardId(shardId) {
// Try to find the size for this shard in the old array using previous ShardBits
if oldIndex, found := prevShardBits.ShardIdToIndex(shardId); found && oldIndex < len(ecInfo.ShardSizes) {
newSizes[newIndex] = ecInfo.ShardSizes[oldIndex]
}
newIndex++
}
}
}
ecInfo.ShardSizes = newSizes
}