Fix reporting of EC shard sizes from nodes to masters. (#7835)

SeaweedFS tracks EC shard sizes in its topology data structures, but this information is
never relayed to master servers. As a result, commands that report disk usage, such as
`volume.list` and `cluster.status`, yield incorrect figures when EC shards are present.

As an example, here is `volume.list` output for a simple 5-node test cluster, before...

```
> volume.list
Topology volumeSizeLimit:30000 MB hdd(volume:6/40 active:6 free:33 remote:0)
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9001 hdd(volume:1/8 active:1 free:7 remote:0)
        Disk hdd(volume:1/8 active:1 free:7 remote:0) id:0
          volume id:3  size:88967096  file_count:172  replica_placement:2  version:3  modified_at_second:1766349617
          ec volume id:1 collection: shards:[1 5]
        Disk hdd total size:88967096 file_count:172
      DataNode 192.168.10.111:9001 total size:88967096 file_count:172
      DataNode 192.168.10.111:9002 hdd(volume:2/8 active:2 free:6 remote:0)
        Disk hdd(volume:2/8 active:2 free:6 remote:0) id:0
          volume id:2  size:77267536  file_count:166  replica_placement:2  version:3  modified_at_second:1766349617
          volume id:3  size:88967096  file_count:172  replica_placement:2  version:3  modified_at_second:1766349617
          ec volume id:1 collection: shards:[0 4]
        Disk hdd total size:166234632 file_count:338
      DataNode 192.168.10.111:9002 total size:166234632 file_count:338
      DataNode 192.168.10.111:9003 hdd(volume:1/8 active:1 free:7 remote:0)
        Disk hdd(volume:1/8 active:1 free:7 remote:0) id:0
          volume id:2  size:77267536  file_count:166  replica_placement:2  version:3  modified_at_second:1766349617
          ec volume id:1 collection: shards:[2 6]
        Disk hdd total size:77267536 file_count:166
      DataNode 192.168.10.111:9003 total size:77267536 file_count:166
      DataNode 192.168.10.111:9004 hdd(volume:2/8 active:2 free:6 remote:0)
        Disk hdd(volume:2/8 active:2 free:6 remote:0) id:0
          volume id:2  size:77267536  file_count:166  replica_placement:2  version:3  modified_at_second:1766349617
          volume id:3  size:88967096  file_count:172  replica_placement:2  version:3  modified_at_second:1766349617
          ec volume id:1 collection: shards:[3 7]
        Disk hdd total size:166234632 file_count:338
      DataNode 192.168.10.111:9004 total size:166234632 file_count:338
      DataNode 192.168.10.111:9005 hdd(volume:0/8 active:0 free:8 remote:0)
        Disk hdd(volume:0/8 active:0 free:8 remote:0) id:0
          ec volume id:1 collection: shards:[8 9 10 11 12 13]
        Disk hdd total size:0 file_count:0
      DataNode 192.168.10.111:9005 total size:0 file_count:0
    Rack DefaultRack total size:498703896 file_count:1014
  DataCenter DefaultDataCenter total size:498703896 file_count:1014
total size:498703896 file_count:1014
```

...and after:

```
> volume.list
Topology volumeSizeLimit:30000 MB hdd(volume:6/40 active:6 free:33 remote:0)
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9001 hdd(volume:1/8 active:1 free:7 remote:0)
        Disk hdd(volume:1/8 active:1 free:7 remote:0) id:0
          volume id:2  size:81761800  file_count:161  replica_placement:2  version:3  modified_at_second:1766349495
          ec volume id:1 collection: shards:[1 5 9] sizes:[1:8.00 MiB 5:8.00 MiB 9:8.00 MiB] total:24.00 MiB
        Disk hdd total size:81761800 file_count:161
      DataNode 192.168.10.111:9001 total size:81761800 file_count:161
      DataNode 192.168.10.111:9002 hdd(volume:1/8 active:1 free:7 remote:0)
        Disk hdd(volume:1/8 active:1 free:7 remote:0) id:0
          volume id:3  size:88678712  file_count:170  replica_placement:2  version:3  modified_at_second:1766349495
          ec volume id:1 collection: shards:[11 12 13] sizes:[11:8.00 MiB 12:8.00 MiB 13:8.00 MiB] total:24.00 MiB
        Disk hdd total size:88678712 file_count:170
      DataNode 192.168.10.111:9002 total size:88678712 file_count:170
      DataNode 192.168.10.111:9003 hdd(volume:2/8 active:2 free:6 remote:0)
        Disk hdd(volume:2/8 active:2 free:6 remote:0) id:0
          volume id:2  size:81761800  file_count:161  replica_placement:2  version:3  modified_at_second:1766349495
          volume id:3  size:88678712  file_count:170  replica_placement:2  version:3  modified_at_second:1766349495
          ec volume id:1 collection: shards:[0 4 8] sizes:[0:8.00 MiB 4:8.00 MiB 8:8.00 MiB] total:24.00 MiB
        Disk hdd total size:170440512 file_count:331
      DataNode 192.168.10.111:9003 total size:170440512 file_count:331
      DataNode 192.168.10.111:9004 hdd(volume:2/8 active:2 free:6 remote:0)
        Disk hdd(volume:2/8 active:2 free:6 remote:0) id:0
          volume id:2  size:81761800  file_count:161  replica_placement:2  version:3  modified_at_second:1766349495
          volume id:3  size:88678712  file_count:170  replica_placement:2  version:3  modified_at_second:1766349495
          ec volume id:1 collection: shards:[2 6 10] sizes:[2:8.00 MiB 6:8.00 MiB 10:8.00 MiB] total:24.00 MiB
        Disk hdd total size:170440512 file_count:331
      DataNode 192.168.10.111:9004 total size:170440512 file_count:331
      DataNode 192.168.10.111:9005 hdd(volume:0/8 active:0 free:8 remote:0)
        Disk hdd(volume:0/8 active:0 free:8 remote:0) id:0
          ec volume id:1 collection: shards:[3 7] sizes:[3:8.00 MiB 7:8.00 MiB] total:16.00 MiB
        Disk hdd total size:0 file_count:0
      DataNode 192.168.10.111:9005 total size:0 file_count:0
    Rack DefaultRack total size:511321536 file_count:993
  DataCenter DefaultDataCenter total size:511321536 file_count:993
total size:511321536 file_count:993
```
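
The change threads per-shard sizes through the `VolumeEcShardInformationMessage` proto reported to masters: `EcIndexBits` marks which shards are present, and `ShardSizes` carries their sizes in ascending shard-ID order, with the new `ShardsInfo` type wrapping the encoding on both ends. Below is a minimal, self-contained sketch of that round-trip using the APIs introduced in this commit; it is not part of the commit itself, and the printed values are illustrative:

```go
package main

import (
	"fmt"

	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
	"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
)

func main() {
	// Volume server side: record the sizes of the EC shards this node holds.
	si := erasure_coding.NewShardsInfo()
	si.Set(1, 8<<20) // shard #1, 8 MiB
	si.Set(5, 8<<20) // shard #5, 8 MiB

	// Serialize into the shard information message reported to masters.
	msg := &master_pb.VolumeEcShardInformationMessage{
		Id:          1,
		EcIndexBits: si.Bitmap(),     // bit n set <=> shard #n present
		ShardSizes:  si.SizesInt64(), // compact, ordered by ascending shard ID
	}

	// Master side: reconstruct per-shard sizes from the message.
	got := erasure_coding.ShardsInfoFromVolumeEcShardInformationMessage(msg)
	fmt.Println(got.String())    // e.g. "1:8.4 MB 5:8.4 MB"
	fmt.Println(got.TotalSize()) // 16777216
}
```
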
Author: Lisandro Pin
Date: 2025-12-29 04:30:42 +01:00 (committed by GitHub)
Commit: 6b98b52acc (parent: 2b529e310d)
28 changed files with 801 additions and 773 deletions


```
@@ -15,6 +15,33 @@ import (
 type ShardId uint8
 
+// Converts a slice of uint32s to ShardId.
+func Uint32ToShardIds(ids []uint32) []ShardId {
+	res := make([]ShardId, len(ids))
+	for i, id := range ids {
+		res[i] = ShardId(id)
+	}
+	return res
+}
+
+// Converts a slice of ShardIds to uint32
+func ShardIdsToUint32(ids []ShardId) []uint32 {
+	res := make([]uint32, len(ids))
+	for i, id := range ids {
+		res[i] = uint32(id)
+	}
+	return res
+}
+
+// Returns a slice of all possible ShardIds.
+func AllShardIds() []ShardId {
+	res := make([]ShardId, TotalShardsCount)
+	for i := range res {
+		res[i] = ShardId(i)
+	}
+	return res
+}
+
 type EcVolumeShard struct {
 	VolumeId needle.VolumeId
 	ShardId  ShardId
```


```
@@ -1,68 +0,0 @@
-package erasure_coding
-
-import (
-	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
-)
-
-// GetShardSize returns the size of a specific shard from VolumeEcShardInformationMessage
-// Returns the size and true if the shard exists, 0 and false if not present
-func GetShardSize(msg *master_pb.VolumeEcShardInformationMessage, shardId ShardId) (size int64, found bool) {
-	if msg == nil || msg.ShardSizes == nil {
-		return 0, false
-	}
-	shardBits := ShardBits(msg.EcIndexBits)
-	index, found := shardBits.ShardIdToIndex(shardId)
-	if !found || index >= len(msg.ShardSizes) {
-		return 0, false
-	}
-	return msg.ShardSizes[index], true
-}
-
-// SetShardSize sets the size of a specific shard in VolumeEcShardInformationMessage
-// Returns true if successful, false if the shard is not present in EcIndexBits
-func SetShardSize(msg *master_pb.VolumeEcShardInformationMessage, shardId ShardId, size int64) bool {
-	if msg == nil {
-		return false
-	}
-	shardBits := ShardBits(msg.EcIndexBits)
-	index, found := shardBits.ShardIdToIndex(shardId)
-	if !found {
-		return false
-	}
-	// Initialize ShardSizes slice if needed
-	expectedLength := shardBits.ShardIdCount()
-	if msg.ShardSizes == nil {
-		msg.ShardSizes = make([]int64, expectedLength)
-	} else if len(msg.ShardSizes) != expectedLength {
-		// Resize the slice to match the expected length
-		newSizes := make([]int64, expectedLength)
-		copy(newSizes, msg.ShardSizes)
-		msg.ShardSizes = newSizes
-	}
-	if index >= len(msg.ShardSizes) {
-		return false
-	}
-	msg.ShardSizes[index] = size
-	return true
-}
-
-// InitializeShardSizes initializes the ShardSizes slice based on EcIndexBits
-// This ensures the slice has the correct length for all present shards
-func InitializeShardSizes(msg *master_pb.VolumeEcShardInformationMessage) {
-	if msg == nil {
-		return
-	}
-	shardBits := ShardBits(msg.EcIndexBits)
-	expectedLength := shardBits.ShardIdCount()
-	if msg.ShardSizes == nil || len(msg.ShardSizes) != expectedLength {
-		msg.ShardSizes = make([]int64, expectedLength)
-	}
-}
```


```
@@ -1,117 +0,0 @@
-package erasure_coding
-
-import (
-	"testing"
-
-	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
-)
-
-func TestShardSizeHelpers(t *testing.T) {
-	// Create a message with shards 0, 2, and 5 present (EcIndexBits = 0b100101 = 37)
-	msg := &master_pb.VolumeEcShardInformationMessage{
-		Id:          123,
-		EcIndexBits: 37, // Binary: 100101, shards 0, 2, 5 are present
-	}
-
-	// Test SetShardSize
-	if !SetShardSize(msg, 0, 1000) {
-		t.Error("Failed to set size for shard 0")
-	}
-	if !SetShardSize(msg, 2, 2000) {
-		t.Error("Failed to set size for shard 2")
-	}
-	if !SetShardSize(msg, 5, 5000) {
-		t.Error("Failed to set size for shard 5")
-	}
-
-	// Test setting size for non-present shard should fail
-	if SetShardSize(msg, 1, 1500) {
-		t.Error("Should not be able to set size for non-present shard 1")
-	}
-
-	// Verify ShardSizes slice has correct length (3 shards)
-	if len(msg.ShardSizes) != 3 {
-		t.Errorf("Expected ShardSizes length 3, got %d", len(msg.ShardSizes))
-	}
-
-	// Test GetShardSize
-	if size, found := GetShardSize(msg, 0); !found || size != 1000 {
-		t.Errorf("Expected shard 0 size 1000, got %d (found: %v)", size, found)
-	}
-	if size, found := GetShardSize(msg, 2); !found || size != 2000 {
-		t.Errorf("Expected shard 2 size 2000, got %d (found: %v)", size, found)
-	}
-	if size, found := GetShardSize(msg, 5); !found || size != 5000 {
-		t.Errorf("Expected shard 5 size 5000, got %d (found: %v)", size, found)
-	}
-
-	// Test getting size for non-present shard
-	if size, found := GetShardSize(msg, 1); found {
-		t.Errorf("Should not find shard 1, but got size %d", size)
-	}
-
-	// Test direct slice access
-	if len(msg.ShardSizes) != 3 {
-		t.Errorf("Expected 3 shard sizes in slice, got %d", len(msg.ShardSizes))
-	}
-	expectedSizes := []int64{1000, 2000, 5000} // Ordered by shard ID: 0, 2, 5
-	for i, expectedSize := range expectedSizes {
-		if i < len(msg.ShardSizes) && msg.ShardSizes[i] != expectedSize {
-			t.Errorf("Expected ShardSizes[%d] = %d, got %d", i, expectedSize, msg.ShardSizes[i])
-		}
-	}
-}
-
-func TestShardBitsHelpers(t *testing.T) {
-	// Test with EcIndexBits = 37 (binary: 100101, shards 0, 2, 5)
-	shardBits := ShardBits(37)
-
-	// Test ShardIdToIndex
-	if index, found := shardBits.ShardIdToIndex(0); !found || index != 0 {
-		t.Errorf("Expected shard 0 at index 0, got %d (found: %v)", index, found)
-	}
-	if index, found := shardBits.ShardIdToIndex(2); !found || index != 1 {
-		t.Errorf("Expected shard 2 at index 1, got %d (found: %v)", index, found)
-	}
-	if index, found := shardBits.ShardIdToIndex(5); !found || index != 2 {
-		t.Errorf("Expected shard 5 at index 2, got %d (found: %v)", index, found)
-	}
-
-	// Test for non-present shard
-	if index, found := shardBits.ShardIdToIndex(1); found {
-		t.Errorf("Should not find shard 1, but got index %d", index)
-	}
-
-	// Test IndexToShardId
-	if shardId, found := shardBits.IndexToShardId(0); !found || shardId != 0 {
-		t.Errorf("Expected index 0 to be shard 0, got %d (found: %v)", shardId, found)
-	}
-	if shardId, found := shardBits.IndexToShardId(1); !found || shardId != 2 {
-		t.Errorf("Expected index 1 to be shard 2, got %d (found: %v)", shardId, found)
-	}
-	if shardId, found := shardBits.IndexToShardId(2); !found || shardId != 5 {
-		t.Errorf("Expected index 2 to be shard 5, got %d (found: %v)", shardId, found)
-	}
-
-	// Test for invalid index
-	if shardId, found := shardBits.IndexToShardId(3); found {
-		t.Errorf("Should not find shard for index 3, but got shard %d", shardId)
-	}
-
-	// Test EachSetIndex
-	var collectedShards []ShardId
-	shardBits.EachSetIndex(func(shardId ShardId) {
-		collectedShards = append(collectedShards, shardId)
-	})
-	expectedShards := []ShardId{0, 2, 5}
-	if len(collectedShards) != len(expectedShards) {
-		t.Errorf("Expected EachSetIndex to collect %v, got %v", expectedShards, collectedShards)
-	}
-	for i, expected := range expectedShards {
-		if i >= len(collectedShards) || collectedShards[i] != expected {
-			t.Errorf("Expected EachSetIndex to collect %v, got %v", expectedShards, collectedShards)
-			break
-		}
-	}
-}
```


```
@@ -3,7 +3,6 @@ package erasure_coding
 import (
 	"errors"
 	"fmt"
-	"math"
 	"os"
 	"slices"
 	"sync"
@@ -185,7 +184,6 @@
 }
 
 func (ev *EcVolume) Destroy() {
 	ev.Close()
 
 	for _, s := range ev.Shards {
@@ -240,29 +238,12 @@ func (ev *EcVolume) ShardIdList() (shardIds []ShardId) {
 	return
 }
 
-type ShardInfo struct {
-	ShardId ShardId
-	Size    uint64
-}
-
-func (ev *EcVolume) ShardDetails() (shards []ShardInfo) {
-	for _, s := range ev.Shards {
-		shardSize := s.Size()
-		if shardSize >= 0 {
-			shards = append(shards, ShardInfo{
-				ShardId: s.ShardId,
-				Size:    uint64(shardSize),
-			})
-		}
-	}
-	return
-}
-
 func (ev *EcVolume) ToVolumeEcShardInformationMessage(diskId uint32) (messages []*master_pb.VolumeEcShardInformationMessage) {
-	prevVolumeId := needle.VolumeId(math.MaxUint32)
-	var m *master_pb.VolumeEcShardInformationMessage
+	ecInfoPerVolume := map[needle.VolumeId]*master_pb.VolumeEcShardInformationMessage{}
 	for _, s := range ev.Shards {
-		if s.VolumeId != prevVolumeId {
+		m, ok := ecInfoPerVolume[s.VolumeId]
+		if !ok {
 			m = &master_pb.VolumeEcShardInformationMessage{
 				Id:         uint32(s.VolumeId),
 				Collection: s.Collection,
@@ -270,13 +251,18 @@
 				ExpireAtSec: ev.ExpireAtSec,
 				DiskId:      diskId,
 			}
-			messages = append(messages, m)
+			ecInfoPerVolume[s.VolumeId] = m
 		}
-		prevVolumeId = s.VolumeId
-		m.EcIndexBits = uint32(ShardBits(m.EcIndexBits).AddShardId(s.ShardId))
-		// Add shard size information using the optimized format
-		SetShardSize(m, s.ShardId, s.Size())
+		// Update EC shard bits and sizes.
+		si := ShardsInfoFromVolumeEcShardInformationMessage(m)
+		si.Set(s.ShardId, ShardSize(s.Size()))
+		m.EcIndexBits = uint32(si.Bitmap())
+		m.ShardSizes = si.SizesInt64()
 	}
+
+	for _, m := range ecInfoPerVolume {
+		messages = append(messages, m)
+	}
 	return
 }
```


```
@@ -1,260 +1,294 @@
 package erasure_coding
 
 import (
-	"math/bits"
+	"fmt"
+	"sort"
+
+	"github.com/dustin/go-humanize"
 
 	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
 	"github.com/seaweedfs/seaweedfs/weed/storage/needle"
 )
 
+// ShardsInfo encapsulates information for EC shards.
+type ShardSize int64
+
+type ShardInfo struct {
+	Id   ShardId
+	Size ShardSize
+}
+
+type ShardsInfo struct {
+	shards map[ShardId]*ShardInfo
+}
+
+func NewShardsInfo() *ShardsInfo {
+	return &ShardsInfo{
+		shards: map[ShardId]*ShardInfo{},
+	}
+}
+
+// Initializes a ShardsInfo from an EcVolume.
+func ShardsInfoFromVolume(ev *EcVolume) *ShardsInfo {
+	res := &ShardsInfo{
+		shards: map[ShardId]*ShardInfo{},
+	}
+	for _, s := range ev.Shards {
+		res.Set(s.ShardId, ShardSize(s.Size()))
+	}
+	return res
+}
+
+// Initializes a ShardsInfo from a VolumeEcShardInformationMessage proto.
+func ShardsInfoFromVolumeEcShardInformationMessage(vi *master_pb.VolumeEcShardInformationMessage) *ShardsInfo {
+	res := NewShardsInfo()
+	if vi == nil {
+		return res
+	}
+
+	var id ShardId
+	var j int
+	for bitmap := vi.EcIndexBits; bitmap != 0; bitmap >>= 1 {
+		if bitmap&1 != 0 {
+			var size ShardSize
+			if j < len(vi.ShardSizes) {
+				size = ShardSize(vi.ShardSizes[j])
+			}
+			j++
+			res.shards[id] = &ShardInfo{
+				Id:   id,
+				Size: size,
+			}
+		}
+		id++
+	}
+
+	return res
+}
+
+// Returns a count of shards from a VolumeEcShardInformationMessage proto.
+func ShardsCountFromVolumeEcShardInformationMessage(vi *master_pb.VolumeEcShardInformationMessage) int {
+	if vi == nil {
+		return 0
+	}
+	return ShardsInfoFromVolumeEcShardInformationMessage(vi).Count()
+}
+
+// Returns a string representation for a ShardsInfo.
+func (sp *ShardsInfo) String() string {
+	var res string
+	ids := sp.Ids()
+	for i, id := range ids {
+		res += fmt.Sprintf("%d:%s", id, humanize.Bytes(uint64(sp.shards[id].Size)))
+		if i < len(ids)-1 {
+			res += " "
+		}
+	}
+	return res
+}
+
+// AsSlice converts a ShardsInfo to a slice of ShardInfo structs, ordered by shard ID.
+func (si *ShardsInfo) AsSlice() []*ShardInfo {
+	res := make([]*ShardInfo, len(si.shards))
+	i := 0
+	for _, id := range si.Ids() {
+		res[i] = si.shards[id]
+		i++
+	}
+	return res
+}
+
+// Count returns the number of EC shards.
+func (si *ShardsInfo) Count() int {
+	return len(si.shards)
+}
+
+// Has verifies if a shard ID is present.
+func (si *ShardsInfo) Has(id ShardId) bool {
+	_, ok := si.shards[id]
+	return ok
+}
+
+// Ids returns a list of shard IDs, in ascending order.
+func (si *ShardsInfo) Ids() []ShardId {
+	ids := []ShardId{}
+	for id := range si.shards {
+		ids = append(ids, id)
+	}
+	sort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] })
+	return ids
+}
+
+// IdsInt returns a list of shard IDs as int, in ascending order.
+func (si *ShardsInfo) IdsInt() []int {
+	ids := si.Ids()
+	res := make([]int, len(ids))
+	for i, id := range ids {
+		res[i] = int(id)
+	}
+	return res
+}
+
+// IdsUint32 returns a list of shard IDs as uint32, in ascending order.
+func (si *ShardsInfo) IdsUint32() []uint32 {
+	return ShardIdsToUint32(si.Ids())
+}
+
+// Set sets the size for a given shard ID.
+func (si *ShardsInfo) Set(id ShardId, size ShardSize) {
+	if id >= MaxShardCount {
+		return
+	}
+	si.shards[id] = &ShardInfo{
+		Id:   id,
+		Size: size,
+	}
+}
+
+// Delete deletes a shard by ID.
+func (si *ShardsInfo) Delete(id ShardId) {
+	if id >= MaxShardCount {
+		return
+	}
+	if _, ok := si.shards[id]; ok {
+		delete(si.shards, id)
+	}
+}
+
+// Bitmap returns a bitmap for all existing shard IDs (bit 0 = shard #0... bit 31 = shard #31), in little endian.
+func (si *ShardsInfo) Bitmap() uint32 {
+	var bits uint32
+	for id := range si.shards {
+		bits |= (1 << id)
+	}
+	return bits
+}
+
+// Size returns the size of a given shard ID, if present.
+func (si *ShardsInfo) Size(id ShardId) ShardSize {
+	if s, ok := si.shards[id]; ok {
+		return s.Size
+	}
+	return 0
+}
+
+// TotalSize returns the size for all shards.
+func (si *ShardsInfo) TotalSize() ShardSize {
+	var total ShardSize
+	for _, s := range si.shards {
+		total += s.Size
+	}
+	return total
+}
+
+// Sizes returns a compact slice of present shard sizes, from first to last.
+func (si *ShardsInfo) Sizes() []ShardSize {
+	ids := si.Ids()
+	res := make([]ShardSize, len(ids))
+	if len(res) != 0 {
+		var i int
+		for _, id := range ids {
+			res[i] = si.shards[id].Size
+			i++
+		}
+	}
+	return res
+}
+
+// SizesInt64 returns a compact slice of present shard sizes, from first to last, as int64.
+func (si *ShardsInfo) SizesInt64() []int64 {
+	res := make([]int64, si.Count())
+	for i, s := range si.Sizes() {
+		res[i] = int64(s)
+	}
+	return res
+}
+
+// Copy creates a copy of a ShardsInfo.
+func (si *ShardsInfo) Copy() *ShardsInfo {
+	new := NewShardsInfo()
+	for _, s := range si.shards {
+		new.Set(s.Id, s.Size)
+	}
+	return new
+}
+
+// DeleteParityShards removes parity shards from a ShardsInfo.
+// Assumes default 10+4 EC layout where parity shards are IDs 10-13.
+func (si *ShardsInfo) DeleteParityShards() {
+	for id := DataShardsCount; id < TotalShardsCount; id++ {
+		si.Delete(ShardId(id))
+	}
+}
+
+// MinusParityShards creates a ShardsInfo copy, but with parity shards removed.
+func (si *ShardsInfo) MinusParityShards() *ShardsInfo {
+	new := si.Copy()
+	new.DeleteParityShards()
+	return new
+}
+
+// Add merges all shards from another ShardsInfo into this one.
+func (si *ShardsInfo) Add(other *ShardsInfo) {
+	for _, s := range other.shards {
+		si.Set(s.Id, s.Size)
+	}
+}
+
+// Subtract removes all shards present on another ShardsInfo.
+func (si *ShardsInfo) Subtract(other *ShardsInfo) {
+	for _, s := range other.shards {
+		si.Delete(s.Id)
+	}
+}
+
+// Plus returns a new ShardsInfo consisting of (this + other).
+func (si *ShardsInfo) Plus(other *ShardsInfo) *ShardsInfo {
+	new := si.Copy()
+	new.Add(other)
+	return new
+}
+
+// Minus returns a new ShardsInfo consisting of (this - other).
+func (si *ShardsInfo) Minus(other *ShardsInfo) *ShardsInfo {
+	new := si.Copy()
+	new.Subtract(other)
+	return new
+}
+
 // data structure used in master
 type EcVolumeInfo struct {
 	VolumeId    needle.VolumeId
 	Collection  string
-	ShardBits   ShardBits
 	DiskType    string
 	DiskId      uint32 // ID of the disk this EC volume is on
 	ExpireAtSec uint64 // ec volume destroy time, calculated from the ec volume was created
-	ShardSizes  []int64 // optimized: sizes for shards in order of set bits in ShardBits
+	ShardsInfo  *ShardsInfo
 }
 
-func (ecInfo *EcVolumeInfo) AddShardId(id ShardId) {
-	oldBits := ecInfo.ShardBits
-	ecInfo.ShardBits = ecInfo.ShardBits.AddShardId(id)
-	// If shard was actually added, resize ShardSizes array
-	if oldBits != ecInfo.ShardBits {
-		ecInfo.resizeShardSizes(oldBits)
-	}
-}
-
-func (ecInfo *EcVolumeInfo) RemoveShardId(id ShardId) {
-	oldBits := ecInfo.ShardBits
-	ecInfo.ShardBits = ecInfo.ShardBits.RemoveShardId(id)
-	// If shard was actually removed, resize ShardSizes array
-	if oldBits != ecInfo.ShardBits {
-		ecInfo.resizeShardSizes(oldBits)
-	}
-}
-
-func (ecInfo *EcVolumeInfo) SetShardSize(id ShardId, size int64) {
-	ecInfo.ensureShardSizesInitialized()
-	if index, found := ecInfo.ShardBits.ShardIdToIndex(id); found && index < len(ecInfo.ShardSizes) {
-		ecInfo.ShardSizes[index] = size
-	}
-}
-
-func (ecInfo *EcVolumeInfo) GetShardSize(id ShardId) (int64, bool) {
-	if index, found := ecInfo.ShardBits.ShardIdToIndex(id); found && index < len(ecInfo.ShardSizes) {
-		return ecInfo.ShardSizes[index], true
-	}
-	return 0, false
-}
-
-func (ecInfo *EcVolumeInfo) GetTotalSize() int64 {
-	var total int64
-	for _, size := range ecInfo.ShardSizes {
-		total += size
-	}
-	return total
-}
-
-func (ecInfo *EcVolumeInfo) HasShardId(id ShardId) bool {
-	return ecInfo.ShardBits.HasShardId(id)
-}
-
-func (ecInfo *EcVolumeInfo) ShardIds() (ret []ShardId) {
-	return ecInfo.ShardBits.ShardIds()
-}
-
-func (ecInfo *EcVolumeInfo) ShardIdCount() (count int) {
-	return ecInfo.ShardBits.ShardIdCount()
-}
-
 func (ecInfo *EcVolumeInfo) Minus(other *EcVolumeInfo) *EcVolumeInfo {
-	ret := &EcVolumeInfo{
+	return &EcVolumeInfo{
 		VolumeId:    ecInfo.VolumeId,
 		Collection:  ecInfo.Collection,
-		ShardBits:   ecInfo.ShardBits.Minus(other.ShardBits),
+		ShardsInfo:  ecInfo.ShardsInfo.Minus(other.ShardsInfo),
 		DiskType:    ecInfo.DiskType,
 		DiskId:      ecInfo.DiskId,
 		ExpireAtSec: ecInfo.ExpireAtSec,
 	}
-	// Initialize optimized ShardSizes for the result
-	ret.ensureShardSizesInitialized()
-	// Copy shard sizes for remaining shards
-	retIndex := 0
-	for shardId := ShardId(0); shardId < ShardId(MaxShardCount) && retIndex < len(ret.ShardSizes); shardId++ {
-		if ret.ShardBits.HasShardId(shardId) {
-			if size, exists := ecInfo.GetShardSize(shardId); exists {
-				ret.ShardSizes[retIndex] = size
-			}
-			retIndex++
-		}
-	}
-	return ret
 }
 
-func (ecInfo *EcVolumeInfo) ToVolumeEcShardInformationMessage() (ret *master_pb.VolumeEcShardInformationMessage) {
-	t := &master_pb.VolumeEcShardInformationMessage{
-		Id:          uint32(ecInfo.VolumeId),
-		EcIndexBits: uint32(ecInfo.ShardBits),
-		Collection:  ecInfo.Collection,
-		DiskType:    ecInfo.DiskType,
-		ExpireAtSec: ecInfo.ExpireAtSec,
-		DiskId:      ecInfo.DiskId,
-	}
-	// Directly set the optimized ShardSizes
-	t.ShardSizes = make([]int64, len(ecInfo.ShardSizes))
-	copy(t.ShardSizes, ecInfo.ShardSizes)
-	return t
-}
-
-type ShardBits uint32 // use bits to indicate the shard id, use 32 bits just for possible future extension
-
-func (b ShardBits) AddShardId(id ShardId) ShardBits {
-	if id >= MaxShardCount {
-		return b // Reject out-of-range shard IDs
-	}
-	return b | (1 << id)
-}
-
-func (b ShardBits) RemoveShardId(id ShardId) ShardBits {
-	if id >= MaxShardCount {
-		return b // Reject out-of-range shard IDs
-	}
-	return b &^ (1 << id)
-}
-
-func (b ShardBits) HasShardId(id ShardId) bool {
-	if id >= MaxShardCount {
-		return false // Out-of-range shard IDs are never present
-	}
-	return b&(1<<id) > 0
-}
-
-func (b ShardBits) ShardIds() (ret []ShardId) {
-	for i := ShardId(0); i < ShardId(MaxShardCount); i++ {
-		if b.HasShardId(i) {
-			ret = append(ret, i)
-		}
-	}
-	return
-}
-
-func (b ShardBits) ToUint32Slice() (ret []uint32) {
-	for i := uint32(0); i < uint32(MaxShardCount); i++ {
-		if b.HasShardId(ShardId(i)) {
-			ret = append(ret, i)
-		}
-	}
-	return
-}
-
-func (b ShardBits) ShardIdCount() (count int) {
-	for count = 0; b > 0; count++ {
-		b &= b - 1
-	}
-	return
-}
-
-func (b ShardBits) Minus(other ShardBits) ShardBits {
-	return b &^ other
-}
-
-func (b ShardBits) Plus(other ShardBits) ShardBits {
-	return b | other
-}
-
-func (b ShardBits) MinusParityShards() ShardBits {
-	// Removes parity shards from the bit mask
-	// Assumes default 10+4 EC layout where parity shards are IDs 10-13
-	for i := DataShardsCount; i < TotalShardsCount; i++ {
-		b = b.RemoveShardId(ShardId(i))
-	}
-	return b
-}
-
-// ShardIdToIndex converts a shard ID to its index position in the ShardSizes slice
-// Returns the index and true if the shard is present, -1 and false if not present
-func (b ShardBits) ShardIdToIndex(shardId ShardId) (index int, found bool) {
-	if !b.HasShardId(shardId) {
-		return -1, false
-	}
-	// Create a mask for bits before the shardId
-	mask := uint32((1 << shardId) - 1)
-	// Count set bits before the shardId using efficient bit manipulation
-	index = bits.OnesCount32(uint32(b) & mask)
-	return index, true
-}
-
-// EachSetIndex iterates over all set shard IDs and calls the provided function for each
-// This is highly efficient using bit manipulation - only iterates over actual set bits
-func (b ShardBits) EachSetIndex(fn func(shardId ShardId)) {
-	bitsValue := uint32(b)
-	for bitsValue != 0 {
-		// Find the position of the least significant set bit
-		shardId := ShardId(bits.TrailingZeros32(bitsValue))
-		fn(shardId)
-		// Clear the least significant set bit
-		bitsValue &= bitsValue - 1
-	}
-}
-
-// IndexToShardId converts an index position in ShardSizes slice to the corresponding shard ID
-// Returns the shard ID and true if valid index, -1 and false if invalid index
-func (b ShardBits) IndexToShardId(index int) (shardId ShardId, found bool) {
-	if index < 0 {
-		return 0, false
-	}
-	currentIndex := 0
-	for i := ShardId(0); i < ShardId(MaxShardCount); i++ {
-		if b.HasShardId(i) {
-			if currentIndex == index {
-				return i, true
-			}
-			currentIndex++
-		}
-	}
-	return 0, false // index out of range
-}
-
-// Helper methods for EcVolumeInfo to manage the optimized ShardSizes slice
-func (ecInfo *EcVolumeInfo) ensureShardSizesInitialized() {
-	expectedLength := ecInfo.ShardBits.ShardIdCount()
-	if ecInfo.ShardSizes == nil {
-		ecInfo.ShardSizes = make([]int64, expectedLength)
-	} else if len(ecInfo.ShardSizes) != expectedLength {
-		// Resize and preserve existing data
-		ecInfo.resizeShardSizes(ecInfo.ShardBits)
-	}
-}
-
-func (ecInfo *EcVolumeInfo) resizeShardSizes(prevShardBits ShardBits) {
-	expectedLength := ecInfo.ShardBits.ShardIdCount()
-	newSizes := make([]int64, expectedLength)
-	// Copy existing sizes to new positions based on current ShardBits
-	if len(ecInfo.ShardSizes) > 0 {
-		newIndex := 0
-		for shardId := ShardId(0); shardId < ShardId(MaxShardCount) && newIndex < expectedLength; shardId++ {
-			if ecInfo.ShardBits.HasShardId(shardId) {
-				// Try to find the size for this shard in the old array using previous ShardBits
-				if oldIndex, found := prevShardBits.ShardIdToIndex(shardId); found && oldIndex < len(ecInfo.ShardSizes) {
-					newSizes[newIndex] = ecInfo.ShardSizes[oldIndex]
-				}
-				newIndex++
-			}
-		}
-	}
-	ecInfo.ShardSizes = newSizes
-}
+func (evi *EcVolumeInfo) ToVolumeEcShardInformationMessage() (ret *master_pb.VolumeEcShardInformationMessage) {
+	return &master_pb.VolumeEcShardInformationMessage{
+		Id:          uint32(evi.VolumeId),
+		EcIndexBits: evi.ShardsInfo.Bitmap(),
+		ShardSizes:  evi.ShardsInfo.SizesInt64(),
+		Collection:  evi.Collection,
+		DiskType:    evi.DiskType,
+		ExpireAtSec: evi.ExpireAtSec,
+		DiskId:      evi.DiskId,
+	}
+}
```


```
@@ -0,0 +1,173 @@
+package erasure_coding_test
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
+	erasure_coding "github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
+)
+
+func TestShardsInfoDeleteParityShards(t *testing.T) {
+	si := erasure_coding.NewShardsInfo()
+	for _, id := range erasure_coding.AllShardIds() {
+		si.Set(id, 123)
+	}
+	si.DeleteParityShards()
+
+	if got, want := si.String(), "0:123 B 1:123 B 2:123 B 3:123 B 4:123 B 5:123 B 6:123 B 7:123 B 8:123 B 9:123 B"; got != want {
+		t.Errorf("expected %q, got %q", want, got)
+	}
+}
+
+func TestShardsInfoAsSlice(t *testing.T) {
+	si := erasure_coding.NewShardsInfo()
+	si.Set(5, 555)
+	si.Set(2, 222)
+	si.Set(7, 777)
+	si.Set(1, 111)
+
+	want := []*erasure_coding.ShardInfo{
+		&erasure_coding.ShardInfo{Id: 1, Size: 111},
+		&erasure_coding.ShardInfo{Id: 2, Size: 222},
+		&erasure_coding.ShardInfo{Id: 5, Size: 555},
+		&erasure_coding.ShardInfo{Id: 7, Size: 777},
+	}
+	if got := si.AsSlice(); !reflect.DeepEqual(got, want) {
+		t.Errorf("expected %v, got %v", want, got)
+	}
+}
+
+func TestShardsInfoSerialize(t *testing.T) {
+	testCases := []struct {
+		name      string
+		shardIds  map[erasure_coding.ShardId]erasure_coding.ShardSize
+		wantBits  uint32
+		wantSizes []erasure_coding.ShardSize
+	}{
+		{
+			name:      "no bits",
+			shardIds:  nil,
+			wantBits:  0b0,
+			wantSizes: []erasure_coding.ShardSize{},
+		},
+		{
+			name: "single shard, first",
+			shardIds: map[erasure_coding.ShardId]erasure_coding.ShardSize{
+				0: 2345,
+			},
+			wantBits:  0b1,
+			wantSizes: []erasure_coding.ShardSize{2345},
+		},
+		{
+			name: "single shard, 5th",
+			shardIds: map[erasure_coding.ShardId]erasure_coding.ShardSize{
+				4: 6789,
+			},
+			wantBits:  0b10000,
+			wantSizes: []erasure_coding.ShardSize{6789},
+		},
+		{
+			name: "multiple shards",
+			shardIds: map[erasure_coding.ShardId]erasure_coding.ShardSize{
+				8: 800,
+				0: 5,
+				3: 300,
+				1: 100,
+			},
+			wantBits:  0b100001011,
+			wantSizes: []erasure_coding.ShardSize{5, 100, 300, 800},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			si := erasure_coding.NewShardsInfo()
+			for id, size := range tc.shardIds {
+				si.Set(id, size)
+			}
+			if got, want := si.Bitmap(), tc.wantBits; got != want {
+				t.Errorf("expected bits %v, got %v", want, got)
+			}
+			if got, want := si.Sizes(), tc.wantSizes; !reflect.DeepEqual(got, want) {
+				t.Errorf("expected sizes %v, got %v", want, got)
+			}
+		})
+	}
+}
+
+func TestShardsInfoFromVolumeEcShardInformationMessage(t *testing.T) {
+	testCases := []struct {
+		name    string
+		ecvInfo *master_pb.VolumeEcShardInformationMessage
+		want    string
+	}{
+		{
+			name:    "no msg",
+			ecvInfo: nil,
+			want:    "",
+		},
+		{
+			name:    "no shards",
+			ecvInfo: &master_pb.VolumeEcShardInformationMessage{},
+			want:    "",
+		},
+		{
+			name: "single shard",
+			ecvInfo: &master_pb.VolumeEcShardInformationMessage{
+				EcIndexBits: 0b100,
+				ShardSizes:  []int64{333},
+			},
+			want: "2:333 B",
+		},
+		{
+			name: "multiple shards",
+			ecvInfo: &master_pb.VolumeEcShardInformationMessage{
+				EcIndexBits: 0b1101,
+				ShardSizes:  []int64{111, 333, 444},
+			},
+			want: "0:111 B 2:333 B 3:444 B",
+		},
+		{
+			name: "multiple shards with missing sizes",
+			ecvInfo: &master_pb.VolumeEcShardInformationMessage{
+				EcIndexBits: 0b110110,
+				ShardSizes:  []int64{111, 333, 444},
+			},
+			want: "1:111 B 2:333 B 4:444 B 5:0 B",
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			si := erasure_coding.ShardsInfoFromVolumeEcShardInformationMessage(tc.ecvInfo)
+			if got, want := si.String(), tc.want; got != want {
+				t.Errorf("expected %q, got %q", want, got)
+			}
+		})
+	}
+}
+
+func TestShardsInfoCombine(t *testing.T) {
+	a := erasure_coding.NewShardsInfo()
+	a.Set(1, 111)
+	a.Set(2, 222)
+	a.Set(3, 333)
+	a.Set(4, 444)
+	a.Set(5, 0)
+
+	b := erasure_coding.NewShardsInfo()
+	b.Set(1, 555)
+	b.Set(4, 666)
+	b.Set(5, 777)
+	b.Set(6, 888)
+
+	if got, want := a.Plus(b).String(), "1:555 B 2:222 B 3:333 B 4:666 B 5:777 B 6:888 B"; got != want {
+		t.Errorf("expected %q for plus, got %q", want, got)
+	}
+	if got, want := a.Minus(b).String(), "2:222 B 3:333 B"; got != want {
+		t.Errorf("expected %q for minus, got %q", want, got)
+	}
+}
```