Fix reporting of EC shard sizes from nodes to masters. (#7835)
SeaweedFS tracks EC shard sizes on topology data structures, but this information is never
relayed to master servers. The end result is that commands reporting disk usage, such
as `volume.list` and `cluster.status`, yield incorrect figures when EC shards are present.
As an example, on a simple 5-node test cluster, before the fix...
```
> volume.list
Topology volumeSizeLimit:30000 MB hdd(volume:6/40 active:6 free:33 remote:0)
DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
DataNode 192.168.10.111:9001 hdd(volume:1/8 active:1 free:7 remote:0)
Disk hdd(volume:1/8 active:1 free:7 remote:0) id:0
volume id:3 size:88967096 file_count:172 replica_placement:2 version:3 modified_at_second:1766349617
ec volume id:1 collection: shards:[1 5]
Disk hdd total size:88967096 file_count:172
DataNode 192.168.10.111:9001 total size:88967096 file_count:172
DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
DataNode 192.168.10.111:9002 hdd(volume:2/8 active:2 free:6 remote:0)
Disk hdd(volume:2/8 active:2 free:6 remote:0) id:0
volume id:2 size:77267536 file_count:166 replica_placement:2 version:3 modified_at_second:1766349617
volume id:3 size:88967096 file_count:172 replica_placement:2 version:3 modified_at_second:1766349617
ec volume id:1 collection: shards:[0 4]
Disk hdd total size:166234632 file_count:338
DataNode 192.168.10.111:9002 total size:166234632 file_count:338
DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
DataNode 192.168.10.111:9003 hdd(volume:1/8 active:1 free:7 remote:0)
Disk hdd(volume:1/8 active:1 free:7 remote:0) id:0
volume id:2 size:77267536 file_count:166 replica_placement:2 version:3 modified_at_second:1766349617
ec volume id:1 collection: shards:[2 6]
Disk hdd total size:77267536 file_count:166
DataNode 192.168.10.111:9003 total size:77267536 file_count:166
DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
DataNode 192.168.10.111:9004 hdd(volume:2/8 active:2 free:6 remote:0)
Disk hdd(volume:2/8 active:2 free:6 remote:0) id:0
volume id:2 size:77267536 file_count:166 replica_placement:2 version:3 modified_at_second:1766349617
volume id:3 size:88967096 file_count:172 replica_placement:2 version:3 modified_at_second:1766349617
ec volume id:1 collection: shards:[3 7]
Disk hdd total size:166234632 file_count:338
DataNode 192.168.10.111:9004 total size:166234632 file_count:338
DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
DataNode 192.168.10.111:9005 hdd(volume:0/8 active:0 free:8 remote:0)
Disk hdd(volume:0/8 active:0 free:8 remote:0) id:0
ec volume id:1 collection: shards:[8 9 10 11 12 13]
Disk hdd total size:0 file_count:0
Rack DefaultRack total size:498703896 file_count:1014
DataCenter DefaultDataCenter total size:498703896 file_count:1014
total size:498703896 file_count:1014
```
...and after the fix:
```
> volume.list
Topology volumeSizeLimit:30000 MB hdd(volume:6/40 active:6 free:33 remote:0)
DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
DataNode 192.168.10.111:9001 hdd(volume:1/8 active:1 free:7 remote:0)
Disk hdd(volume:1/8 active:1 free:7 remote:0) id:0
volume id:2 size:81761800 file_count:161 replica_placement:2 version:3 modified_at_second:1766349495
ec volume id:1 collection: shards:[1 5 9] sizes:[1:8.00 MiB 5:8.00 MiB 9:8.00 MiB] total:24.00 MiB
Disk hdd total size:81761800 file_count:161
DataNode 192.168.10.111:9001 total size:81761800 file_count:161
DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
DataNode 192.168.10.111:9002 hdd(volume:1/8 active:1 free:7 remote:0)
Disk hdd(volume:1/8 active:1 free:7 remote:0) id:0
volume id:3 size:88678712 file_count:170 replica_placement:2 version:3 modified_at_second:1766349495
ec volume id:1 collection: shards:[11 12 13] sizes:[11:8.00 MiB 12:8.00 MiB 13:8.00 MiB] total:24.00 MiB
Disk hdd total size:88678712 file_count:170
DataNode 192.168.10.111:9002 total size:88678712 file_count:170
DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
DataNode 192.168.10.111:9003 hdd(volume:2/8 active:2 free:6 remote:0)
Disk hdd(volume:2/8 active:2 free:6 remote:0) id:0
volume id:2 size:81761800 file_count:161 replica_placement:2 version:3 modified_at_second:1766349495
volume id:3 size:88678712 file_count:170 replica_placement:2 version:3 modified_at_second:1766349495
ec volume id:1 collection: shards:[0 4 8] sizes:[0:8.00 MiB 4:8.00 MiB 8:8.00 MiB] total:24.00 MiB
Disk hdd total size:170440512 file_count:331
DataNode 192.168.10.111:9003 total size:170440512 file_count:331
DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
DataNode 192.168.10.111:9004 hdd(volume:2/8 active:2 free:6 remote:0)
Disk hdd(volume:2/8 active:2 free:6 remote:0) id:0
volume id:2 size:81761800 file_count:161 replica_placement:2 version:3 modified_at_second:1766349495
volume id:3 size:88678712 file_count:170 replica_placement:2 version:3 modified_at_second:1766349495
ec volume id:1 collection: shards:[2 6 10] sizes:[2:8.00 MiB 6:8.00 MiB 10:8.00 MiB] total:24.00 MiB
Disk hdd total size:170440512 file_count:331
DataNode 192.168.10.111:9004 total size:170440512 file_count:331
DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
DataNode 192.168.10.111:9005 hdd(volume:0/8 active:0 free:8 remote:0)
Disk hdd(volume:0/8 active:0 free:8 remote:0) id:0
ec volume id:1 collection: shards:[3 7] sizes:[3:8.00 MiB 7:8.00 MiB] total:16.00 MiB
Disk hdd total size:0 file_count:0
Rack DefaultRack total size:511321536 file_count:993
DataCenter DefaultDataCenter total size:511321536 file_count:993
total size:511321536 file_count:993
```
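Under the hood, the shell's EC commands stop passing bare `erasure_coding.ShardBits` bitmaps around and instead use a `ShardsInfo` structure that carries per-shard sizes along with the shard ids, which is what lets the totals above be reported accurately. As a rough, hypothetical sketch of what such a structure needs to support (the real type lives in the `erasure_coding` package and its API may differ; the constant, names, and method bodies below are assumptions for illustration only):

```go
package main

import (
	"fmt"
	"sort"
)

// SeaweedFS EC uses 10 data + 4 parity shards per volume; this constant exists
// only for the sketch.
const dataShardsCount = 10

// ShardsInfo (hypothetical, simplified): the EC shard ids a node holds for one
// volume, each with its size, so that byte totals can be reported to the master.
type ShardsInfo struct {
	sizes map[int]int64 // shard id -> shard size in bytes
}

func NewShardsInfo() *ShardsInfo {
	return &ShardsInfo{sizes: map[int]int64{}}
}

// Set records (or updates) the size of one shard.
func (s *ShardsInfo) Set(id int, size int64) { s.sizes[id] = size }

// Count returns how many shards are tracked.
func (s *ShardsInfo) Count() int { return len(s.sizes) }

// Ids returns the tracked shard ids in ascending order.
func (s *ShardsInfo) Ids() []int {
	ids := make([]int, 0, len(s.sizes))
	for id := range s.sizes {
		ids = append(ids, id)
	}
	sort.Ints(ids)
	return ids
}

// TotalSize sums the sizes of all tracked shards.
func (s *ShardsInfo) TotalSize() int64 {
	var total int64
	for _, size := range s.sizes {
		total += size
	}
	return total
}

// MinusParityShards returns a copy with parity shards (ids >= dataShardsCount) dropped.
func (s *ShardsInfo) MinusParityShards() *ShardsInfo {
	out := NewShardsInfo()
	for id, size := range s.sizes {
		if id < dataShardsCount {
			out.Set(id, size)
		}
	}
	return out
}

// Minus returns a copy holding only the shards not present in other.
func (s *ShardsInfo) Minus(other *ShardsInfo) *ShardsInfo {
	out := NewShardsInfo()
	for id, size := range s.sizes {
		if _, found := other.sizes[id]; !found {
			out.Set(id, size)
		}
	}
	return out
}

// Plus returns a copy merged with other; other's sizes win on overlap.
func (s *ShardsInfo) Plus(other *ShardsInfo) *ShardsInfo {
	out := NewShardsInfo()
	for id, size := range s.sizes {
		out.Set(id, size)
	}
	for id, size := range other.sizes {
		out.Set(id, size)
	}
	return out
}

func main() {
	// Mirrors the first data node in the "after" output: shards 1, 5, 9 at 8 MiB each.
	si := NewShardsInfo()
	for _, id := range []int{1, 5, 9} {
		si.Set(id, 8<<20)
	}
	fmt.Printf("shards:%v total:%d bytes\n", si.Ids(), si.TotalSize())
}
```

With that shape in place, the EC decode shell command can keep manipulating shard sets (`Count`, `Ids`, `Minus`, `Plus`, `MinusParityShards`) much as it did with `ShardBits`, while each entry now also knows its size. The relevant excerpt of the change: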
```diff
@@ -108,12 +108,12 @@ func doEcDecode(commandEnv *CommandEnv, topoInfo *master_pb.TopologyInfo, collec
 	}
 
 	// find volume location
-	nodeToEcIndexBits := collectEcNodeShardBits(topoInfo, vid, diskType)
+	nodeToEcShardsInfo := collectEcNodeShardsInfo(topoInfo, vid, diskType)
 
-	fmt.Printf("ec volume %d shard locations: %+v\n", vid, nodeToEcIndexBits)
+	fmt.Printf("ec volume %d shard locations: %+v\n", vid, nodeToEcShardsInfo)
 
 	// collect ec shards to the server with most space
-	targetNodeLocation, err := collectEcShards(commandEnv, nodeToEcIndexBits, collection, vid)
+	targetNodeLocation, err := collectEcShards(commandEnv, nodeToEcShardsInfo, collection, vid)
 	if err != nil {
 		return fmt.Errorf("collectEcShards for volume %d: %v", vid, err)
 	}
@@ -124,13 +124,13 @@ func doEcDecode(commandEnv *CommandEnv, topoInfo *master_pb.TopologyInfo, collec
 		// Special case: if the EC index has no live entries, decoding is a no-op.
 		// Just purge EC shards and return success without generating/mounting an empty volume.
 		if isEcDecodeEmptyVolumeErr(err) {
-			return unmountAndDeleteEcShards(commandEnv.option.GrpcDialOption, collection, nodeToEcIndexBits, vid)
+			return unmountAndDeleteEcShards(commandEnv.option.GrpcDialOption, collection, nodeToEcShardsInfo, vid)
 		}
 		return fmt.Errorf("generate normal volume %d on %s: %v", vid, targetNodeLocation, err)
 	}
 
 	// delete the previous ec shards
-	err = mountVolumeAndDeleteEcShards(commandEnv.option.GrpcDialOption, collection, targetNodeLocation, nodeToEcIndexBits, vid)
+	err = mountVolumeAndDeleteEcShards(commandEnv.option.GrpcDialOption, collection, targetNodeLocation, nodeToEcShardsInfo, vid)
 	if err != nil {
 		return fmt.Errorf("delete ec shards for volume %d: %v", vid, err)
 	}
@@ -150,24 +150,24 @@ func isEcDecodeEmptyVolumeErr(err error) bool {
 	return strings.Contains(st.Message(), erasure_coding.EcNoLiveEntriesSubstring)
 }
 
-func unmountAndDeleteEcShards(grpcDialOption grpc.DialOption, collection string, nodeToEcIndexBits map[pb.ServerAddress]erasure_coding.ShardBits, vid needle.VolumeId) error {
-	return unmountAndDeleteEcShardsWithPrefix("unmountAndDeleteEcShards", grpcDialOption, collection, nodeToEcIndexBits, vid)
+func unmountAndDeleteEcShards(grpcDialOption grpc.DialOption, collection string, nodeToShardsInfo map[pb.ServerAddress]*erasure_coding.ShardsInfo, vid needle.VolumeId) error {
+	return unmountAndDeleteEcShardsWithPrefix("unmountAndDeleteEcShards", grpcDialOption, collection, nodeToShardsInfo, vid)
 }
 
-func unmountAndDeleteEcShardsWithPrefix(prefix string, grpcDialOption grpc.DialOption, collection string, nodeToEcIndexBits map[pb.ServerAddress]erasure_coding.ShardBits, vid needle.VolumeId) error {
-	ewg := NewErrorWaitGroup(len(nodeToEcIndexBits))
+func unmountAndDeleteEcShardsWithPrefix(prefix string, grpcDialOption grpc.DialOption, collection string, nodeToShardsInfo map[pb.ServerAddress]*erasure_coding.ShardsInfo, vid needle.VolumeId) error {
+	ewg := NewErrorWaitGroup(len(nodeToShardsInfo))
 
 	// unmount and delete ec shards in parallel (one goroutine per location)
-	for location, ecIndexBits := range nodeToEcIndexBits {
-		location, ecIndexBits := location, ecIndexBits // capture loop variables for goroutine
+	for location, si := range nodeToShardsInfo {
+		location, si := location, si // capture loop variables for goroutine
 		ewg.Add(func() error {
-			fmt.Printf("unmount ec volume %d on %s has shards: %+v\n", vid, location, ecIndexBits.ShardIds())
-			if err := unmountEcShards(grpcDialOption, vid, location, ecIndexBits.ToUint32Slice()); err != nil {
+			fmt.Printf("unmount ec volume %d on %s has shards: %+v\n", vid, location, si.Ids())
+			if err := unmountEcShards(grpcDialOption, vid, location, si.Ids()); err != nil {
 				return fmt.Errorf("%s unmount ec volume %d on %s: %w", prefix, vid, location, err)
 			}
 
-			fmt.Printf("delete ec volume %d on %s has shards: %+v\n", vid, location, ecIndexBits.ShardIds())
-			if err := sourceServerDeleteEcShards(grpcDialOption, collection, vid, location, ecIndexBits.ToUint32Slice()); err != nil {
+			fmt.Printf("delete ec volume %d on %s has shards: %+v\n", vid, location, si.Ids())
+			if err := sourceServerDeleteEcShards(grpcDialOption, collection, vid, location, si.Ids()); err != nil {
 				return fmt.Errorf("%s delete ec volume %d on %s: %w", prefix, vid, location, err)
 			}
 			return nil
@@ -176,7 +176,7 @@ func unmountAndDeleteEcShardsWithPrefix(prefix string, grpcDialOption grpc.DialO
 	return ewg.Wait()
 }
 
-func mountVolumeAndDeleteEcShards(grpcDialOption grpc.DialOption, collection string, targetNodeLocation pb.ServerAddress, nodeToEcIndexBits map[pb.ServerAddress]erasure_coding.ShardBits, vid needle.VolumeId) error {
+func mountVolumeAndDeleteEcShards(grpcDialOption grpc.DialOption, collection string, targetNodeLocation pb.ServerAddress, nodeToShardsInfo map[pb.ServerAddress]*erasure_coding.ShardsInfo, vid needle.VolumeId) error {
 
 	// mount volume
 	if err := operation.WithVolumeServerClient(false, targetNodeLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
@@ -188,7 +188,7 @@ func mountVolumeAndDeleteEcShards(grpcDialOption grpc.DialOption, collection str
 		return fmt.Errorf("mountVolumeAndDeleteEcShards mount volume %d on %s: %v", vid, targetNodeLocation, err)
 	}
 
-	return unmountAndDeleteEcShardsWithPrefix("mountVolumeAndDeleteEcShards", grpcDialOption, collection, nodeToEcIndexBits, vid)
+	return unmountAndDeleteEcShardsWithPrefix("mountVolumeAndDeleteEcShards", grpcDialOption, collection, nodeToShardsInfo, vid)
 }
 
 func generateNormalVolume(grpcDialOption grpc.DialOption, vid needle.VolumeId, collection string, sourceVolumeServer pb.ServerAddress) error {
@@ -207,57 +207,57 @@ func generateNormalVolume(grpcDialOption grpc.DialOption, vid needle.VolumeId, c
 
 }
 
-func collectEcShards(commandEnv *CommandEnv, nodeToEcIndexBits map[pb.ServerAddress]erasure_coding.ShardBits, collection string, vid needle.VolumeId) (targetNodeLocation pb.ServerAddress, err error) {
+func collectEcShards(commandEnv *CommandEnv, nodeToShardsInfo map[pb.ServerAddress]*erasure_coding.ShardsInfo, collection string, vid needle.VolumeId) (targetNodeLocation pb.ServerAddress, err error) {
 
 	maxShardCount := 0
-	var existingEcIndexBits erasure_coding.ShardBits
-	for loc, ecIndexBits := range nodeToEcIndexBits {
-		toBeCopiedShardCount := ecIndexBits.MinusParityShards().ShardIdCount()
+	existingShardsInfo := erasure_coding.NewShardsInfo()
+	for loc, si := range nodeToShardsInfo {
+		toBeCopiedShardCount := si.MinusParityShards().Count()
 		if toBeCopiedShardCount > maxShardCount {
 			maxShardCount = toBeCopiedShardCount
 			targetNodeLocation = loc
-			existingEcIndexBits = ecIndexBits
+			existingShardsInfo = si
 		}
 	}
 
-	fmt.Printf("collectEcShards: ec volume %d collect shards to %s from: %+v\n", vid, targetNodeLocation, nodeToEcIndexBits)
+	fmt.Printf("collectEcShards: ec volume %d collect shards to %s from: %+v\n", vid, targetNodeLocation, nodeToShardsInfo)
 
-	var copiedEcIndexBits erasure_coding.ShardBits
-	for loc, ecIndexBits := range nodeToEcIndexBits {
+	copiedShardsInfo := erasure_coding.NewShardsInfo()
+	for loc, si := range nodeToShardsInfo {
 		if loc == targetNodeLocation {
 			continue
 		}
 
-		needToCopyEcIndexBits := ecIndexBits.Minus(existingEcIndexBits).MinusParityShards()
-		if needToCopyEcIndexBits.ShardIdCount() == 0 {
+		needToCopyShardsInfo := si.Minus(existingShardsInfo).MinusParityShards()
+		if needToCopyShardsInfo.Count() == 0 {
 			continue
 		}
 
 		err = operation.WithVolumeServerClient(false, targetNodeLocation, commandEnv.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
 
-			fmt.Printf("copy %d.%v %s => %s\n", vid, needToCopyEcIndexBits.ShardIds(), loc, targetNodeLocation)
+			fmt.Printf("copy %d.%v %s => %s\n", vid, needToCopyShardsInfo.Ids(), loc, targetNodeLocation)
 
 			_, copyErr := volumeServerClient.VolumeEcShardsCopy(context.Background(), &volume_server_pb.VolumeEcShardsCopyRequest{
 				VolumeId: uint32(vid),
 				Collection: collection,
-				ShardIds: needToCopyEcIndexBits.ToUint32Slice(),
+				ShardIds: needToCopyShardsInfo.IdsUint32(),
 				CopyEcxFile: false,
 				CopyEcjFile: true,
 				CopyVifFile: true,
 				SourceDataNode: string(loc),
 			})
 			if copyErr != nil {
-				return fmt.Errorf("copy %d.%v %s => %s : %v\n", vid, needToCopyEcIndexBits.ShardIds(), loc, targetNodeLocation, copyErr)
+				return fmt.Errorf("copy %d.%v %s => %s : %v\n", vid, needToCopyShardsInfo.Ids(), loc, targetNodeLocation, copyErr)
 			}
 
-			fmt.Printf("mount %d.%v on %s\n", vid, needToCopyEcIndexBits.ShardIds(), targetNodeLocation)
+			fmt.Printf("mount %d.%v on %s\n", vid, needToCopyShardsInfo.Ids(), targetNodeLocation)
 			_, mountErr := volumeServerClient.VolumeEcShardsMount(context.Background(), &volume_server_pb.VolumeEcShardsMountRequest{
 				VolumeId: uint32(vid),
 				Collection: collection,
-				ShardIds: needToCopyEcIndexBits.ToUint32Slice(),
+				ShardIds: needToCopyShardsInfo.IdsUint32(),
 			})
 			if mountErr != nil {
-				return fmt.Errorf("mount %d.%v on %s : %v\n", vid, needToCopyEcIndexBits.ShardIds(), targetNodeLocation, mountErr)
+				return fmt.Errorf("mount %d.%v on %s : %v\n", vid, needToCopyShardsInfo.Ids(), targetNodeLocation, mountErr)
 			}
 
 			return nil
@@ -267,14 +267,12 @@ func collectEcShards(commandEnv *CommandEnv, nodeToEcIndexBits map[pb.ServerAddr
 			break
 		}
 
-		copiedEcIndexBits = copiedEcIndexBits.Plus(needToCopyEcIndexBits)
-
+		copiedShardsInfo.Add(needToCopyShardsInfo)
 	}
 
-	nodeToEcIndexBits[targetNodeLocation] = existingEcIndexBits.Plus(copiedEcIndexBits)
+	nodeToShardsInfo[targetNodeLocation] = existingShardsInfo.Plus(copiedShardsInfo)
 
 	return targetNodeLocation, err
-
 }
 
 func lookupVolumeIds(commandEnv *CommandEnv, volumeIds []string) (volumeIdLocations []*master_pb.LookupVolumeResponse_VolumeIdLocation, err error) {
@@ -314,18 +312,17 @@ func collectEcShardIds(topoInfo *master_pb.TopologyInfo, collectionPattern strin
 	return
 }
 
-func collectEcNodeShardBits(topoInfo *master_pb.TopologyInfo, vid needle.VolumeId, diskType types.DiskType) map[pb.ServerAddress]erasure_coding.ShardBits {
-
-	nodeToEcIndexBits := make(map[pb.ServerAddress]erasure_coding.ShardBits)
+func collectEcNodeShardsInfo(topoInfo *master_pb.TopologyInfo, vid needle.VolumeId, diskType types.DiskType) map[pb.ServerAddress]*erasure_coding.ShardsInfo {
+	res := make(map[pb.ServerAddress]*erasure_coding.ShardsInfo)
 	eachDataNode(topoInfo, func(dc DataCenterId, rack RackId, dn *master_pb.DataNodeInfo) {
 		if diskInfo, found := dn.DiskInfos[string(diskType)]; found {
 			for _, v := range diskInfo.EcShardInfos {
 				if v.Id == uint32(vid) {
-					nodeToEcIndexBits[pb.NewServerAddressFromDataNode(dn)] = erasure_coding.ShardBits(v.EcIndexBits)
+					res[pb.NewServerAddressFromDataNode(dn)] = erasure_coding.ShardsInfoFromVolumeEcShardInformationMessage(v)
 				}
 			}
 		}
 	})
 
-	return nodeToEcIndexBits
+	return res
 }
```