Files
seaweedFS/weed/shell/command_cluster_status.go
Lisandro Pin 6b98b52acc Fix reporting of EC shard sizes from nodes to masters. (#7835)
SeaweedFS tracks EC shard sizes in its topology data structures, but this information is never
relayed to master servers :( The end result is that commands reporting disk usage, such
as `volume.list` and `cluster.status`, yield incorrect figures when EC shards are present.

As an example, on a simple 5-node test cluster, before...

```
> volume.list
Topology volumeSizeLimit:30000 MB hdd(volume:6/40 active:6 free:33 remote:0)
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9001 hdd(volume:1/8 active:1 free:7 remote:0)
        Disk hdd(volume:1/8 active:1 free:7 remote:0) id:0
          volume id:3  size:88967096  file_count:172  replica_placement:2  version:3  modified_at_second:1766349617
          ec volume id:1 collection: shards:[1 5]
        Disk hdd total size:88967096 file_count:172
      DataNode 192.168.10.111:9001 total size:88967096 file_count:172
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9002 hdd(volume:2/8 active:2 free:6 remote:0)
        Disk hdd(volume:2/8 active:2 free:6 remote:0) id:0
          volume id:2  size:77267536  file_count:166  replica_placement:2  version:3  modified_at_second:1766349617
          volume id:3  size:88967096  file_count:172  replica_placement:2  version:3  modified_at_second:1766349617
          ec volume id:1 collection: shards:[0 4]
        Disk hdd total size:166234632 file_count:338
      DataNode 192.168.10.111:9002 total size:166234632 file_count:338
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9003 hdd(volume:1/8 active:1 free:7 remote:0)
        Disk hdd(volume:1/8 active:1 free:7 remote:0) id:0
          volume id:2  size:77267536  file_count:166  replica_placement:2  version:3  modified_at_second:1766349617
          ec volume id:1 collection: shards:[2 6]
        Disk hdd total size:77267536 file_count:166
      DataNode 192.168.10.111:9003 total size:77267536 file_count:166
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9004 hdd(volume:2/8 active:2 free:6 remote:0)
        Disk hdd(volume:2/8 active:2 free:6 remote:0) id:0
          volume id:2  size:77267536  file_count:166  replica_placement:2  version:3  modified_at_second:1766349617
          volume id:3  size:88967096  file_count:172  replica_placement:2  version:3  modified_at_second:1766349617
          ec volume id:1 collection: shards:[3 7]
        Disk hdd total size:166234632 file_count:338
      DataNode 192.168.10.111:9004 total size:166234632 file_count:338
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9005 hdd(volume:0/8 active:0 free:8 remote:0)
        Disk hdd(volume:0/8 active:0 free:8 remote:0) id:0
          ec volume id:1 collection: shards:[8 9 10 11 12 13]
        Disk hdd total size:0 file_count:0
    Rack DefaultRack total size:498703896 file_count:1014
  DataCenter DefaultDataCenter total size:498703896 file_count:1014
total size:498703896 file_count:1014
```

...and after:

```
> volume.list
Topology volumeSizeLimit:30000 MB hdd(volume:6/40 active:6 free:33 remote:0)
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9001 hdd(volume:1/8 active:1 free:7 remote:0)
        Disk hdd(volume:1/8 active:1 free:7 remote:0) id:0
          volume id:2  size:81761800  file_count:161  replica_placement:2  version:3  modified_at_second:1766349495
          ec volume id:1 collection: shards:[1 5 9] sizes:[1:8.00 MiB 5:8.00 MiB 9:8.00 MiB] total:24.00 MiB
        Disk hdd total size:81761800 file_count:161
      DataNode 192.168.10.111:9001 total size:81761800 file_count:161
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9002 hdd(volume:1/8 active:1 free:7 remote:0)
        Disk hdd(volume:1/8 active:1 free:7 remote:0) id:0
          volume id:3  size:88678712  file_count:170  replica_placement:2  version:3  modified_at_second:1766349495
          ec volume id:1 collection: shards:[11 12 13] sizes:[11:8.00 MiB 12:8.00 MiB 13:8.00 MiB] total:24.00 MiB
        Disk hdd total size:88678712 file_count:170
      DataNode 192.168.10.111:9002 total size:88678712 file_count:170
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9003 hdd(volume:2/8 active:2 free:6 remote:0)
        Disk hdd(volume:2/8 active:2 free:6 remote:0) id:0
          volume id:2  size:81761800  file_count:161  replica_placement:2  version:3  modified_at_second:1766349495
          volume id:3  size:88678712  file_count:170  replica_placement:2  version:3  modified_at_second:1766349495
          ec volume id:1 collection: shards:[0 4 8] sizes:[0:8.00 MiB 4:8.00 MiB 8:8.00 MiB] total:24.00 MiB
        Disk hdd total size:170440512 file_count:331
      DataNode 192.168.10.111:9003 total size:170440512 file_count:331
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9004 hdd(volume:2/8 active:2 free:6 remote:0)
        Disk hdd(volume:2/8 active:2 free:6 remote:0) id:0
          volume id:2  size:81761800  file_count:161  replica_placement:2  version:3  modified_at_second:1766349495
          volume id:3  size:88678712  file_count:170  replica_placement:2  version:3  modified_at_second:1766349495
          ec volume id:1 collection: shards:[2 6 10] sizes:[2:8.00 MiB 6:8.00 MiB 10:8.00 MiB] total:24.00 MiB
        Disk hdd total size:170440512 file_count:331
      DataNode 192.168.10.111:9004 total size:170440512 file_count:331
  DataCenter DefaultDataCenter hdd(volume:6/40 active:6 free:33 remote:0)
    Rack DefaultRack hdd(volume:6/40 active:6 free:33 remote:0)
      DataNode 192.168.10.111:9005 hdd(volume:0/8 active:0 free:8 remote:0)
        Disk hdd(volume:0/8 active:0 free:8 remote:0) id:0
          ec volume id:1 collection: shards:[3 7] sizes:[3:8.00 MiB 7:8.00 MiB] total:16.00 MiB
        Disk hdd total size:0 file_count:0
    Rack DefaultRack total size:511321536 file_count:993
  DataCenter DefaultDataCenter total size:511321536 file_count:993
total size:511321536 file_count:993
```
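
The per-node totals above come straight from the shard sizes each volume server now reports. As a rough illustration of the accounting (mirroring the normalization `printStorageInfo` applies to `eci.ShardSizes` in the shell command below, and assuming the default 10+4 Reed-Solomon layout; the `shardReport` type is made up for this sketch), summing all reported shard bytes per EC volume and scaling by data/total shards recovers the logical volume size:

```go
package main

import "fmt"

const (
	dataShards  = 10 // matches erasure_coding.DataShardsCount
	totalShards = 14 // matches erasure_coding.TotalShardsCount
)

// shardReport is an illustrative stand-in for one node's EC shard size report.
type shardReport struct {
	volumeId   uint32
	shardSizes []uint64 // bytes per shard held by this node
}

// logicalEcSizes sums every reported shard byte per EC volume, then scales the
// raw on-disk total by dataShards/totalShards to estimate the logical
// (pre-encoding) volume size, the same normalization printStorageInfo applies.
func logicalEcSizes(reports []shardReport) map[uint32]uint64 {
	raw := map[uint32]uint64{}
	for _, r := range reports {
		for _, s := range r.shardSizes {
			raw[r.volumeId] += s
		}
	}
	logical := map[uint32]uint64{}
	for vid, size := range raw {
		logical[vid] = size * dataShards / totalShards
	}
	return logical
}

func main() {
	const mib = uint64(8) << 20 // each shard in the example above is 8 MiB
	reports := []shardReport{
		{volumeId: 1, shardSizes: []uint64{mib, mib, mib}}, // shards [1 5 9]
		{volumeId: 1, shardSizes: []uint64{mib, mib, mib}}, // shards [11 12 13]
		{volumeId: 1, shardSizes: []uint64{mib, mib, mib}}, // shards [0 4 8]
		{volumeId: 1, shardSizes: []uint64{mib, mib, mib}}, // shards [2 6 10]
		{volumeId: 1, shardSizes: []uint64{mib, mib}},      // shards [3 7]
	}
	fmt.Printf("logical size of EC volume 1: %d bytes\n", logicalEcSizes(reports)[1])
}
```

With the five nodes above reporting 3+3+3+3+2 shards of 8 MiB each, the raw total is 112 MiB and the logical size works out to 80 MiB.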
2025-12-28 19:30:42 -08:00


package shell

import (
	"context"
	"flag"
	"fmt"
	"io"
	"math"
	"strings"
	"sync"

	"github.com/dustin/go-humanize"
	"github.com/dustin/go-humanize/english"
	"github.com/seaweedfs/seaweedfs/weed/operation"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
	"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
	"github.com/seaweedfs/seaweedfs/weed/storage/needle"
)
func init() {
	Commands = append(Commands, &commandClusterStatus{})
}

// VolumeReplicaStats holds stat details for a single replica of a regular volume.
type VolumeReplicaStats struct {
	Id           string
	VolumeId     uint32
	Files        uint64
	FilesDeleted uint64
	TotalSize    uint64
}

// RegularVolumeStats is a map of volume_id -> [volume replicas] with stat details.
type RegularVolumeStats map[uint32][]*VolumeReplicaStats

type commandClusterStatus struct{}

type ClusterStatusPrinter struct {
	writer             io.Writer
	writerMu           sync.Mutex
	humanize           bool
	maxParallelization int
	locked             bool
	collections        []string
	topology           *master_pb.TopologyInfo
	volumeSizeLimitMb  uint64
	regularVolumeStats RegularVolumeStats
}
func (c *commandClusterStatus) Name() string {
	return "cluster.status"
}

func (c *commandClusterStatus) Help() string {
	return `outputs a quick overview of the cluster status`
}

func (c *commandClusterStatus) HasTag(CommandTag) bool {
	return false
}
func (c *commandClusterStatus) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
	flags := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
	humanize := flags.Bool("humanize", true, "human-readable output")
	includeFiles := flags.Bool("files", false, "include detailed file metrics, from all volume servers")
	maxParallelization := flags.Int("maxParallelization", DefaultMaxParallelization, "run up to X tasks in parallel, whenever possible")
	if err = flags.Parse(args); err != nil {
		return err
	}

	collections, err := ListCollectionNames(commandEnv, true, true)
	if err != nil {
		return err
	}
	topology, volumeSizeLimitMb, err := collectTopologyInfo(commandEnv, 0)
	if err != nil {
		return err
	}

	sp := &ClusterStatusPrinter{
		writer:             writer,
		humanize:           *humanize,
		maxParallelization: *maxParallelization,
		locked:             commandEnv.isLocked(),
		collections:        collections,
		topology:           topology,
		volumeSizeLimitMb:  volumeSizeLimitMb,
	}
	if *includeFiles {
		if err := sp.loadFileStats(commandEnv); err != nil {
			return err
		}
	}
	sp.Print()
	return nil
}
// Formatting helpers: with -humanize (the default), numbers are printed with
// thousands separators, byte sizes in human-readable units, and nouns properly
// pluralized; otherwise raw values are printed.

func (sp *ClusterStatusPrinter) uint64(n uint64) string {
	if !sp.humanize {
		return fmt.Sprintf("%d", n)
	}
	return humanize.Comma(int64(n))
}

func (sp *ClusterStatusPrinter) int(n int) string {
	return sp.uint64(uint64(n))
}

func (sp *ClusterStatusPrinter) uint64Plural(n uint64, str string) string {
	if !sp.humanize {
		return fmt.Sprintf("%s(s)", str)
	}
	uin := math.MaxInt
	if n < math.MaxInt {
		uin = int(n)
	}
	return english.PluralWord(int(uin), str, "")
}

func (sp *ClusterStatusPrinter) plural(n int, str string) string {
	return sp.uint64Plural(uint64(n), str)
}

func (sp *ClusterStatusPrinter) bytes(b uint64) string {
	if !sp.humanize {
		return fmt.Sprintf("%d %s", b, sp.plural(int(b), "byte"))
	}
	return fmt.Sprintf("%s", humanize.Bytes(b))
}

func (sp *ClusterStatusPrinter) uint64Ratio(a, b uint64) string {
	var p float64
	if b != 0 {
		p = float64(a) / float64(b)
	}
	if !sp.humanize {
		return fmt.Sprintf("%.02f", p)
	}
	return fmt.Sprintf("%s", humanize.FtoaWithDigits(p, 2))
}

func (sp *ClusterStatusPrinter) intRatio(a, b int) string {
	return sp.uint64Ratio(uint64(a), uint64(b))
}

func (sp *ClusterStatusPrinter) uint64Pct(a, b uint64) string {
	var p float64
	if b != 0 {
		p = 100 * float64(a) / float64(b)
	}
	if !sp.humanize {
		return fmt.Sprintf("%.02f%%", p)
	}
	return fmt.Sprintf("%s%%", humanize.FtoaWithDigits(p, 2))
}

func (sp *ClusterStatusPrinter) intPct(a, b int) string {
	return sp.uint64Pct(uint64(a), uint64(b))
}
// write prints a printf-style line to the output writer, serialized via writerMu.
// Trailing spaces are trimmed and a newline is appended unless the format already
// ends with "\n" or "\r" (the latter is used for in-place progress updates).
func (sp *ClusterStatusPrinter) write(format string, a ...any) {
	sp.writerMu.Lock()
	defer sp.writerMu.Unlock()

	format = strings.TrimRight(format, " ")
	if len(format) == 0 {
		format = "\n"
	}
	fmt.Fprintf(sp.writer, format, a...)

	last := format[len(format)-1:]
	if last != "\n" && last != "\r" {
		fmt.Fprint(sp.writer, "\n")
	}
}
func (sp *ClusterStatusPrinter) Print() {
	sp.write("")
	sp.printClusterInfo()
	sp.printVolumeInfo()
	sp.printStorageInfo()
	sp.printFilesInfo()
}
// loadFileStats queries every volume server in parallel for per-volume file
// statistics (file counts, deleted files, raw sizes), keyed by volume ID.
// TODO: collect stats for EC volumes as well
func (sp *ClusterStatusPrinter) loadFileStats(commandEnv *CommandEnv) error {
	sp.regularVolumeStats = RegularVolumeStats{}

	var mu sync.Mutex
	var progressTotal, progressDone uint64

	ewg := NewErrorWaitGroup(sp.maxParallelization)
	for _, dci := range sp.topology.DataCenterInfos {
		for _, ri := range dci.RackInfos {
			for _, dni := range ri.DataNodeInfos {
				for _, d := range dni.DiskInfos {
					mu.Lock()
					progressTotal += uint64(len(d.VolumeInfos))
					mu.Unlock()

					for _, v := range d.VolumeInfos {
						ewg.Add(func() error {
							// Collect regular volume stats
							err := operation.WithVolumeServerClient(false, pb.NewServerAddressWithGrpcPort(dni.Id, int(dni.GrpcPort)), commandEnv.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
								resp, reqErr := volumeServerClient.VolumeStatus(context.Background(), &volume_server_pb.VolumeStatusRequest{
									VolumeId: uint32(v.Id),
								})
								if reqErr != nil {
									return reqErr
								}

								mu.Lock()
								defer mu.Unlock()
								if resp != nil {
									if _, ok := sp.regularVolumeStats[v.Id]; !ok {
										sp.regularVolumeStats[v.Id] = []*VolumeReplicaStats{}
									}
									sp.regularVolumeStats[v.Id] = append(sp.regularVolumeStats[v.Id], &VolumeReplicaStats{
										Id:           dni.Id,
										VolumeId:     v.Id,
										Files:        resp.FileCount,
										FilesDeleted: resp.FileDeletedCount,
										TotalSize:    resp.VolumeSize,
									})
								}
								progressDone++
								return nil
							})
							if err != nil {
								return err
							}

							mu.Lock()
							sp.write("collecting file stats: %s \r", sp.uint64Pct(progressDone, progressTotal))
							mu.Unlock()
							return nil
						})
					}
				}
			}
		}
	}
	err := ewg.Wait()
	sp.write("")
	return err
}
func (sp *ClusterStatusPrinter) printClusterInfo() {
	dcs := len(sp.topology.DataCenterInfos)
	racks := 0
	nodes := 0
	disks := 0
	for _, dci := range sp.topology.DataCenterInfos {
		racks += len(dci.RackInfos)
		for _, ri := range dci.RackInfos {
			for _, dni := range ri.DataNodeInfos {
				nodes++
				disks += len(dni.DiskInfos)
			}
		}
	}

	status := "unlocked"
	if sp.locked {
		status = "LOCKED"
	}

	sp.write("cluster:")
	sp.write("\tid: %s", sp.topology.Id)
	sp.write("\tstatus: %s", status)
	sp.write("\tnodes: %s", sp.int(nodes))
	sp.write("\ttopology: %s %s, %s %s on %s %s",
		sp.int(dcs), sp.plural(dcs, "DC"),
		sp.int(disks), sp.plural(disks, "disk"),
		sp.int(racks), sp.plural(racks, "rack"))
	sp.write("")
}
// printVolumeInfo summarizes regular volumes/replicas and EC volumes/shards.
func (sp *ClusterStatusPrinter) printVolumeInfo() {
	collections := len(sp.collections)

	var maxVolumes uint64
	volumeIds := map[needle.VolumeId]bool{}
	ecVolumeIds := map[needle.VolumeId]bool{}
	var replicas, roReplicas, rwReplicas, ecShards int
	for _, dci := range sp.topology.DataCenterInfos {
		for _, ri := range dci.RackInfos {
			for _, dni := range ri.DataNodeInfos {
				for _, di := range dni.DiskInfos {
					maxVolumes += uint64(di.MaxVolumeCount)
					for _, vi := range di.VolumeInfos {
						vid := needle.VolumeId(vi.Id)
						volumeIds[vid] = true
						replicas++
						if vi.ReadOnly {
							roReplicas++
						} else {
							rwReplicas++
						}
					}
					for _, eci := range di.EcShardInfos {
						vid := needle.VolumeId(eci.Id)
						ecVolumeIds[vid] = true
						ecShards += erasure_coding.ShardsCountFromVolumeEcShardInformationMessage(eci)
					}
				}
			}
		}
	}
	volumes := len(volumeIds)
	ecVolumes := len(ecVolumeIds)
	totalVolumes := volumes + ecVolumes

	sp.write("volumes:")
	sp.write("\ttotal: %s %s, %s %s",
		sp.int(totalVolumes), sp.plural(totalVolumes, "volume"),
		sp.int(collections), sp.plural(collections, "collection"))
	sp.write("\tmax size: %s", sp.bytes(sp.volumeSizeLimitMb*1024*1024))
	sp.write("\tregular: %s/%s %s on %s %s, %s writable (%s), %s read-only (%s)",
		sp.int(volumes), sp.uint64(maxVolumes), sp.plural(volumes, "volume"),
		sp.int(replicas), sp.plural(replicas, "replica"),
		sp.int(rwReplicas), sp.intPct(rwReplicas, replicas),
		sp.int(roReplicas), sp.intPct(roReplicas, replicas))
	sp.write("\tEC: %s EC %s on %s %s (%s shards/volume)",
		sp.int(ecVolumes), sp.plural(ecVolumes, "volume"),
		sp.int(ecShards), sp.plural(ecShards, "shard"),
		sp.intRatio(ecShards, ecVolumes))
	sp.write("")
}
// printStorageInfo summarizes logical and raw storage usage for both regular
// and EC volumes.
func (sp *ClusterStatusPrinter) printStorageInfo() {
	perVolumeSize := map[needle.VolumeId]uint64{}
	perEcVolumeSize := map[needle.VolumeId]uint64{}
	var rawVolumeSize, rawEcVolumeSize uint64
	for _, dci := range sp.topology.DataCenterInfos {
		for _, ri := range dci.RackInfos {
			for _, dni := range ri.DataNodeInfos {
				for _, di := range dni.DiskInfos {
					for _, vi := range di.VolumeInfos {
						vid := needle.VolumeId(vi.Id)
						perVolumeSize[vid] = vi.Size
						rawVolumeSize += vi.Size
					}
					for _, eci := range di.EcShardInfos {
						vid := needle.VolumeId(eci.Id)
						var size uint64
						for _, ss := range eci.ShardSizes {
							size += uint64(ss)
						}
						perEcVolumeSize[vid] += size
						rawEcVolumeSize += size
					}
				}
			}
		}
	}

	// normalize EC logical volume sizes given shard settings
	for vid := range perEcVolumeSize {
		perEcVolumeSize[vid] = perEcVolumeSize[vid] * erasure_coding.DataShardsCount / erasure_coding.TotalShardsCount
	}

	var volumeSize, ecVolumeSize uint64
	for _, s := range perVolumeSize {
		volumeSize += s
	}
	for _, s := range perEcVolumeSize {
		ecVolumeSize += s
	}
	totalSize := volumeSize + ecVolumeSize

	sp.write("storage:")
	sp.write("\ttotal: %s", sp.bytes(totalSize))
	sp.write("\tregular volumes: %s", sp.bytes(volumeSize))
	sp.write("\tEC volumes: %s", sp.bytes(ecVolumeSize))
	sp.write("\traw: %s on volume replicas, %s on EC shards", sp.bytes(rawVolumeSize), sp.bytes(rawEcVolumeSize))
	sp.write("")
}
// printFilesInfo summarizes the file counts and sizes gathered by loadFileStats.
// Per-volume figures are averaged across replicas; "raw" figures sum every replica.
func (sp *ClusterStatusPrinter) printFilesInfo() {
	if len(sp.regularVolumeStats) == 0 {
		return
	}

	var regularFilesTotal, regularFilesDeleted, regularFilesSize uint64
	var regularFilesTotalRaw, regularFilesDeletedRaw, regularFilesSizeRaw uint64
	for _, replicaStats := range sp.regularVolumeStats {
		rc := uint64(len(replicaStats))
		var volumeFilesTotal, volumeFilesSize, volumeFilesDeleted uint64
		for _, rs := range replicaStats {
			regularFilesTotalRaw += rs.Files
			regularFilesSizeRaw += rs.TotalSize
			regularFilesDeletedRaw += rs.FilesDeleted
			volumeFilesTotal += rs.Files
			volumeFilesSize += rs.TotalSize
			volumeFilesDeleted += rs.FilesDeleted
		}
		regularFilesTotal += (volumeFilesTotal / rc)
		regularFilesSize += (volumeFilesSize / rc)
		regularFilesDeleted += (volumeFilesDeleted / rc)
	}
	regularFiles := regularFilesTotal - regularFilesDeleted
	regularFilesRaw := regularFilesTotalRaw - regularFilesDeletedRaw

	var avgFileSize uint64
	if regularFilesTotal != 0 {
		avgFileSize = regularFilesSize / regularFilesTotal
	}

	sp.write("files:")
	sp.write("\tregular: %s %s, %s readable (%s), %s deleted (%s), avg %s per file",
		sp.uint64(regularFilesTotal), sp.uint64Plural(regularFilesTotal, "file"),
		sp.uint64(regularFiles), sp.uint64Pct(regularFiles, regularFilesTotal),
		sp.uint64(regularFilesDeleted), sp.uint64Pct(regularFilesDeleted, regularFilesTotal),
		sp.bytes(avgFileSize))
	sp.write("\tregular raw: %s %s, %s readable (%s), %s deleted (%s), %s total",
		sp.uint64(regularFilesTotalRaw), sp.uint64Plural(regularFilesTotalRaw, "file"),
		sp.uint64(regularFilesRaw), sp.uint64Pct(regularFilesRaw, regularFilesTotalRaw),
		sp.uint64(regularFilesDeletedRaw), sp.uint64Pct(regularFilesDeletedRaw, regularFilesTotalRaw),
		sp.bytes(regularFilesSizeRaw))
	sp.write("\tEC: [no data]")
	sp.write("\tEC raw: [no data]")
	sp.write("")
}