Implement a weed shell command to return a status overview of the cluster. (#7704)
Detailed file information will be implemented in a follow-up MR. Note also that masters are currently not reporting back EC shard sizes correctly via `master_pb.VolumeEcShardInformationMessage.shard_sizes`. For example:

```
> cluster.status

cluster:
  id: topo
  status: LOCKED
  nodes: 10
  topology: 1 DC(s), 1 disk(s) on 1 rack(s)

volumes:
  total: 3 volumes on 1 collections
  max size: 31457280000 bytes
  regular: 2/80 volumes on 6 replicas, 6 writable (100.00%), 0 read-only (0.00%)
  EC: 1 EC volumes on 14 shards (14.00 shards/volume)

storage:
  total: 186024424 bytes
  regular volumes: 186024424 bytes
  EC volumes: 0 bytes
  raw: 558073152 bytes on volume replicas, 0 bytes on EC shard files
```
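As a quick usage sketch (not part of this change; the master address is an assumption, adjust it for your cluster), the command can also be run non-interactively by piping it into `weed shell`:

```
# hypothetical invocation; assumes a master listening on the default localhost:9333
echo "cluster.status" | weed shell -master=localhost:9333
```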
weed/shell/command_cluster_status.go (new file, 214 lines)
@@ -0,0 +1,214 @@
package shell

import (
	"flag"
	"fmt"
	"io"
	"strings"

	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
	"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
	"github.com/seaweedfs/seaweedfs/weed/storage/needle"
)

func init() {
	Commands = append(Commands, &commandClusterStatus{})
}

type commandClusterStatus struct{}

// ClusterStatusPrinter holds the collected cluster data and formats the
// status report section by section.
type ClusterStatusPrinter struct {
	writer io.Writer

	locked            bool
	collections       []string
	topology          *master_pb.TopologyInfo
	volumeSizeLimitMb uint64
}

func (c *commandClusterStatus) Name() string {
	return "cluster.status"
}

func (c *commandClusterStatus) Help() string {
	return `outputs a quick overview of the cluster status`
}

func (c *commandClusterStatus) HasTag(CommandTag) bool {
	return false
}

func (c *commandClusterStatus) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
	flags := flag.NewFlagSet(c.Name(), flag.ContinueOnError)

	if err = flags.Parse(args); err != nil {
		return err
	}

	collections, err := ListCollectionNames(commandEnv, true, true)
	if err != nil {
		return err
	}
	topology, volumeSizeLimitMb, err := collectTopologyInfo(commandEnv, 0)
	if err != nil {
		return err
	}

	sp := &ClusterStatusPrinter{
		writer: writer,

		locked:            commandEnv.isLocked(),
		collections:       collections,
		topology:          topology,
		volumeSizeLimitMb: volumeSizeLimitMb,
	}
	sp.Print()

	return nil
}

// TODO: humanize figures in output
// TODO: add option to collect detailed file stats
func (sp *ClusterStatusPrinter) Print() {
	sp.write("")
	sp.printClusterInfo()
	sp.printVolumeInfo()
	sp.printStorageInfo()
}

// write prints a single report line, trimming trailing whitespace from the
// format string and always terminating with a newline.
func (sp *ClusterStatusPrinter) write(format string, a ...any) {
	fmt.Fprintf(sp.writer, strings.TrimRight(format, "\r\n "), a...)
	fmt.Fprint(sp.writer, "\n")
}

func (sp *ClusterStatusPrinter) printClusterInfo() {
	dcs := len(sp.topology.DataCenterInfos)

	racks := 0
	nodes := 0
	disks := 0
	for _, dci := range sp.topology.DataCenterInfos {
		racks += len(dci.RackInfos)
		for _, ri := range dci.RackInfos {
			for _, dni := range ri.DataNodeInfos {
				nodes++
				disks += len(dni.DiskInfos)
			}
		}
	}

	status := "unlocked"
	if sp.locked {
		status = "LOCKED"
	}

	sp.write("cluster:")
	sp.write("\tid: %s", sp.topology.Id)
	sp.write("\tstatus: %s", status)
	sp.write("\tnodes: %d", nodes)
	sp.write("\ttopology: %d DC(s), %d disk(s) on %d rack(s)", dcs, disks, racks)
	sp.write("")
}

func (sp *ClusterStatusPrinter) printVolumeInfo() {
	collections := len(sp.collections)
	var maxVolumes uint64
	volumes := map[needle.VolumeId]bool{}
	ecVolumes := map[needle.VolumeId]bool{}

	var replicas, roReplicas, rwReplicas, ecShards uint64

	// Walk the topology, tallying volume replicas, their writability, and EC shards per disk.
	for _, dci := range sp.topology.DataCenterInfos {
		for _, ri := range dci.RackInfos {
			for _, dni := range ri.DataNodeInfos {
				for _, di := range dni.DiskInfos {
					maxVolumes += uint64(di.MaxVolumeCount)
					for _, vi := range di.VolumeInfos {
						vid := needle.VolumeId(vi.Id)
						volumes[vid] = true
						replicas++
						if vi.ReadOnly {
							roReplicas++
						} else {
							rwReplicas++
						}
					}
					for _, eci := range di.EcShardInfos {
						vid := needle.VolumeId(eci.Id)
						ecVolumes[vid] = true
						ecShards += uint64(erasure_coding.ShardBits(eci.EcIndexBits).ShardIdCount())
					}
				}
			}
		}
	}

	var roReplicasRatio, rwReplicasRatio, ecShardsPerVolume float64
	if replicas != 0 {
		roReplicasRatio = float64(roReplicas) / float64(replicas)
		rwReplicasRatio = float64(rwReplicas) / float64(replicas)
	}
	if len(ecVolumes) != 0 {
		ecShardsPerVolume = float64(ecShards) / float64(len(ecVolumes))
	}

	totalVolumes := len(volumes) + len(ecVolumes)

	sp.write("volumes:")
	sp.write("\ttotal: %d volumes on %d collections", totalVolumes, collections)
	sp.write("\tmax size: %d bytes", sp.volumeSizeLimitMb*1024*1024)
	sp.write("\tregular: %d/%d volumes on %d replicas, %d writable (%.02f%%), %d read-only (%.02f%%)", len(volumes), maxVolumes, replicas, rwReplicas, 100*rwReplicasRatio, roReplicas, 100*roReplicasRatio)
	sp.write("\tEC: %d EC volumes on %d shards (%.02f shards/volume)", len(ecVolumes), ecShards, ecShardsPerVolume)
	sp.write("")
}

func (sp *ClusterStatusPrinter) printStorageInfo() {
	perVolumeSize := map[needle.VolumeId]uint64{}
	perEcVolumeSize := map[needle.VolumeId]uint64{}
	var rawVolumeSize, rawEcVolumeSize uint64

	for _, dci := range sp.topology.DataCenterInfos {
		for _, ri := range dci.RackInfos {
			for _, dni := range ri.DataNodeInfos {
				for _, di := range dni.DiskInfos {
					for _, vi := range di.VolumeInfos {
						vid := needle.VolumeId(vi.Id)
						// Keep one size per volume ID; the raw total counts every replica.
						perVolumeSize[vid] = vi.Size
						rawVolumeSize += vi.Size
					}
					for _, eci := range di.EcShardInfos {
						vid := needle.VolumeId(eci.Id)
						var size uint64
						for _, ss := range eci.ShardSizes {
							size += uint64(ss)
						}
						perEcVolumeSize[vid] += size
						rawEcVolumeSize += size
					}
				}
			}
		}
	}
	// Normalize EC logical volume sizes given shard settings: shard files include
	// parity shards, so scale the summed shard sizes by data shards / total shards.
	for vid := range perEcVolumeSize {
		perEcVolumeSize[vid] = perEcVolumeSize[vid] * erasure_coding.DataShardsCount / erasure_coding.TotalShardsCount
	}

	var volumeSize, ecVolumeSize uint64
	for _, s := range perVolumeSize {
		volumeSize += s
	}
	for _, s := range perEcVolumeSize {
		ecVolumeSize += s
	}

	totalSize := volumeSize + ecVolumeSize

	sp.write("storage:")
	sp.write("\ttotal: %d bytes", totalSize)
	sp.write("\tregular volumes: %d bytes", volumeSize)
	sp.write("\tEC volumes: %d bytes", ecVolumeSize)
	sp.write("\traw: %d bytes on volume replicas, %d bytes on EC shard files", rawVolumeSize, rawEcVolumeSize)
	sp.write("")
}