Shell: add verbose ec encoding mode (#7105)
* add verbose ec encoding mode
* address comments
This commit is contained in:
@@ -36,8 +36,8 @@ func (c *commandEcEncode) Name() string {
|
|||||||
func (c *commandEcEncode) Help() string {
|
func (c *commandEcEncode) Help() string {
|
||||||
return `apply erasure coding to a volume
|
return `apply erasure coding to a volume
|
||||||
|
|
||||||
ec.encode [-collection=""] [-fullPercent=95 -quietFor=1h]
|
ec.encode [-collection=""] [-fullPercent=95 -quietFor=1h] [-verbose]
|
||||||
ec.encode [-collection=""] [-volumeId=<volume_id>]
|
ec.encode [-collection=""] [-volumeId=<volume_id>] [-verbose]
|
||||||
|
|
||||||
This command will:
|
This command will:
|
||||||
1. freeze one volume
|
1. freeze one volume
|
||||||
@@ -53,6 +53,9 @@ func (c *commandEcEncode) Help() string {
|
|||||||
If you only have less than 4 volume servers, with erasure coding, at least you can afford to
|
If you only have less than 4 volume servers, with erasure coding, at least you can afford to
|
||||||
have 4 corrupted shard files.
|
have 4 corrupted shard files.
|
||||||
|
|
||||||
|
Options:
|
||||||
|
-verbose: show detailed reasons why volumes are not selected for encoding
|
||||||
|
|
||||||
Re-balancing algorithm:
|
Re-balancing algorithm:
|
||||||
` + ecBalanceAlgorithmDescription
|
` + ecBalanceAlgorithmDescription
|
||||||
}
|
}
|
||||||
@@ -72,6 +75,7 @@ func (c *commandEcEncode) Do(args []string, commandEnv *CommandEnv, writer io.Wr
|
|||||||
forceChanges := encodeCommand.Bool("force", false, "force the encoding even if the cluster has less than recommended 4 nodes")
|
forceChanges := encodeCommand.Bool("force", false, "force the encoding even if the cluster has less than recommended 4 nodes")
|
||||||
shardReplicaPlacement := encodeCommand.String("shardReplicaPlacement", "", "replica placement for EC shards, or master default if empty")
|
shardReplicaPlacement := encodeCommand.String("shardReplicaPlacement", "", "replica placement for EC shards, or master default if empty")
|
||||||
applyBalancing := encodeCommand.Bool("rebalance", false, "re-balance EC shards after creation")
|
applyBalancing := encodeCommand.Bool("rebalance", false, "re-balance EC shards after creation")
|
||||||
|
verbose := encodeCommand.Bool("verbose", false, "show detailed reasons why volumes are not selected for encoding")
|
||||||
|
|
||||||
if err = encodeCommand.Parse(args); err != nil {
|
if err = encodeCommand.Parse(args); err != nil {
|
||||||
return nil
|
return nil
|
||||||
@@ -109,7 +113,7 @@ func (c *commandEcEncode) Do(args []string, commandEnv *CommandEnv, writer io.Wr
|
|||||||
balanceCollections = collectCollectionsForVolumeIds(topologyInfo, volumeIds)
|
balanceCollections = collectCollectionsForVolumeIds(topologyInfo, volumeIds)
|
||||||
} else {
|
} else {
|
||||||
// apply to all volumes for the given collection
|
// apply to all volumes for the given collection
|
||||||
volumeIds, err = collectVolumeIdsForEcEncode(commandEnv, *collection, nil, *fullPercentage, *quietPeriod)
|
volumeIds, err = collectVolumeIdsForEcEncode(commandEnv, *collection, nil, *fullPercentage, *quietPeriod, *verbose)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -266,7 +270,7 @@ func generateEcShards(grpcDialOption grpc.DialOption, volumeId needle.VolumeId,
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func collectVolumeIdsForEcEncode(commandEnv *CommandEnv, selectedCollection string, sourceDiskType *types.DiskType, fullPercentage float64, quietPeriod time.Duration) (vids []needle.VolumeId, err error) {
|
func collectVolumeIdsForEcEncode(commandEnv *CommandEnv, selectedCollection string, sourceDiskType *types.DiskType, fullPercentage float64, quietPeriod time.Duration, verbose bool) (vids []needle.VolumeId, err error) {
|
||||||
// collect topology information
|
// collect topology information
|
||||||
topologyInfo, volumeSizeLimitMb, err := collectTopologyInfo(commandEnv, 0)
|
topologyInfo, volumeSizeLimitMb, err := collectTopologyInfo(commandEnv, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -278,33 +282,106 @@ func collectVolumeIdsForEcEncode(commandEnv *CommandEnv, selectedCollection stri
|
|||||||
|
|
||||||
fmt.Printf("collect volumes quiet for: %d seconds and %.1f%% full\n", quietSeconds, fullPercentage)
|
fmt.Printf("collect volumes quiet for: %d seconds and %.1f%% full\n", quietSeconds, fullPercentage)
|
||||||
|
|
||||||
|
// Statistics for verbose mode
|
||||||
|
var (
|
||||||
|
totalVolumes int
|
||||||
|
remoteVolumes int
|
||||||
|
wrongCollection int
|
||||||
|
wrongDiskType int
|
||||||
|
tooRecent int
|
||||||
|
tooSmall int
|
||||||
|
noFreeDisk int
|
||||||
|
)
|
||||||
|
|
||||||
vidMap := make(map[uint32]bool)
|
vidMap := make(map[uint32]bool)
|
||||||
eachDataNode(topologyInfo, func(dc DataCenterId, rack RackId, dn *master_pb.DataNodeInfo) {
|
eachDataNode(topologyInfo, func(dc DataCenterId, rack RackId, dn *master_pb.DataNodeInfo) {
|
||||||
for _, diskInfo := range dn.DiskInfos {
|
for _, diskInfo := range dn.DiskInfos {
|
||||||
for _, v := range diskInfo.VolumeInfos {
|
for _, v := range diskInfo.VolumeInfos {
|
||||||
|
totalVolumes++
|
||||||
|
|
||||||
// ignore remote volumes
|
// ignore remote volumes
|
||||||
if v.RemoteStorageName != "" && v.RemoteStorageKey != "" {
|
if v.RemoteStorageName != "" && v.RemoteStorageKey != "" {
|
||||||
|
remoteVolumes++
|
||||||
|
if verbose {
|
||||||
|
fmt.Printf("skip volume %d on %s: remote volume (storage: %s, key: %s)\n",
|
||||||
|
v.Id, dn.Id, v.RemoteStorageName, v.RemoteStorageKey)
|
||||||
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if v.Collection == selectedCollection && v.ModifiedAtSecond+quietSeconds < nowUnixSeconds &&
|
|
||||||
(sourceDiskType == nil || types.ToDiskType(v.DiskType) == *sourceDiskType) {
|
// check collection
|
||||||
if float64(v.Size) > fullPercentage/100*float64(volumeSizeLimitMb)*1024*1024 {
|
if v.Collection != selectedCollection {
|
||||||
if good, found := vidMap[v.Id]; found {
|
wrongCollection++
|
||||||
if good {
|
if verbose {
|
||||||
if diskInfo.FreeVolumeCount < 2 {
|
fmt.Printf("skip volume %d on %s: wrong collection (expected: %s, actual: %s)\n",
|
||||||
glog.V(0).Infof("skip %s %d on %s, no free disk", v.Collection, v.Id, dn.Id)
|
v.Id, dn.Id, selectedCollection, v.Collection)
|
||||||
vidMap[v.Id] = false
|
}
|
||||||
}
|
continue
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
if diskInfo.FreeVolumeCount < 2 {
|
// check disk type
|
||||||
glog.V(0).Infof("skip %s %d on %s, no free disk", v.Collection, v.Id, dn.Id)
|
if sourceDiskType != nil && types.ToDiskType(v.DiskType) != *sourceDiskType {
|
||||||
vidMap[v.Id] = false
|
wrongDiskType++
|
||||||
} else {
|
if verbose {
|
||||||
vidMap[v.Id] = true
|
fmt.Printf("skip volume %d on %s: wrong disk type (expected: %s, actual: %s)\n",
|
||||||
|
v.Id, dn.Id, sourceDiskType.ReadableString(), types.ToDiskType(v.DiskType).ReadableString())
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// check quiet period
|
||||||
|
if v.ModifiedAtSecond+quietSeconds >= nowUnixSeconds {
|
||||||
|
tooRecent++
|
||||||
|
if verbose {
|
||||||
|
fmt.Printf("skip volume %d on %s: too recently modified (last modified: %d seconds ago, required: %d seconds)\n",
|
||||||
|
v.Id, dn.Id, nowUnixSeconds-v.ModifiedAtSecond, quietSeconds)
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// check size
|
||||||
|
sizeThreshold := fullPercentage / 100 * float64(volumeSizeLimitMb) * 1024 * 1024
|
||||||
|
if float64(v.Size) <= sizeThreshold {
|
||||||
|
tooSmall++
|
||||||
|
if verbose {
|
||||||
|
fmt.Printf("skip volume %d on %s: too small (size: %.1f MB, threshold: %.1f MB, %.1f%% full)\n",
|
||||||
|
v.Id, dn.Id, float64(v.Size)/(1024*1024), sizeThreshold/(1024*1024),
|
||||||
|
float64(v.Size)*100/(float64(volumeSizeLimitMb)*1024*1024))
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// check free disk space
|
||||||
|
if good, found := vidMap[v.Id]; found {
|
||||||
|
if good {
|
||||||
|
if diskInfo.FreeVolumeCount < 2 {
|
||||||
|
glog.V(0).Infof("skip %s %d on %s, no free disk", v.Collection, v.Id, dn.Id)
|
||||||
|
if verbose {
|
||||||
|
fmt.Printf("skip volume %d on %s: insufficient free disk space (free volumes: %d, required: 2)\n",
|
||||||
|
v.Id, dn.Id, diskInfo.FreeVolumeCount)
|
||||||
}
|
}
|
||||||
|
vidMap[v.Id] = false
|
||||||
|
noFreeDisk++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
if diskInfo.FreeVolumeCount < 2 {
|
||||||
|
glog.V(0).Infof("skip %s %d on %s, no free disk", v.Collection, v.Id, dn.Id)
|
||||||
|
if verbose {
|
||||||
|
fmt.Printf("skip volume %d on %s: insufficient free disk space (free volumes: %d, required: 2)\n",
|
||||||
|
v.Id, dn.Id, diskInfo.FreeVolumeCount)
|
||||||
|
}
|
||||||
|
vidMap[v.Id] = false
|
||||||
|
noFreeDisk++
|
||||||
|
} else {
|
||||||
|
if verbose {
|
||||||
|
fmt.Printf("selected volume %d on %s: size %.1f MB (%.1f%% full), last modified %d seconds ago, free volumes: %d\n",
|
||||||
|
v.Id, dn.Id, float64(v.Size)/(1024*1024),
|
||||||
|
float64(v.Size)*100/(float64(volumeSizeLimitMb)*1024*1024),
|
||||||
|
nowUnixSeconds-v.ModifiedAtSecond, diskInfo.FreeVolumeCount)
|
||||||
|
}
|
||||||
|
vidMap[v.Id] = true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -316,5 +393,35 @@ func collectVolumeIdsForEcEncode(commandEnv *CommandEnv, selectedCollection stri
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Print summary statistics in verbose mode or when no volumes selected
|
||||||
|
if verbose || len(vids) == 0 {
|
||||||
|
fmt.Printf("\nVolume selection summary:\n")
|
||||||
|
fmt.Printf(" Total volumes examined: %d\n", totalVolumes)
|
||||||
|
fmt.Printf(" Selected for encoding: %d\n", len(vids))
|
||||||
|
|
||||||
|
if totalVolumes > 0 {
|
||||||
|
fmt.Printf("\nReasons for exclusion:\n")
|
||||||
|
if remoteVolumes > 0 {
|
||||||
|
fmt.Printf(" Remote volumes: %d\n", remoteVolumes)
|
||||||
|
}
|
||||||
|
if wrongCollection > 0 {
|
||||||
|
fmt.Printf(" Wrong collection: %d\n", wrongCollection)
|
||||||
|
}
|
||||||
|
if wrongDiskType > 0 {
|
||||||
|
fmt.Printf(" Wrong disk type: %d\n", wrongDiskType)
|
||||||
|
}
|
||||||
|
if tooRecent > 0 {
|
||||||
|
fmt.Printf(" Too recently modified: %d\n", tooRecent)
|
||||||
|
}
|
||||||
|
if tooSmall > 0 {
|
||||||
|
fmt.Printf(" Too small (< %.1f%% full): %d\n", fullPercentage, tooSmall)
|
||||||
|
}
|
||||||
|
if noFreeDisk > 0 {
|
||||||
|
fmt.Printf(" Insufficient free disk space: %d\n", noFreeDisk)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fmt.Println()
|
||||||
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -98,7 +98,7 @@ func (c *commandVolumeTierUpload) Do(args []string, commandEnv *CommandEnv, writ
|
|||||||
|
|
||||||
// apply to all volumes in the collection
|
// apply to all volumes in the collection
|
||||||
// reusing collectVolumeIdsForEcEncode for now
|
// reusing collectVolumeIdsForEcEncode for now
|
||||||
volumeIds, err := collectVolumeIdsForEcEncode(commandEnv, *collection, diskType, *fullPercentage, *quietPeriod)
|
volumeIds, err := collectVolumeIdsForEcEncode(commandEnv, *collection, diskType, *fullPercentage, *quietPeriod, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user