* Fix imbalance detection disk type grouping and volume grow errors This PR addresses two issues: 1. Imbalance Detection: Previously, balance detection did not verify disk types, leading to false positives when comparing heterogenous nodes (e.g. SSD vs HDD). Logic is now updated to group volumes by DiskType before calculating imbalance. 2. Volume Grow Errors: Fixed a variable scope issue in master_grpc_server_volume.go and added a pre-check for available space to prevent 'only 0 volumes left' error logs when a disk type is full or abandoned. Included units tests for the detection logic. * Refactor balance detection loop into detectForDiskType * Fix potential panic in volume grow logic by checking replica placement parse error
380 lines
12 KiB
Go
380 lines
12 KiB
Go
package weed_server
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"math"
|
|
"math/rand/v2"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/stats"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/topology"
|
|
|
|
"github.com/seaweedfs/raft"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/security"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/super_block"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/types"
|
|
)
|
|
|
|
const (
|
|
volumeGrowStepCount = 2
|
|
)
|
|
|
|
func (ms *MasterServer) DoAutomaticVolumeGrow(req *topology.VolumeGrowRequest) {
|
|
if ms.option.VolumeGrowthDisabled {
|
|
glog.V(1).Infof("automatic volume grow disabled")
|
|
return
|
|
}
|
|
glog.V(1).Infoln("starting automatic volume grow")
|
|
start := time.Now()
|
|
newVidLocations, err := ms.vg.AutomaticGrowByType(req.Option, ms.grpcDialOption, ms.Topo, req.Count)
|
|
glog.V(1).Infoln("finished automatic volume grow, cost ", time.Now().Sub(start))
|
|
if err != nil {
|
|
glog.V(1).Infof("automatic volume grow failed: %+v", err)
|
|
return
|
|
}
|
|
for _, newVidLocation := range newVidLocations {
|
|
ms.broadcastToClients(&master_pb.KeepConnectedResponse{VolumeLocation: newVidLocation})
|
|
}
|
|
}
|
|
|
|
func (ms *MasterServer) ProcessGrowRequest() {
|
|
go func() {
|
|
ctx := context.Background()
|
|
firstRun := true
|
|
for {
|
|
if firstRun {
|
|
firstRun = false
|
|
} else {
|
|
time.Sleep(5*time.Minute + time.Duration(30*rand.Float32())*time.Second)
|
|
}
|
|
if !ms.Topo.IsLeader() {
|
|
continue
|
|
}
|
|
dcs := ms.Topo.ListDCAndRacks()
|
|
for _, vlc := range ms.Topo.ListVolumeLayoutCollections() {
|
|
var err error
|
|
vl := vlc.VolumeLayout
|
|
lastGrowCount := vl.GetLastGrowCount()
|
|
if vl.HasGrowRequest() {
|
|
continue
|
|
}
|
|
writable, crowded := vl.GetWritableVolumeCount()
|
|
mustGrow := int(lastGrowCount) - writable
|
|
vgr := vlc.ToVolumeGrowRequest()
|
|
stats.MasterVolumeLayoutWritable.WithLabelValues(vlc.Collection, vgr.DiskType, vgr.Replication, vgr.Ttl).Set(float64(writable))
|
|
stats.MasterVolumeLayoutCrowded.WithLabelValues(vlc.Collection, vgr.DiskType, vgr.Replication, vgr.Ttl).Set(float64(crowded))
|
|
|
|
switch {
|
|
case mustGrow > 0:
|
|
if rp, rpErr := super_block.NewReplicaPlacementFromString(vgr.Replication); rpErr != nil {
|
|
glog.V(0).Infof("failed to parse replica placement %s: %v", vgr.Replication, rpErr)
|
|
} else {
|
|
vgr.WritableVolumeCount = uint32(mustGrow)
|
|
if ms.Topo.AvailableSpaceFor(&topology.VolumeGrowOption{DiskType: types.ToDiskType(vgr.DiskType)}) >= int64(vgr.WritableVolumeCount*uint32(rp.GetCopyCount())) {
|
|
_, err = ms.VolumeGrow(ctx, vgr)
|
|
}
|
|
}
|
|
case lastGrowCount > 0 && writable < int(lastGrowCount*2) && float64(crowded+volumeGrowStepCount) > float64(writable)*topology.VolumeGrowStrategy.Threshold:
|
|
vgr.WritableVolumeCount = volumeGrowStepCount
|
|
_, err = ms.VolumeGrow(ctx, vgr)
|
|
}
|
|
if err != nil {
|
|
glog.V(0).Infof("volume grow request failed: %+v", err)
|
|
}
|
|
writableVolumes := vl.CloneWritableVolumes()
|
|
for dcId, racks := range dcs {
|
|
for _, rackId := range racks {
|
|
if vl.ShouldGrowVolumesByDcAndRack(&writableVolumes, dcId, rackId) {
|
|
vgr.DataCenter = string(dcId)
|
|
vgr.Rack = string(rackId)
|
|
if lastGrowCount > 0 {
|
|
vgr.WritableVolumeCount = uint32(math.Ceil(float64(lastGrowCount) / float64(len(dcs)*len(racks))))
|
|
} else {
|
|
vgr.WritableVolumeCount = volumeGrowStepCount
|
|
}
|
|
|
|
if _, err = ms.VolumeGrow(ctx, vgr); err != nil {
|
|
glog.V(0).Infof("volume grow request for dc:%s rack:%s failed: %+v", dcId, rackId, err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}()
|
|
go func() {
|
|
filter := sync.Map{}
|
|
for {
|
|
req, ok := <-ms.volumeGrowthRequestChan
|
|
if !ok {
|
|
break
|
|
}
|
|
|
|
option := req.Option
|
|
vl := ms.Topo.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl, option.DiskType)
|
|
|
|
if !ms.Topo.IsLeader() {
|
|
//discard buffered requests
|
|
time.Sleep(time.Second * 1)
|
|
vl.DoneGrowRequest()
|
|
continue
|
|
}
|
|
|
|
// filter out identical requests being processed
|
|
found := false
|
|
filter.Range(func(k, v interface{}) bool {
|
|
existingReq := k.(*topology.VolumeGrowRequest)
|
|
if existingReq.Equals(req) {
|
|
found = true
|
|
}
|
|
return !found
|
|
})
|
|
|
|
// not atomic but it's okay
|
|
if found || (!req.Force && !vl.ShouldGrowVolumes()) {
|
|
glog.V(4).Infoln("discard volume grow request")
|
|
time.Sleep(time.Millisecond * 211)
|
|
vl.DoneGrowRequest()
|
|
continue
|
|
}
|
|
|
|
filter.Store(req, nil)
|
|
// we have lock called inside vg
|
|
glog.V(0).Infof("volume grow %+v", req)
|
|
go func(req *topology.VolumeGrowRequest, vl *topology.VolumeLayout) {
|
|
ms.DoAutomaticVolumeGrow(req)
|
|
vl.DoneGrowRequest()
|
|
filter.Delete(req)
|
|
}(req, vl)
|
|
}
|
|
}()
|
|
}
|
|
|
|
func (ms *MasterServer) LookupVolume(ctx context.Context, req *master_pb.LookupVolumeRequest) (*master_pb.LookupVolumeResponse, error) {
|
|
|
|
resp := &master_pb.LookupVolumeResponse{}
|
|
volumeLocations := ms.lookupVolumeId(req.VolumeOrFileIds, req.Collection)
|
|
|
|
for _, volumeOrFileId := range req.VolumeOrFileIds {
|
|
vid := volumeOrFileId
|
|
commaSep := strings.Index(vid, ",")
|
|
if commaSep > 0 {
|
|
vid = vid[0:commaSep]
|
|
}
|
|
if result, found := volumeLocations[vid]; found {
|
|
var locations []*master_pb.Location
|
|
for _, loc := range result.Locations {
|
|
locations = append(locations, &master_pb.Location{
|
|
Url: loc.Url,
|
|
PublicUrl: loc.PublicUrl,
|
|
DataCenter: loc.DataCenter,
|
|
GrpcPort: uint32(loc.GrpcPort),
|
|
})
|
|
}
|
|
var auth string
|
|
if commaSep > 0 { // this is a file id
|
|
auth = string(security.GenJwtForVolumeServer(ms.guard.SigningKey, ms.guard.ExpiresAfterSec, result.VolumeOrFileId))
|
|
}
|
|
resp.VolumeIdLocations = append(resp.VolumeIdLocations, &master_pb.LookupVolumeResponse_VolumeIdLocation{
|
|
VolumeOrFileId: result.VolumeOrFileId,
|
|
Locations: locations,
|
|
Error: result.Error,
|
|
Auth: auth,
|
|
})
|
|
}
|
|
}
|
|
|
|
return resp, nil
|
|
}
|
|
|
|
func (ms *MasterServer) Statistics(ctx context.Context, req *master_pb.StatisticsRequest) (*master_pb.StatisticsResponse, error) {
|
|
|
|
if !ms.Topo.IsLeader() {
|
|
return nil, raft.NotLeaderError
|
|
}
|
|
|
|
if req.Replication == "" {
|
|
req.Replication = ms.option.DefaultReplicaPlacement
|
|
}
|
|
replicaPlacement, err := super_block.NewReplicaPlacementFromString(req.Replication)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
ttl, err := needle.ReadTTL(req.Ttl)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
volumeLayout := ms.Topo.GetVolumeLayout(req.Collection, replicaPlacement, ttl, types.ToDiskType(req.DiskType))
|
|
stats := volumeLayout.Stats()
|
|
totalSize := ms.Topo.GetDiskUsages().GetMaxVolumeCount() * int64(ms.option.VolumeSizeLimitMB) * 1024 * 1024
|
|
resp := &master_pb.StatisticsResponse{
|
|
TotalSize: uint64(totalSize),
|
|
UsedSize: stats.UsedSize,
|
|
FileCount: stats.FileCount,
|
|
}
|
|
|
|
return resp, nil
|
|
}
|
|
|
|
func (ms *MasterServer) VolumeList(ctx context.Context, req *master_pb.VolumeListRequest) (*master_pb.VolumeListResponse, error) {
|
|
|
|
if !ms.Topo.IsLeader() {
|
|
return nil, raft.NotLeaderError
|
|
}
|
|
|
|
resp := &master_pb.VolumeListResponse{
|
|
TopologyInfo: ms.Topo.ToTopologyInfo(),
|
|
VolumeSizeLimitMb: uint64(ms.option.VolumeSizeLimitMB),
|
|
}
|
|
|
|
return resp, nil
|
|
}
|
|
|
|
func (ms *MasterServer) LookupEcVolume(ctx context.Context, req *master_pb.LookupEcVolumeRequest) (*master_pb.LookupEcVolumeResponse, error) {
|
|
|
|
if !ms.Topo.IsLeader() {
|
|
return nil, raft.NotLeaderError
|
|
}
|
|
|
|
resp := &master_pb.LookupEcVolumeResponse{}
|
|
|
|
ecLocations, found := ms.Topo.LookupEcShards(needle.VolumeId(req.VolumeId))
|
|
|
|
if !found {
|
|
return resp, fmt.Errorf("ec volume %d not found", req.VolumeId)
|
|
}
|
|
|
|
resp.VolumeId = req.VolumeId
|
|
|
|
for shardId, shardLocations := range ecLocations.Locations {
|
|
var locations []*master_pb.Location
|
|
for _, dn := range shardLocations {
|
|
locations = append(locations, &master_pb.Location{
|
|
Url: dn.Url(),
|
|
PublicUrl: dn.PublicUrl,
|
|
DataCenter: dn.GetDataCenterId(),
|
|
})
|
|
}
|
|
resp.ShardIdLocations = append(resp.ShardIdLocations, &master_pb.LookupEcVolumeResponse_EcShardIdLocation{
|
|
ShardId: uint32(shardId),
|
|
Locations: locations,
|
|
})
|
|
}
|
|
|
|
return resp, nil
|
|
}
|
|
|
|
func (ms *MasterServer) VacuumVolume(ctx context.Context, req *master_pb.VacuumVolumeRequest) (*master_pb.VacuumVolumeResponse, error) {
|
|
|
|
if !ms.Topo.IsLeader() {
|
|
return nil, raft.NotLeaderError
|
|
}
|
|
|
|
resp := &master_pb.VacuumVolumeResponse{}
|
|
|
|
ms.Topo.Vacuum(ms.grpcDialOption, float64(req.GarbageThreshold), ms.option.MaxParallelVacuumPerServer, req.VolumeId, req.Collection, ms.preallocateSize, false)
|
|
|
|
return resp, nil
|
|
}
|
|
|
|
func (ms *MasterServer) DisableVacuum(ctx context.Context, req *master_pb.DisableVacuumRequest) (*master_pb.DisableVacuumResponse, error) {
|
|
|
|
ms.Topo.DisableVacuum()
|
|
resp := &master_pb.DisableVacuumResponse{}
|
|
return resp, nil
|
|
}
|
|
|
|
func (ms *MasterServer) EnableVacuum(ctx context.Context, req *master_pb.EnableVacuumRequest) (*master_pb.EnableVacuumResponse, error) {
|
|
|
|
ms.Topo.EnableVacuum()
|
|
resp := &master_pb.EnableVacuumResponse{}
|
|
return resp, nil
|
|
}
|
|
|
|
func (ms *MasterServer) VolumeMarkReadonly(ctx context.Context, req *master_pb.VolumeMarkReadonlyRequest) (*master_pb.VolumeMarkReadonlyResponse, error) {
|
|
|
|
if !ms.Topo.IsLeader() {
|
|
return nil, raft.NotLeaderError
|
|
}
|
|
|
|
resp := &master_pb.VolumeMarkReadonlyResponse{}
|
|
|
|
replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(req.ReplicaPlacement))
|
|
vl := ms.Topo.GetVolumeLayout(req.Collection, replicaPlacement, needle.LoadTTLFromUint32(req.Ttl), types.ToDiskType(req.DiskType))
|
|
dataNodes := ms.Topo.Lookup(req.Collection, needle.VolumeId(req.VolumeId))
|
|
|
|
for _, dn := range dataNodes {
|
|
if dn.Ip == req.Ip && dn.Port == int(req.Port) {
|
|
if req.IsReadonly {
|
|
vl.SetVolumeReadOnly(dn, needle.VolumeId(req.VolumeId))
|
|
} else {
|
|
vl.SetVolumeWritable(dn, needle.VolumeId(req.VolumeId))
|
|
}
|
|
}
|
|
}
|
|
|
|
return resp, nil
|
|
}
|
|
|
|
func (ms *MasterServer) VolumeGrow(ctx context.Context, req *master_pb.VolumeGrowRequest) (*master_pb.VolumeGrowResponse, error) {
|
|
if !ms.Topo.IsLeader() {
|
|
return nil, raft.NotLeaderError
|
|
}
|
|
if req.Replication == "" {
|
|
req.Replication = ms.option.DefaultReplicaPlacement
|
|
}
|
|
replicaPlacement, err := super_block.NewReplicaPlacementFromString(req.Replication)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
ttl, err := needle.ReadTTL(req.Ttl)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if req.DataCenter != "" && !ms.Topo.DataCenterExists(req.DataCenter) {
|
|
return nil, fmt.Errorf("data center not exists")
|
|
}
|
|
|
|
ver := needle.GetCurrentVersion()
|
|
volumeGrowOption := topology.VolumeGrowOption{
|
|
Collection: req.Collection,
|
|
ReplicaPlacement: replicaPlacement,
|
|
Ttl: ttl,
|
|
DiskType: types.ToDiskType(req.DiskType),
|
|
Preallocate: ms.preallocateSize,
|
|
DataCenter: req.DataCenter,
|
|
Rack: req.Rack,
|
|
DataNode: req.DataNode,
|
|
MemoryMapMaxSizeMb: req.MemoryMapMaxSizeMb,
|
|
Version: uint32(ver),
|
|
}
|
|
volumeGrowRequest := topology.VolumeGrowRequest{
|
|
Option: &volumeGrowOption,
|
|
Count: req.WritableVolumeCount,
|
|
Force: true,
|
|
Reason: "grpc volume grow",
|
|
}
|
|
replicaCount := int64(req.WritableVolumeCount * uint32(replicaPlacement.GetCopyCount()))
|
|
|
|
if ms.Topo.AvailableSpaceFor(&volumeGrowOption) < replicaCount {
|
|
return nil, fmt.Errorf("only %d volumes left, not enough for %d", ms.Topo.AvailableSpaceFor(&volumeGrowOption), replicaCount)
|
|
}
|
|
|
|
if !ms.Topo.DataCenterExists(volumeGrowOption.DataCenter) {
|
|
err = fmt.Errorf("data center %v not found in topology", volumeGrowOption.DataCenter)
|
|
}
|
|
|
|
ms.DoAutomaticVolumeGrow(&volumeGrowRequest)
|
|
|
|
return &master_pb.VolumeGrowResponse{}, nil
|
|
}
|