add admin script worker (#8491)
* admin: add plugin lock coordination * shell: allow bypassing lock checks * plugin worker: add admin script handler * mini: include admin_script in plugin defaults * admin script UI: drop name and enlarge text * admin script: add default script * admin_script: make run interval configurable * plugin: gate other jobs during admin_script runs * plugin: use last completed admin_script run * admin: backfill plugin config defaults * templ Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * comparable to default version Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * default to run Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * format Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * shell: respect pre-set noLock for fix.replication * shell: add force no-lock mode for admin scripts * volume balance worker already exists Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * admin: expose scheduler status JSON * shell: add sleep command * shell: restrict sleep syntax * Revert "shell: respect pre-set noLock for fix.replication" This reverts commit 2b14e8b82602a740d3a473c085e3b3a14f1ddbb3. * templ Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * fix import Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * less logs Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * Reduce master client logs on canceled contexts * Update mini default job type count --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -11,6 +11,8 @@ import (
|
||||
"time"
|
||||
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
|
||||
"github.com/seaweedfs/seaweedfs/weed/glog"
|
||||
"github.com/seaweedfs/seaweedfs/weed/pb"
|
||||
@@ -26,6 +28,22 @@ type masterVolumeProvider struct {
|
||||
masterClient *MasterClient
|
||||
}
|
||||
|
||||
func isCanceledErr(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
|
||||
return true
|
||||
}
|
||||
if statusErr, ok := status.FromError(err); ok {
|
||||
switch statusErr.Code() {
|
||||
case codes.Canceled, codes.DeadlineExceeded:
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// LookupVolumeIds queries the master for volume locations (fallback when cache misses)
|
||||
// Returns partial results with aggregated errors for volumes that failed
|
||||
func (p *masterVolumeProvider) LookupVolumeIds(ctx context.Context, volumeIds []string) (map[string][]Location, error) {
|
||||
@@ -194,8 +212,13 @@ func (mc *MasterClient) tryConnectToMaster(ctx context.Context, master pb.Server
|
||||
|
||||
resp, err := stream.Recv()
|
||||
if err != nil {
|
||||
glog.V(0).Infof("%s.%s masterClient failed to receive from %s: %v", mc.FilerGroup, mc.clientType, master, err)
|
||||
stats.MasterClientConnectCounter.WithLabelValues(stats.FailedToReceive).Inc()
|
||||
canceled := isCanceledErr(err) || ctx.Err() != nil
|
||||
if canceled {
|
||||
glog.V(1).Infof("%s.%s masterClient stream closed from %s: %v", mc.FilerGroup, mc.clientType, master, err)
|
||||
} else {
|
||||
glog.V(0).Infof("%s.%s masterClient failed to receive from %s: %v", mc.FilerGroup, mc.clientType, master, err)
|
||||
stats.MasterClientConnectCounter.WithLabelValues(stats.FailedToReceive).Inc()
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -219,8 +242,13 @@ func (mc *MasterClient) tryConnectToMaster(ctx context.Context, master pb.Server
|
||||
for {
|
||||
resp, err := stream.Recv()
|
||||
if err != nil {
|
||||
glog.V(0).Infof("%s.%s masterClient failed to receive from %s: %v", mc.FilerGroup, mc.clientType, master, err)
|
||||
stats.MasterClientConnectCounter.WithLabelValues(stats.FailedToReceive).Inc()
|
||||
canceled := isCanceledErr(err) || ctx.Err() != nil
|
||||
if canceled {
|
||||
glog.V(1).Infof("%s.%s masterClient stream closed from %s: %v", mc.FilerGroup, mc.clientType, master, err)
|
||||
} else {
|
||||
glog.V(0).Infof("%s.%s masterClient failed to receive from %s: %v", mc.FilerGroup, mc.clientType, master, err)
|
||||
stats.MasterClientConnectCounter.WithLabelValues(stats.FailedToReceive).Inc()
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -252,12 +280,20 @@ func (mc *MasterClient) tryConnectToMaster(ctx context.Context, master pb.Server
|
||||
mc.OnPeerUpdateLock.RUnlock()
|
||||
}
|
||||
if err := ctx.Err(); err != nil {
|
||||
glog.V(0).Infof("Connection attempt to master stopped: %v", err)
|
||||
if isCanceledErr(err) {
|
||||
glog.V(1).Infof("Connection attempt to master stopped: %v", err)
|
||||
} else {
|
||||
glog.V(0).Infof("Connection attempt to master stopped: %v", err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
})
|
||||
if gprcErr != nil {
|
||||
if isCanceledErr(gprcErr) || ctx.Err() != nil {
|
||||
glog.V(1).Infof("%s.%s masterClient connection closed to %v: %v", mc.FilerGroup, mc.clientType, master, gprcErr)
|
||||
return nextHintedLeader
|
||||
}
|
||||
stats.MasterClientConnectCounter.WithLabelValues(stats.Failed).Inc()
|
||||
glog.V(1).Infof("%s.%s masterClient failed to connect with master %v: %v", mc.FilerGroup, mc.clientType, master, gprcErr)
|
||||
}
|
||||
@@ -387,7 +423,11 @@ func (mc *MasterClient) KeepConnectedToMaster(ctx context.Context) {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
glog.V(0).Infof("Connection to masters stopped: %v", ctx.Err())
|
||||
if isCanceledErr(ctx.Err()) {
|
||||
glog.V(1).Infof("Connection to masters stopped: %v", ctx.Err())
|
||||
} else {
|
||||
glog.V(0).Infof("Connection to masters stopped: %v", ctx.Err())
|
||||
}
|
||||
return
|
||||
default:
|
||||
reconnectStart := time.Now()
|
||||
|
||||
Reference in New Issue
Block a user