add admin script worker (#8491)
* admin: add plugin lock coordination * shell: allow bypassing lock checks * plugin worker: add admin script handler * mini: include admin_script in plugin defaults * admin script UI: drop name and enlarge text * admin script: add default script * admin_script: make run interval configurable * plugin: gate other jobs during admin_script runs * plugin: use last completed admin_script run * admin: backfill plugin config defaults * templ Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * comparable to default version Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * default to run Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * format Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * shell: respect pre-set noLock for fix.replication * shell: add force no-lock mode for admin scripts * volume balance worker already exists Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * admin: expose scheduler status JSON * shell: add sleep command * shell: restrict sleep syntax * Revert "shell: respect pre-set noLock for fix.replication" This reverts commit 2b14e8b82602a740d3a473c085e3b3a14f1ddbb3. * templ Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * fix import Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * less logs Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * Reduce master client logs on canceled contexts * Update mini default job type count --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -24,6 +24,7 @@ const (
|
||||
defaultHeartbeatInterval = 30
|
||||
defaultReconnectDelay = 5
|
||||
defaultPendingSchemaBuffer = 1
|
||||
adminScriptJobType = "admin_script"
|
||||
)
|
||||
|
||||
type Options struct {
|
||||
@@ -32,6 +33,7 @@ type Options struct {
|
||||
SendTimeout time.Duration
|
||||
SchedulerTick time.Duration
|
||||
ClusterContextProvider func(context.Context) (*plugin_pb.ClusterContext, error)
|
||||
LockManager LockManager
|
||||
}
|
||||
|
||||
// JobTypeInfo contains metadata about a plugin job type.
|
||||
@@ -52,6 +54,7 @@ type Plugin struct {
|
||||
|
||||
schedulerTick time.Duration
|
||||
clusterContextProvider func(context.Context) (*plugin_pb.ClusterContext, error)
|
||||
lockManager LockManager
|
||||
|
||||
schedulerMu sync.Mutex
|
||||
nextDetectionAt map[string]time.Time
|
||||
@@ -62,6 +65,9 @@ type Plugin struct {
|
||||
|
||||
schedulerExecMu sync.Mutex
|
||||
schedulerExecReservations map[string]int
|
||||
adminScriptRunMu sync.RWMutex
|
||||
schedulerDetectionMu sync.Mutex
|
||||
schedulerDetection map[string]*schedulerDetectionInfo
|
||||
|
||||
dedupeMu sync.Mutex
|
||||
recentDedupeByType map[string]map[string]time.Time
|
||||
@@ -148,6 +154,7 @@ func New(options Options) (*Plugin, error) {
|
||||
sendTimeout: sendTimeout,
|
||||
schedulerTick: schedulerTick,
|
||||
clusterContextProvider: options.ClusterContextProvider,
|
||||
lockManager: options.LockManager,
|
||||
sessions: make(map[string]*streamSession),
|
||||
pendingSchema: make(map[string]chan *plugin_pb.ConfigSchemaResponse),
|
||||
pendingDetection: make(map[string]*pendingDetectionState),
|
||||
@@ -156,6 +163,7 @@ func New(options Options) (*Plugin, error) {
|
||||
detectionInFlight: make(map[string]bool),
|
||||
detectorLeases: make(map[string]string),
|
||||
schedulerExecReservations: make(map[string]int),
|
||||
schedulerDetection: make(map[string]*schedulerDetectionInfo),
|
||||
recentDedupeByType: make(map[string]map[string]time.Time),
|
||||
jobs: make(map[string]*TrackedJob),
|
||||
activities: make([]JobActivity, 0, 256),
|
||||
@@ -382,6 +390,13 @@ func (r *Plugin) BaseDir() string {
|
||||
return r.store.BaseDir()
|
||||
}
|
||||
|
||||
func (r *Plugin) acquireAdminLock(reason string) (func(), error) {
|
||||
if r == nil || r.lockManager == nil {
|
||||
return func() {}, nil
|
||||
}
|
||||
return r.lockManager.Acquire(reason)
|
||||
}
|
||||
|
||||
// RunDetectionWithReport requests one detector worker and returns proposals with request metadata.
|
||||
func (r *Plugin) RunDetectionWithReport(
|
||||
ctx context.Context,
|
||||
@@ -389,6 +404,9 @@ func (r *Plugin) RunDetectionWithReport(
|
||||
clusterContext *plugin_pb.ClusterContext,
|
||||
maxResults int32,
|
||||
) (*DetectionReport, error) {
|
||||
releaseGate := r.acquireDetectionExecutionGate(jobType, false)
|
||||
defer releaseGate()
|
||||
|
||||
detector, err := r.pickDetector(jobType)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -403,7 +421,10 @@ func (r *Plugin) RunDetectionWithReport(
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
lastSuccessfulRun := r.loadLastSuccessfulRun(jobType)
|
||||
lastCompletedRun := r.loadLastSuccessfulRun(jobType)
|
||||
if strings.EqualFold(strings.TrimSpace(jobType), adminScriptJobType) {
|
||||
lastCompletedRun = r.loadLastCompletedRun(jobType)
|
||||
}
|
||||
|
||||
state := &pendingDetectionState{
|
||||
complete: make(chan *plugin_pb.DetectionComplete, 1),
|
||||
@@ -444,7 +465,7 @@ func (r *Plugin) RunDetectionWithReport(
|
||||
AdminConfigValues: adminConfigValues,
|
||||
WorkerConfigValues: workerConfigValues,
|
||||
ClusterContext: clusterContext,
|
||||
LastSuccessfulRun: lastSuccessfulRun,
|
||||
LastSuccessfulRun: lastCompletedRun,
|
||||
MaxResults: maxResults,
|
||||
},
|
||||
},
|
||||
@@ -531,11 +552,14 @@ func (r *Plugin) ExecuteJob(
|
||||
if job == nil {
|
||||
return nil, fmt.Errorf("job is nil")
|
||||
}
|
||||
if strings.TrimSpace(job.JobType) == "" {
|
||||
jobType := strings.TrimSpace(job.JobType)
|
||||
if jobType == "" {
|
||||
return nil, fmt.Errorf("job_type is required")
|
||||
}
|
||||
releaseGate := r.acquireDetectionExecutionGate(jobType, true)
|
||||
defer releaseGate()
|
||||
|
||||
executor, err := r.registry.PickExecutor(job.JobType)
|
||||
executor, err := r.registry.PickExecutor(jobType)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -543,6 +567,23 @@ func (r *Plugin) ExecuteJob(
|
||||
return r.executeJobWithExecutor(ctx, executor, job, clusterContext, attempt)
|
||||
}
|
||||
|
||||
func (r *Plugin) acquireDetectionExecutionGate(jobType string, execution bool) func() {
|
||||
normalizedJobType := strings.ToLower(strings.TrimSpace(jobType))
|
||||
if execution && normalizedJobType == adminScriptJobType {
|
||||
r.adminScriptRunMu.Lock()
|
||||
return func() {
|
||||
r.adminScriptRunMu.Unlock()
|
||||
}
|
||||
}
|
||||
if normalizedJobType != adminScriptJobType {
|
||||
r.adminScriptRunMu.RLock()
|
||||
return func() {
|
||||
r.adminScriptRunMu.RUnlock()
|
||||
}
|
||||
}
|
||||
return func() {}
|
||||
}
|
||||
|
||||
func (r *Plugin) executeJobWithExecutor(
|
||||
ctx context.Context,
|
||||
executor *WorkerSession,
|
||||
@@ -1291,6 +1332,41 @@ func (r *Plugin) loadLastSuccessfulRun(jobType string) *timestamppb.Timestamp {
|
||||
return timestamppb.New(latest.UTC())
|
||||
}
|
||||
|
||||
func (r *Plugin) loadLastCompletedRun(jobType string) *timestamppb.Timestamp {
|
||||
history, err := r.store.LoadRunHistory(jobType)
|
||||
if err != nil {
|
||||
glog.Warningf("Plugin failed to load run history for %s: %v", jobType, err)
|
||||
return nil
|
||||
}
|
||||
if history == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var latest time.Time
|
||||
for i := range history.SuccessfulRuns {
|
||||
completedAt := history.SuccessfulRuns[i].CompletedAt
|
||||
if completedAt == nil || completedAt.IsZero() {
|
||||
continue
|
||||
}
|
||||
if latest.IsZero() || completedAt.After(latest) {
|
||||
latest = *completedAt
|
||||
}
|
||||
}
|
||||
for i := range history.ErrorRuns {
|
||||
completedAt := history.ErrorRuns[i].CompletedAt
|
||||
if completedAt == nil || completedAt.IsZero() {
|
||||
continue
|
||||
}
|
||||
if latest.IsZero() || completedAt.After(latest) {
|
||||
latest = *completedAt
|
||||
}
|
||||
}
|
||||
if latest.IsZero() {
|
||||
return nil
|
||||
}
|
||||
return timestamppb.New(latest.UTC())
|
||||
}
|
||||
|
||||
func CloneConfigValueMap(in map[string]*plugin_pb.ConfigValue) map[string]*plugin_pb.ConfigValue {
|
||||
if len(in) == 0 {
|
||||
return map[string]*plugin_pb.ConfigValue{}
|
||||
|
||||
Reference in New Issue
Block a user