Plugin scheduler: sequential iterations with max runtime (#8496)

* pb: add job type max runtime setting

* plugin: default job type max runtime

* plugin: redesign scheduler loop

* admin ui: update scheduler settings

* plugin: fix scheduler loop state name

* plugin scheduler: restore backlog skip

* plugin scheduler: drop legacy detection helper

* admin api: require scheduler config body

* admin ui: preserve detection interval on save

* plugin scheduler: use job context and drain cancels

* plugin scheduler: respect detection intervals

* plugin scheduler: gate runs and drain queue

* ec test: reuse req/resp vars

* ec test: add scheduler debug logs

* Adjust scheduler idle sleep and initial run delay

* Clear pending job queue before scheduler runs

* Log next detection time in EC integration test

* Improve plugin scheduler debug logging in EC test

* Expose scheduler next detection time

* Log scheduler next detection time in EC test

* Wake scheduler on config or worker updates

* Expose scheduler sleep interval in UI

* Fix scheduler sleep save value selection

* Set scheduler idle sleep default to 613s

* Show scheduler next run time in plugin UI

---------

Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
Chris Lu
2026-03-03 23:09:49 -08:00
committed by GitHub
parent e1e5b4a8a6
commit 18ccc9b773
19 changed files with 1241 additions and 191 deletions

View File

@@ -68,6 +68,13 @@ type Plugin struct {
adminScriptRunMu sync.RWMutex
schedulerDetectionMu sync.Mutex
schedulerDetection map[string]*schedulerDetectionInfo
schedulerRunMu sync.Mutex
schedulerRun map[string]*schedulerRunInfo
schedulerLoopMu sync.Mutex
schedulerLoopState schedulerLoopState
schedulerConfigMu sync.RWMutex
schedulerConfig SchedulerConfig
schedulerWakeCh chan struct{}
dedupeMu sync.Mutex
recentDedupeByType map[string]map[string]time.Time
@@ -164,14 +171,31 @@ func New(options Options) (*Plugin, error) {
detectorLeases: make(map[string]string),
schedulerExecReservations: make(map[string]int),
schedulerDetection: make(map[string]*schedulerDetectionInfo),
schedulerRun: make(map[string]*schedulerRunInfo),
recentDedupeByType: make(map[string]map[string]time.Time),
jobs: make(map[string]*TrackedJob),
activities: make([]JobActivity, 0, 256),
persistTicker: time.NewTicker(2 * time.Second),
schedulerWakeCh: make(chan struct{}, 1),
shutdownCh: make(chan struct{}),
}
plugin.ctx, plugin.ctxCancel = context.WithCancel(context.Background())
if cfg, err := plugin.store.LoadSchedulerConfig(); err != nil {
glog.Warningf("Plugin failed to load scheduler config: %v", err)
plugin.schedulerConfig = DefaultSchedulerConfig()
} else if cfg == nil {
defaults := DefaultSchedulerConfig()
plugin.schedulerConfig = defaults
if plugin.store.IsConfigured() {
if err := plugin.store.SaveSchedulerConfig(&defaults); err != nil {
glog.Warningf("Plugin failed to persist scheduler defaults: %v", err)
}
}
} else {
plugin.schedulerConfig = normalizeSchedulerConfig(*cfg)
}
if err := plugin.loadPersistedMonitorState(); err != nil {
glog.Warningf("Plugin failed to load persisted monitoring state: %v", err)
}
@@ -371,7 +395,11 @@ func (r *Plugin) LoadJobTypeConfig(jobType string) (*plugin_pb.PersistedJobTypeC
}
// SaveJobTypeConfig persists config through the plugin's store and, when the
// save succeeds, calls r.wakeScheduler(). If the store reports an error it is
// returned unchanged and the scheduler is not woken.
func (r *Plugin) SaveJobTypeConfig(config *plugin_pb.PersistedJobTypeConfig) error {
	// The save must be checked before waking: an unconditional early
	// `return r.store.SaveJobTypeConfig(config)` here would make the wake-up
	// below unreachable.
	if err := r.store.SaveJobTypeConfig(config); err != nil {
		return err
	}
	r.wakeScheduler()
	return nil
}
func (r *Plugin) LoadDescriptor(jobType string) (*plugin_pb.JobTypeDescriptor, error) {
@@ -390,6 +418,31 @@ func (r *Plugin) BaseDir() string {
return r.store.BaseDir()
}
// GetSchedulerConfig returns the plugin's current scheduler configuration
// after passing it through normalizeSchedulerConfig. A nil receiver yields
// DefaultSchedulerConfig().
func (r *Plugin) GetSchedulerConfig() SchedulerConfig {
	if r == nil {
		return DefaultSchedulerConfig()
	}

	// Copy the stored value while holding the read lock; normalization runs
	// on the copy after the lock has been released.
	snapshot := func() SchedulerConfig {
		r.schedulerConfigMu.RLock()
		defer r.schedulerConfigMu.RUnlock()
		return r.schedulerConfig
	}()

	return normalizeSchedulerConfig(snapshot)
}
// UpdateSchedulerConfig normalizes cfg, saves it through the store, replaces
// the in-memory scheduler configuration, and then calls r.wakeScheduler().
//
// It returns the normalized configuration on success. On a nil receiver it
// returns DefaultSchedulerConfig() together with an error; if the store save
// fails it returns a zero SchedulerConfig and the store's error, leaving the
// in-memory configuration untouched.
func (r *Plugin) UpdateSchedulerConfig(cfg SchedulerConfig) (SchedulerConfig, error) {
	if r == nil {
		return DefaultSchedulerConfig(), fmt.Errorf("plugin is not initialized")
	}

	next := normalizeSchedulerConfig(cfg)

	// Persist first so a failed save never changes what the plugin holds in memory.
	if saveErr := r.store.SaveSchedulerConfig(&next); saveErr != nil {
		return SchedulerConfig{}, saveErr
	}

	func() {
		r.schedulerConfigMu.Lock()
		defer r.schedulerConfigMu.Unlock()
		r.schedulerConfig = next
	}()

	r.wakeScheduler()
	return next, nil
}
func (r *Plugin) acquireAdminLock(reason string) (func(), error) {
if r == nil || r.lockManager == nil {
return func() {}, nil
@@ -912,6 +965,7 @@ func (r *Plugin) handleWorkerMessage(workerID string, message *plugin_pb.WorkerT
switch body := message.Body.(type) {
case *plugin_pb.WorkerToAdminMessage_Hello:
r.registry.UpsertFromHello(body.Hello)
r.wakeScheduler()
case *plugin_pb.WorkerToAdminMessage_Heartbeat:
r.registry.UpdateHeartbeat(workerID, body.Heartbeat)
case *plugin_pb.WorkerToAdminMessage_ConfigSchemaResponse:
@@ -1011,6 +1065,7 @@ func (r *Plugin) ensureJobTypeConfigFromDescriptor(jobType string, descriptor *p
PerWorkerExecutionConcurrency: defaults.PerWorkerExecutionConcurrency,
RetryLimit: defaults.RetryLimit,
RetryBackoffSeconds: defaults.RetryBackoffSeconds,
JobTypeMaxRuntimeSeconds: defaults.JobTypeMaxRuntimeSeconds,
}
}