Plugin scheduler: sequential iterations with max runtime (#8496)
* pb: add job type max runtime setting * plugin: default job type max runtime * plugin: redesign scheduler loop * admin ui: update scheduler settings * plugin: fix scheduler loop state name * plugin scheduler: restore backlog skip * plugin scheduler: drop legacy detection helper * admin api: require scheduler config body * admin ui: preserve detection interval on save * plugin scheduler: use job context and drain cancels * plugin scheduler: respect detection intervals * plugin scheduler: gate runs and drain queue * ec test: reuse req/resp vars * ec test: add scheduler debug logs * Adjust scheduler idle sleep and initial run delay * Clear pending job queue before scheduler runs * Log next detection time in EC integration test * Improve plugin scheduler debug logging in EC test * Expose scheduler next detection time * Log scheduler next detection time in EC test * Wake scheduler on config or worker updates * Expose scheduler sleep interval in UI * Fix scheduler sleep save value selection * Set scheduler idle sleep default to 613s * Show scheduler next run time in plugin UI --------- Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
@@ -68,6 +68,13 @@ type Plugin struct {
|
||||
adminScriptRunMu sync.RWMutex
|
||||
schedulerDetectionMu sync.Mutex
|
||||
schedulerDetection map[string]*schedulerDetectionInfo
|
||||
schedulerRunMu sync.Mutex
|
||||
schedulerRun map[string]*schedulerRunInfo
|
||||
schedulerLoopMu sync.Mutex
|
||||
schedulerLoopState schedulerLoopState
|
||||
schedulerConfigMu sync.RWMutex
|
||||
schedulerConfig SchedulerConfig
|
||||
schedulerWakeCh chan struct{}
|
||||
|
||||
dedupeMu sync.Mutex
|
||||
recentDedupeByType map[string]map[string]time.Time
|
||||
@@ -164,14 +171,31 @@ func New(options Options) (*Plugin, error) {
|
||||
detectorLeases: make(map[string]string),
|
||||
schedulerExecReservations: make(map[string]int),
|
||||
schedulerDetection: make(map[string]*schedulerDetectionInfo),
|
||||
schedulerRun: make(map[string]*schedulerRunInfo),
|
||||
recentDedupeByType: make(map[string]map[string]time.Time),
|
||||
jobs: make(map[string]*TrackedJob),
|
||||
activities: make([]JobActivity, 0, 256),
|
||||
persistTicker: time.NewTicker(2 * time.Second),
|
||||
schedulerWakeCh: make(chan struct{}, 1),
|
||||
shutdownCh: make(chan struct{}),
|
||||
}
|
||||
plugin.ctx, plugin.ctxCancel = context.WithCancel(context.Background())
|
||||
|
||||
if cfg, err := plugin.store.LoadSchedulerConfig(); err != nil {
|
||||
glog.Warningf("Plugin failed to load scheduler config: %v", err)
|
||||
plugin.schedulerConfig = DefaultSchedulerConfig()
|
||||
} else if cfg == nil {
|
||||
defaults := DefaultSchedulerConfig()
|
||||
plugin.schedulerConfig = defaults
|
||||
if plugin.store.IsConfigured() {
|
||||
if err := plugin.store.SaveSchedulerConfig(&defaults); err != nil {
|
||||
glog.Warningf("Plugin failed to persist scheduler defaults: %v", err)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
plugin.schedulerConfig = normalizeSchedulerConfig(*cfg)
|
||||
}
|
||||
|
||||
if err := plugin.loadPersistedMonitorState(); err != nil {
|
||||
glog.Warningf("Plugin failed to load persisted monitoring state: %v", err)
|
||||
}
|
||||
@@ -371,7 +395,11 @@ func (r *Plugin) LoadJobTypeConfig(jobType string) (*plugin_pb.PersistedJobTypeC
|
||||
}
|
||||
|
||||
func (r *Plugin) SaveJobTypeConfig(config *plugin_pb.PersistedJobTypeConfig) error {
|
||||
return r.store.SaveJobTypeConfig(config)
|
||||
if err := r.store.SaveJobTypeConfig(config); err != nil {
|
||||
return err
|
||||
}
|
||||
r.wakeScheduler()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *Plugin) LoadDescriptor(jobType string) (*plugin_pb.JobTypeDescriptor, error) {
|
||||
@@ -390,6 +418,31 @@ func (r *Plugin) BaseDir() string {
|
||||
return r.store.BaseDir()
|
||||
}
|
||||
|
||||
func (r *Plugin) GetSchedulerConfig() SchedulerConfig {
|
||||
if r == nil {
|
||||
return DefaultSchedulerConfig()
|
||||
}
|
||||
r.schedulerConfigMu.RLock()
|
||||
cfg := r.schedulerConfig
|
||||
r.schedulerConfigMu.RUnlock()
|
||||
return normalizeSchedulerConfig(cfg)
|
||||
}
|
||||
|
||||
func (r *Plugin) UpdateSchedulerConfig(cfg SchedulerConfig) (SchedulerConfig, error) {
|
||||
if r == nil {
|
||||
return DefaultSchedulerConfig(), fmt.Errorf("plugin is not initialized")
|
||||
}
|
||||
normalized := normalizeSchedulerConfig(cfg)
|
||||
if err := r.store.SaveSchedulerConfig(&normalized); err != nil {
|
||||
return SchedulerConfig{}, err
|
||||
}
|
||||
r.schedulerConfigMu.Lock()
|
||||
r.schedulerConfig = normalized
|
||||
r.schedulerConfigMu.Unlock()
|
||||
r.wakeScheduler()
|
||||
return normalized, nil
|
||||
}
|
||||
|
||||
func (r *Plugin) acquireAdminLock(reason string) (func(), error) {
|
||||
if r == nil || r.lockManager == nil {
|
||||
return func() {}, nil
|
||||
@@ -912,6 +965,7 @@ func (r *Plugin) handleWorkerMessage(workerID string, message *plugin_pb.WorkerT
|
||||
switch body := message.Body.(type) {
|
||||
case *plugin_pb.WorkerToAdminMessage_Hello:
|
||||
r.registry.UpsertFromHello(body.Hello)
|
||||
r.wakeScheduler()
|
||||
case *plugin_pb.WorkerToAdminMessage_Heartbeat:
|
||||
r.registry.UpdateHeartbeat(workerID, body.Heartbeat)
|
||||
case *plugin_pb.WorkerToAdminMessage_ConfigSchemaResponse:
|
||||
@@ -1011,6 +1065,7 @@ func (r *Plugin) ensureJobTypeConfigFromDescriptor(jobType string, descriptor *p
|
||||
PerWorkerExecutionConcurrency: defaults.PerWorkerExecutionConcurrency,
|
||||
RetryLimit: defaults.RetryLimit,
|
||||
RetryBackoffSeconds: defaults.RetryBackoffSeconds,
|
||||
JobTypeMaxRuntimeSeconds: defaults.JobTypeMaxRuntimeSeconds,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user