* chore: remove unreachable dead code across the codebase Remove ~50,000 lines of unreachable code identified by static analysis. Major removals: - weed/filer/redis_lua: entire unused Redis Lua filer store implementation - weed/wdclient/net2, resource_pool: unused connection/resource pool packages - weed/plugin/worker/lifecycle: unused lifecycle plugin worker - weed/s3api: unused S3 policy templates, presigned URL IAM, streaming copy, multipart IAM, key rotation, and various SSE helper functions - weed/mq/kafka: unused partition mapping, compression, schema, and protocol functions - weed/mq/offset: unused SQL storage and migration code - weed/worker: unused registry, task, and monitoring functions - weed/query: unused SQL engine, parquet scanner, and type functions - weed/shell: unused EC proportional rebalance functions - weed/storage/erasure_coding/distribution: unused distribution analysis functions - Individual unreachable functions removed from 150+ files across admin, credential, filer, iam, kms, mount, mq, operation, pb, s3api, server, shell, storage, topology, and util packages * fix(s3): reset shared memory store in IAM test to prevent flaky failure TestLoadIAMManagerFromConfig_EmptyConfigWithFallbackKey was flaky because the MemoryStore credential backend is a singleton registered via init(). Earlier tests that create anonymous identities pollute the shared store, causing LookupAnonymous() to unexpectedly return true. Fix by calling Reset() on the memory store before the test runs. * style: run gofmt on changed files * fix: restore KMS functions used by integration tests * fix(plugin): prevent panic on send to closed worker session channel The Plugin.sendToWorker method could panic with "send on closed channel" when a worker disconnected while a message was being sent. The race was between streamSession.close() closing the outgoing channel and sendToWorker writing to it concurrently. 
Add a done channel to streamSession that is closed before the outgoing channel, and check it in sendToWorker's select to safely detect closed sessions without panicking.
1508 lines
41 KiB
Go
1508 lines
41 KiB
Go
package plugin
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"strings"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/plugin_pb"
|
|
"google.golang.org/protobuf/types/known/timestamppb"
|
|
)
|
|
|
|
// Sentinel errors used by the scheduler; compare with errors.Is.
var (
	// errExecutorAtCapacity is matched by dispatchScheduledProposals to
	// decide that a job should be re-queued and retried after a backoff.
	errExecutorAtCapacity = errors.New("executor is at capacity")

	// errSchedulerShutdown is the cancellation cause given to jobs that are
	// drained from the queue when no context error is available.
	errSchedulerShutdown = errors.New("scheduler shutdown")
)
|
|
|
|
// Timing defaults applied when a job type's admin runtime configuration
// leaves the corresponding setting unset or non-positive.
const (
	defaultSchedulerTick              = 5 * time.Second
	defaultScheduledDetectionInterval = 300 * time.Second
	defaultScheduledDetectionTimeout  = 45 * time.Second
	defaultScheduledExecutionTimeout  = 90 * time.Second
	defaultScheduledJobTypeMaxRuntime = 30 * time.Minute
	defaultScheduledRetryBackoff      = 5 * time.Second
	defaultClusterContextTimeout      = 10 * time.Second

	// maxEstimatedRuntimeCap bounds how far a proposal's
	// estimated_runtime_seconds may extend the execution context.
	maxEstimatedRuntimeCap = 8 * time.Hour
)

// Result-count and concurrency defaults and limits.
const (
	defaultScheduledMaxResults           int32 = 1000
	defaultScheduledExecutionConcurrency       = 1
	defaultScheduledPerWorkerConcurrency       = 1
	maxScheduledExecutionConcurrency           = 128
)

// Waiting-backlog tuning values.
// NOTE(review): not referenced in this part of the file; presumably consumed
// by shouldSkipDetectionForWaitingJobs — confirm.
const (
	defaultWaitingBacklogFloor      = 8
	defaultWaitingBacklogMultiplier = 4
)
|
|
|
|
// schedulerPolicy is the resolved, defaulted set of scheduling parameters for
// one job type, as produced by loadSchedulerPolicy.
type schedulerPolicy struct {
	// DetectionInterval is the spacing between scheduled detection runs.
	DetectionInterval time.Duration
	// DetectionTimeout bounds a single detection call.
	DetectionTimeout time.Duration
	// ExecutionTimeout bounds how long a job waits for executor capacity;
	// it is also passed on to the execution call via the policy.
	ExecutionTimeout time.Duration
	// JobTypeMaxRuntime bounds one whole detect-and-execute iteration.
	JobTypeMaxRuntime time.Duration
	// RetryBackoff is loaded from the admin runtime config.
	// NOTE(review): its consumer is not visible here — confirm usage.
	RetryBackoff time.Duration
	// MaxResults is the result limit handed to RunDetection.
	MaxResults int32
	// ExecutionConcurrency is the number of dispatch worker goroutines.
	ExecutionConcurrency int
	// PerWorkerConcurrency is clamped to ExecutionConcurrency when loaded.
	// NOTE(review): presumably the per-executor slot limit — confirm.
	PerWorkerConcurrency int
	// RetryLimit is clamped to be non-negative when loaded.
	RetryLimit int
	// ExecutorReserveBackoff is the pause between executor reservation
	// attempts.
	ExecutorReserveBackoff time.Duration
}
|
|
|
|
// laneSchedulerLoop is the main scheduling goroutine for a single lane.
|
|
// Each lane runs independently with its own timing, lock scope, and wake channel.
|
|
func (r *Plugin) laneSchedulerLoop(ls *schedulerLaneState) {
|
|
defer r.wg.Done()
|
|
for {
|
|
select {
|
|
case <-r.shutdownCh:
|
|
return
|
|
default:
|
|
}
|
|
|
|
hadJobs := r.runLaneSchedulerIteration(ls)
|
|
r.recordLaneIterationComplete(ls, hadJobs)
|
|
|
|
if hadJobs {
|
|
continue
|
|
}
|
|
|
|
r.setLaneLoopState(ls, "", "sleeping")
|
|
idleSleep := LaneIdleSleep(ls.lane)
|
|
if nextRun := r.earliestLaneDetectionAt(ls.lane); !nextRun.IsZero() {
|
|
if until := time.Until(nextRun); until <= 0 {
|
|
idleSleep = 0
|
|
} else if until < idleSleep {
|
|
idleSleep = until
|
|
}
|
|
}
|
|
if idleSleep <= 0 {
|
|
continue
|
|
}
|
|
|
|
timer := time.NewTimer(idleSleep)
|
|
select {
|
|
case <-r.shutdownCh:
|
|
timer.Stop()
|
|
return
|
|
case <-ls.wakeCh:
|
|
if !timer.Stop() {
|
|
<-timer.C
|
|
}
|
|
continue
|
|
case <-timer.C:
|
|
}
|
|
}
|
|
}
|
|
|
|
// runLaneSchedulerIteration runs one scheduling pass for a single lane,
|
|
// processing only the job types assigned to that lane.
|
|
//
|
|
// For lanes that require a lock (e.g. LaneDefault), all job types are
|
|
// processed sequentially under one admin lock because their volume
|
|
// management operations share global state.
|
|
//
|
|
// For lanes that do not require a lock (e.g. LaneIceberg, LaneLifecycle),
|
|
// each job type runs independently in its own goroutine so they do not
|
|
// block each other.
|
|
func (r *Plugin) runLaneSchedulerIteration(ls *schedulerLaneState) bool {
|
|
r.expireStaleJobs(time.Now().UTC())
|
|
|
|
allJobTypes := r.registry.DetectableJobTypes()
|
|
// Filter to only job types belonging to this lane.
|
|
var jobTypes []string
|
|
for _, jt := range allJobTypes {
|
|
if JobTypeLane(jt) == ls.lane {
|
|
jobTypes = append(jobTypes, jt)
|
|
}
|
|
}
|
|
if len(jobTypes) == 0 {
|
|
r.setLaneLoopState(ls, "", "idle")
|
|
return false
|
|
}
|
|
|
|
if LaneRequiresLock(ls.lane) {
|
|
return r.runLaneSchedulerIterationLocked(ls, jobTypes)
|
|
}
|
|
return r.runLaneSchedulerIterationConcurrent(ls, jobTypes)
|
|
}
|
|
|
|
// dueJobType pairs a job type with its resolved scheduling policy.
|
|
type dueJobType struct {
|
|
jobType string
|
|
policy schedulerPolicy
|
|
}
|
|
|
|
// collectDueJobTypes loads policies for all job types in the lane and
|
|
// returns those whose detection interval has elapsed. It also returns
|
|
// the full set of active job type names for later pruning.
|
|
func (r *Plugin) collectDueJobTypes(ls *schedulerLaneState, jobTypes []string) (active map[string]struct{}, due []dueJobType) {
|
|
active = make(map[string]struct{}, len(jobTypes))
|
|
for _, jobType := range jobTypes {
|
|
active[jobType] = struct{}{}
|
|
|
|
policy, enabled, err := r.loadSchedulerPolicy(jobType)
|
|
if err != nil {
|
|
glog.Warningf("Plugin scheduler [%s] failed to load policy for %s: %v", ls.lane, jobType, err)
|
|
continue
|
|
}
|
|
if !enabled {
|
|
r.clearSchedulerJobType(jobType)
|
|
continue
|
|
}
|
|
initialDelay := time.Duration(0)
|
|
if runInfo := r.snapshotSchedulerRun(jobType); runInfo.lastRunStartedAt.IsZero() {
|
|
initialDelay = 5 * time.Second
|
|
}
|
|
if !r.markDetectionDue(jobType, policy.DetectionInterval, initialDelay) {
|
|
continue
|
|
}
|
|
due = append(due, dueJobType{jobType: jobType, policy: policy})
|
|
}
|
|
return active, due
|
|
}
|
|
|
|
// runLaneSchedulerIterationLocked processes job types sequentially under a
|
|
// single admin lock. Used by the default lane where volume management
|
|
// operations must be serialised.
|
|
func (r *Plugin) runLaneSchedulerIterationLocked(ls *schedulerLaneState, jobTypes []string) bool {
|
|
r.setLaneLoopState(ls, "", "waiting_for_lock")
|
|
lockName := fmt.Sprintf("plugin scheduler:%s", ls.lane)
|
|
releaseLock, err := r.acquireAdminLock(lockName)
|
|
if err != nil {
|
|
glog.Warningf("Plugin scheduler [%s] failed to acquire lock: %v", ls.lane, err)
|
|
r.setLaneLoopState(ls, "", "idle")
|
|
return false
|
|
}
|
|
if releaseLock != nil {
|
|
defer releaseLock()
|
|
}
|
|
|
|
active, due := r.collectDueJobTypes(ls, jobTypes)
|
|
hadJobs := false
|
|
for _, w := range due {
|
|
if r.runJobTypeIteration(w.jobType, w.policy) {
|
|
hadJobs = true
|
|
}
|
|
}
|
|
|
|
r.pruneSchedulerState(active)
|
|
r.pruneDetectorLeases(active)
|
|
r.setLaneLoopState(ls, "", "idle")
|
|
return hadJobs
|
|
}
|
|
|
|
// runLaneSchedulerIterationConcurrent processes each job type in its own
|
|
// goroutine so they run independently. Used by lanes (e.g. iceberg,
|
|
// lifecycle) whose job types do not share global state.
|
|
func (r *Plugin) runLaneSchedulerIterationConcurrent(ls *schedulerLaneState, jobTypes []string) bool {
|
|
active, due := r.collectDueJobTypes(ls, jobTypes)
|
|
|
|
r.setLaneLoopState(ls, "", "busy")
|
|
|
|
var hadJobs atomic.Bool
|
|
var wg sync.WaitGroup
|
|
for _, w := range due {
|
|
wg.Add(1)
|
|
go func(jobType string, policy schedulerPolicy) {
|
|
defer wg.Done()
|
|
if r.runJobTypeIteration(jobType, policy) {
|
|
hadJobs.Store(true)
|
|
}
|
|
}(w.jobType, w.policy)
|
|
}
|
|
wg.Wait()
|
|
|
|
r.pruneSchedulerState(active)
|
|
r.pruneDetectorLeases(active)
|
|
r.setLaneLoopState(ls, "", "idle")
|
|
return hadJobs.Load()
|
|
}
|
|
|
|
// wakeAllLanes wakes all lane scheduler goroutines.
|
|
func (r *Plugin) wakeAllLanes() {
|
|
if r == nil {
|
|
return
|
|
}
|
|
for _, ls := range r.lanes {
|
|
select {
|
|
case ls.wakeCh <- struct{}{}:
|
|
default:
|
|
}
|
|
}
|
|
}
|
|
|
|
// wakeScheduler wakes the lane that owns the given job type, or all lanes
|
|
// if no job type is specified. Kept for backward compatibility.
|
|
func (r *Plugin) wakeScheduler() {
|
|
r.wakeAllLanes()
|
|
}
|
|
|
|
func (r *Plugin) runJobTypeIteration(jobType string, policy schedulerPolicy) bool {
|
|
r.recordSchedulerRunStart(jobType)
|
|
r.clearWaitingJobQueue(jobType)
|
|
r.setSchedulerLoopStateForJobType(jobType, "detecting")
|
|
r.markJobTypeInFlight(jobType)
|
|
defer r.finishDetection(jobType)
|
|
|
|
start := time.Now().UTC()
|
|
r.appendActivity(JobActivity{
|
|
JobType: jobType,
|
|
Source: "admin_scheduler",
|
|
Message: "scheduled detection started",
|
|
Stage: "detecting",
|
|
OccurredAt: timeToPtr(start),
|
|
})
|
|
|
|
if skip, waitingCount, waitingThreshold := r.shouldSkipDetectionForWaitingJobs(jobType, policy); skip {
|
|
r.recordSchedulerDetectionSkip(jobType, fmt.Sprintf("waiting backlog %d reached threshold %d", waitingCount, waitingThreshold))
|
|
r.appendActivity(JobActivity{
|
|
JobType: jobType,
|
|
Source: "admin_scheduler",
|
|
Message: fmt.Sprintf("scheduled detection skipped: waiting backlog %d reached threshold %d", waitingCount, waitingThreshold),
|
|
Stage: "skipped_waiting_backlog",
|
|
OccurredAt: timeToPtr(time.Now().UTC()),
|
|
})
|
|
r.recordSchedulerRunComplete(jobType, "skipped")
|
|
return false
|
|
}
|
|
|
|
maxRuntime := policy.JobTypeMaxRuntime
|
|
if maxRuntime <= 0 {
|
|
maxRuntime = defaultScheduledJobTypeMaxRuntime
|
|
}
|
|
jobCtx, cancel := context.WithTimeout(context.Background(), maxRuntime)
|
|
defer cancel()
|
|
|
|
clusterContext, err := r.loadSchedulerClusterContext(jobCtx)
|
|
if err != nil {
|
|
r.recordSchedulerDetectionError(jobType, err)
|
|
r.appendActivity(JobActivity{
|
|
JobType: jobType,
|
|
Source: "admin_scheduler",
|
|
Message: fmt.Sprintf("scheduled detection aborted: %v", err),
|
|
Stage: "failed",
|
|
OccurredAt: timeToPtr(time.Now().UTC()),
|
|
})
|
|
r.recordSchedulerRunComplete(jobType, "error")
|
|
return false
|
|
}
|
|
|
|
detectionTimeout := policy.DetectionTimeout
|
|
remaining := time.Until(start.Add(maxRuntime))
|
|
if remaining <= 0 {
|
|
r.appendActivity(JobActivity{
|
|
JobType: jobType,
|
|
Source: "admin_scheduler",
|
|
Message: "scheduled run timed out before detection",
|
|
Stage: "timeout",
|
|
OccurredAt: timeToPtr(time.Now().UTC()),
|
|
})
|
|
r.recordSchedulerRunComplete(jobType, "timeout")
|
|
return false
|
|
}
|
|
if detectionTimeout <= 0 {
|
|
detectionTimeout = defaultScheduledDetectionTimeout
|
|
}
|
|
if detectionTimeout > remaining {
|
|
detectionTimeout = remaining
|
|
}
|
|
|
|
detectCtx, cancelDetect := context.WithTimeout(jobCtx, detectionTimeout)
|
|
proposals, err := r.RunDetection(detectCtx, jobType, clusterContext, policy.MaxResults)
|
|
cancelDetect()
|
|
if err != nil {
|
|
r.recordSchedulerDetectionError(jobType, err)
|
|
stage := "failed"
|
|
status := "error"
|
|
if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
|
|
stage = "timeout"
|
|
status = "timeout"
|
|
}
|
|
r.appendActivity(JobActivity{
|
|
JobType: jobType,
|
|
Source: "admin_scheduler",
|
|
Message: fmt.Sprintf("scheduled detection failed: %v", err),
|
|
Stage: stage,
|
|
OccurredAt: timeToPtr(time.Now().UTC()),
|
|
})
|
|
r.recordSchedulerRunComplete(jobType, status)
|
|
return false
|
|
}
|
|
|
|
r.appendActivity(JobActivity{
|
|
JobType: jobType,
|
|
Source: "admin_scheduler",
|
|
Message: fmt.Sprintf("scheduled detection completed: %d proposal(s)", len(proposals)),
|
|
Stage: "detected",
|
|
OccurredAt: timeToPtr(time.Now().UTC()),
|
|
})
|
|
r.recordSchedulerDetectionSuccess(jobType, len(proposals))
|
|
|
|
detected := len(proposals) > 0
|
|
|
|
filteredByActive, skippedActive := r.filterProposalsWithActiveJobs(jobType, proposals)
|
|
if skippedActive > 0 {
|
|
r.appendActivity(JobActivity{
|
|
JobType: jobType,
|
|
Source: "admin_scheduler",
|
|
Message: fmt.Sprintf("scheduled detection skipped %d proposal(s) due to active assigned/running jobs", skippedActive),
|
|
Stage: "deduped_active_jobs",
|
|
OccurredAt: timeToPtr(time.Now().UTC()),
|
|
})
|
|
}
|
|
|
|
if len(filteredByActive) == 0 {
|
|
r.recordSchedulerRunComplete(jobType, "success")
|
|
return detected
|
|
}
|
|
|
|
filtered := r.filterScheduledProposals(filteredByActive)
|
|
if len(filtered) != len(filteredByActive) {
|
|
r.appendActivity(JobActivity{
|
|
JobType: jobType,
|
|
Source: "admin_scheduler",
|
|
Message: fmt.Sprintf("scheduled detection deduped %d proposal(s) within this run", len(filteredByActive)-len(filtered)),
|
|
Stage: "deduped",
|
|
OccurredAt: timeToPtr(time.Now().UTC()),
|
|
})
|
|
}
|
|
|
|
if len(filtered) == 0 {
|
|
r.recordSchedulerRunComplete(jobType, "success")
|
|
return detected
|
|
}
|
|
|
|
r.setSchedulerLoopStateForJobType(jobType, "executing")
|
|
|
|
// Scan proposals for the maximum estimated_runtime_seconds so the
|
|
// execution phase gets enough time for large jobs (e.g. vacuum on
|
|
// big volumes). If any proposal needs more time than the remaining
|
|
// JobTypeMaxRuntime, extend the execution context accordingly.
|
|
var maxEstimatedRuntime time.Duration
|
|
for _, p := range filtered {
|
|
if p.Parameters != nil {
|
|
if est, ok := p.Parameters["estimated_runtime_seconds"]; ok {
|
|
if v := est.GetInt64Value(); v > 0 {
|
|
if d := time.Duration(v) * time.Second; d > maxEstimatedRuntime {
|
|
maxEstimatedRuntime = d
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if maxEstimatedRuntime > maxEstimatedRuntimeCap {
|
|
maxEstimatedRuntime = maxEstimatedRuntimeCap
|
|
}
|
|
|
|
remaining = time.Until(start.Add(maxRuntime))
|
|
if remaining <= 0 {
|
|
r.appendActivity(JobActivity{
|
|
JobType: jobType,
|
|
Source: "admin_scheduler",
|
|
Message: "scheduled execution skipped: job type max runtime reached",
|
|
Stage: "timeout",
|
|
OccurredAt: timeToPtr(time.Now().UTC()),
|
|
})
|
|
r.recordSchedulerRunComplete(jobType, "timeout")
|
|
return detected
|
|
}
|
|
|
|
// If the longest estimated job exceeds the remaining JobTypeMaxRuntime,
|
|
// create a new execution context with enough headroom instead of using
|
|
// jobCtx which would cancel too early.
|
|
execCtx := jobCtx
|
|
execCancel := context.CancelFunc(func() {})
|
|
if maxEstimatedRuntime > 0 && maxEstimatedRuntime > remaining {
|
|
execCtx, execCancel = context.WithTimeout(context.Background(), maxEstimatedRuntime)
|
|
remaining = maxEstimatedRuntime
|
|
}
|
|
defer execCancel()
|
|
|
|
execPolicy := policy
|
|
if execPolicy.ExecutionTimeout <= 0 {
|
|
execPolicy.ExecutionTimeout = defaultScheduledExecutionTimeout
|
|
}
|
|
if execPolicy.ExecutionTimeout > remaining {
|
|
execPolicy.ExecutionTimeout = remaining
|
|
}
|
|
|
|
successCount, errorCount, canceledCount := r.dispatchScheduledProposals(execCtx, jobType, filtered, clusterContext, execPolicy)
|
|
|
|
status := "success"
|
|
if execCtx.Err() != nil {
|
|
status = "timeout"
|
|
} else if errorCount > 0 || canceledCount > 0 {
|
|
status = "error"
|
|
}
|
|
|
|
r.appendActivity(JobActivity{
|
|
JobType: jobType,
|
|
Source: "admin_scheduler",
|
|
Message: fmt.Sprintf("scheduled execution finished: success=%d error=%d canceled=%d", successCount, errorCount, canceledCount),
|
|
Stage: "executed",
|
|
OccurredAt: timeToPtr(time.Now().UTC()),
|
|
})
|
|
r.recordSchedulerRunComplete(jobType, status)
|
|
return detected
|
|
}
|
|
|
|
func (r *Plugin) loadSchedulerPolicy(jobType string) (schedulerPolicy, bool, error) {
|
|
cfg, err := r.store.LoadJobTypeConfig(jobType)
|
|
if err != nil {
|
|
return schedulerPolicy{}, false, err
|
|
}
|
|
descriptor, err := r.store.LoadDescriptor(jobType)
|
|
if err != nil {
|
|
return schedulerPolicy{}, false, err
|
|
}
|
|
|
|
adminRuntime := deriveSchedulerAdminRuntime(cfg, descriptor)
|
|
if adminRuntime == nil {
|
|
return schedulerPolicy{}, false, nil
|
|
}
|
|
if !adminRuntime.Enabled {
|
|
return schedulerPolicy{}, false, nil
|
|
}
|
|
|
|
policy := schedulerPolicy{
|
|
DetectionInterval: durationFromSeconds(adminRuntime.DetectionIntervalSeconds, defaultScheduledDetectionInterval),
|
|
DetectionTimeout: durationFromSeconds(adminRuntime.DetectionTimeoutSeconds, defaultScheduledDetectionTimeout),
|
|
ExecutionTimeout: defaultScheduledExecutionTimeout,
|
|
JobTypeMaxRuntime: durationFromSeconds(adminRuntime.JobTypeMaxRuntimeSeconds, defaultScheduledJobTypeMaxRuntime),
|
|
RetryBackoff: durationFromSeconds(adminRuntime.RetryBackoffSeconds, defaultScheduledRetryBackoff),
|
|
MaxResults: adminRuntime.MaxJobsPerDetection,
|
|
ExecutionConcurrency: int(adminRuntime.GlobalExecutionConcurrency),
|
|
PerWorkerConcurrency: int(adminRuntime.PerWorkerExecutionConcurrency),
|
|
RetryLimit: int(adminRuntime.RetryLimit),
|
|
ExecutorReserveBackoff: 200 * time.Millisecond,
|
|
}
|
|
|
|
if policy.DetectionInterval < r.schedulerTick {
|
|
policy.DetectionInterval = r.schedulerTick
|
|
}
|
|
if policy.MaxResults <= 0 {
|
|
policy.MaxResults = defaultScheduledMaxResults
|
|
}
|
|
if policy.ExecutionConcurrency <= 0 {
|
|
policy.ExecutionConcurrency = defaultScheduledExecutionConcurrency
|
|
}
|
|
if policy.ExecutionConcurrency > maxScheduledExecutionConcurrency {
|
|
policy.ExecutionConcurrency = maxScheduledExecutionConcurrency
|
|
}
|
|
if policy.PerWorkerConcurrency <= 0 {
|
|
policy.PerWorkerConcurrency = defaultScheduledPerWorkerConcurrency
|
|
}
|
|
if policy.PerWorkerConcurrency > policy.ExecutionConcurrency {
|
|
policy.PerWorkerConcurrency = policy.ExecutionConcurrency
|
|
}
|
|
if policy.RetryLimit < 0 {
|
|
policy.RetryLimit = 0
|
|
}
|
|
if policy.JobTypeMaxRuntime <= 0 {
|
|
policy.JobTypeMaxRuntime = defaultScheduledJobTypeMaxRuntime
|
|
}
|
|
|
|
// Plugin protocol currently has only detection timeout in admin settings.
|
|
execTimeout := time.Duration(adminRuntime.DetectionTimeoutSeconds*2) * time.Second
|
|
if execTimeout < defaultScheduledExecutionTimeout {
|
|
execTimeout = defaultScheduledExecutionTimeout
|
|
}
|
|
policy.ExecutionTimeout = execTimeout
|
|
|
|
return policy, true, nil
|
|
}
|
|
|
|
func (r *Plugin) ListSchedulerStates() ([]SchedulerJobTypeState, error) {
|
|
jobTypes, err := r.ListKnownJobTypes()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
r.schedulerMu.Lock()
|
|
nextDetectionAt := make(map[string]time.Time, len(r.nextDetectionAt))
|
|
for jobType, nextRun := range r.nextDetectionAt {
|
|
nextDetectionAt[jobType] = nextRun
|
|
}
|
|
detectionInFlight := make(map[string]bool, len(r.detectionInFlight))
|
|
for jobType, inFlight := range r.detectionInFlight {
|
|
detectionInFlight[jobType] = inFlight
|
|
}
|
|
r.schedulerMu.Unlock()
|
|
|
|
states := make([]SchedulerJobTypeState, 0, len(jobTypes))
|
|
for _, jobTypeInfo := range jobTypes {
|
|
jobType := jobTypeInfo.JobType
|
|
state := SchedulerJobTypeState{
|
|
JobType: jobType,
|
|
Lane: string(JobTypeLane(jobType)),
|
|
DetectionInFlight: detectionInFlight[jobType],
|
|
}
|
|
|
|
if nextRun, ok := nextDetectionAt[jobType]; ok && !nextRun.IsZero() {
|
|
nextRunUTC := nextRun.UTC()
|
|
state.NextDetectionAt = &nextRunUTC
|
|
}
|
|
|
|
policy, enabled, loadErr := r.loadSchedulerPolicy(jobType)
|
|
|
|
if loadErr != nil {
|
|
state.PolicyError = loadErr.Error()
|
|
} else {
|
|
state.Enabled = enabled
|
|
if enabled {
|
|
state.DetectionIntervalSeconds = secondsFromDuration(policy.DetectionInterval)
|
|
state.DetectionTimeoutSeconds = secondsFromDuration(policy.DetectionTimeout)
|
|
state.ExecutionTimeoutSeconds = secondsFromDuration(policy.ExecutionTimeout)
|
|
state.JobTypeMaxRuntimeSeconds = secondsFromDuration(policy.JobTypeMaxRuntime)
|
|
state.MaxJobsPerDetection = policy.MaxResults
|
|
state.GlobalExecutionConcurrency = policy.ExecutionConcurrency
|
|
state.PerWorkerExecutionConcurrency = policy.PerWorkerConcurrency
|
|
state.RetryLimit = policy.RetryLimit
|
|
state.RetryBackoffSeconds = secondsFromDuration(policy.RetryBackoff)
|
|
}
|
|
}
|
|
|
|
runInfo := r.snapshotSchedulerRun(jobType)
|
|
if !runInfo.lastRunStartedAt.IsZero() {
|
|
at := runInfo.lastRunStartedAt
|
|
state.LastRunStartedAt = &at
|
|
}
|
|
if !runInfo.lastRunCompletedAt.IsZero() {
|
|
at := runInfo.lastRunCompletedAt
|
|
state.LastRunCompletedAt = &at
|
|
}
|
|
if runInfo.lastRunStatus != "" {
|
|
state.LastRunStatus = runInfo.lastRunStatus
|
|
}
|
|
|
|
leasedWorkerID := r.getDetectorLease(jobType)
|
|
if leasedWorkerID != "" {
|
|
state.DetectorWorkerID = leasedWorkerID
|
|
if worker, ok := r.registry.Get(leasedWorkerID); ok {
|
|
if capability := worker.Capabilities[jobType]; capability != nil && capability.CanDetect {
|
|
state.DetectorAvailable = true
|
|
}
|
|
}
|
|
}
|
|
if state.DetectorWorkerID == "" {
|
|
detector, detectorErr := r.registry.PickDetector(jobType)
|
|
if detectorErr == nil && detector != nil {
|
|
state.DetectorAvailable = true
|
|
state.DetectorWorkerID = detector.WorkerID
|
|
}
|
|
}
|
|
|
|
executors, executorErr := r.registry.ListExecutors(jobType)
|
|
if executorErr == nil {
|
|
state.ExecutorWorkerCount = len(executors)
|
|
}
|
|
|
|
states = append(states, state)
|
|
}
|
|
|
|
return states, nil
|
|
}
|
|
|
|
func deriveSchedulerAdminRuntime(
|
|
cfg *plugin_pb.PersistedJobTypeConfig,
|
|
descriptor *plugin_pb.JobTypeDescriptor,
|
|
) *plugin_pb.AdminRuntimeConfig {
|
|
if cfg != nil && cfg.AdminRuntime != nil {
|
|
adminConfig := *cfg.AdminRuntime
|
|
return &adminConfig
|
|
}
|
|
|
|
if descriptor == nil || descriptor.AdminRuntimeDefaults == nil {
|
|
return nil
|
|
}
|
|
|
|
defaults := descriptor.AdminRuntimeDefaults
|
|
return &plugin_pb.AdminRuntimeConfig{
|
|
Enabled: defaults.Enabled,
|
|
DetectionIntervalSeconds: defaults.DetectionIntervalSeconds,
|
|
DetectionTimeoutSeconds: defaults.DetectionTimeoutSeconds,
|
|
MaxJobsPerDetection: defaults.MaxJobsPerDetection,
|
|
GlobalExecutionConcurrency: defaults.GlobalExecutionConcurrency,
|
|
PerWorkerExecutionConcurrency: defaults.PerWorkerExecutionConcurrency,
|
|
RetryLimit: defaults.RetryLimit,
|
|
RetryBackoffSeconds: defaults.RetryBackoffSeconds,
|
|
JobTypeMaxRuntimeSeconds: defaults.JobTypeMaxRuntimeSeconds,
|
|
}
|
|
}
|
|
|
|
func (r *Plugin) markDetectionDue(jobType string, interval, initialDelay time.Duration) bool {
|
|
now := time.Now().UTC()
|
|
|
|
r.schedulerMu.Lock()
|
|
defer r.schedulerMu.Unlock()
|
|
|
|
if r.detectionInFlight[jobType] {
|
|
return false
|
|
}
|
|
|
|
nextRun, exists := r.nextDetectionAt[jobType]
|
|
if exists && now.Before(nextRun) {
|
|
return false
|
|
}
|
|
if !exists && initialDelay > 0 {
|
|
r.nextDetectionAt[jobType] = now.Add(initialDelay)
|
|
return false
|
|
}
|
|
|
|
r.nextDetectionAt[jobType] = now.Add(interval)
|
|
r.detectionInFlight[jobType] = true
|
|
return true
|
|
}
|
|
|
|
// earliestLaneDetectionAt returns the earliest next detection time among
|
|
// job types that belong to the given lane.
|
|
func (r *Plugin) earliestLaneDetectionAt(lane SchedulerLane) time.Time {
|
|
if r == nil {
|
|
return time.Time{}
|
|
}
|
|
|
|
r.schedulerMu.Lock()
|
|
defer r.schedulerMu.Unlock()
|
|
|
|
var earliest time.Time
|
|
for jobType, nextRun := range r.nextDetectionAt {
|
|
if JobTypeLane(jobType) != lane {
|
|
continue
|
|
}
|
|
if nextRun.IsZero() {
|
|
continue
|
|
}
|
|
if earliest.IsZero() || nextRun.Before(earliest) {
|
|
earliest = nextRun
|
|
}
|
|
}
|
|
|
|
return earliest
|
|
}
|
|
|
|
// earliestNextDetectionAt returns the earliest next detection time across
|
|
// all job types regardless of lane. Kept for backward compatibility and
|
|
// the global scheduler status API.
|
|
func (r *Plugin) earliestNextDetectionAt() time.Time {
|
|
if r == nil {
|
|
return time.Time{}
|
|
}
|
|
|
|
r.schedulerMu.Lock()
|
|
defer r.schedulerMu.Unlock()
|
|
|
|
var earliest time.Time
|
|
for _, nextRun := range r.nextDetectionAt {
|
|
if nextRun.IsZero() {
|
|
continue
|
|
}
|
|
if earliest.IsZero() || nextRun.Before(earliest) {
|
|
earliest = nextRun
|
|
}
|
|
}
|
|
|
|
return earliest
|
|
}
|
|
|
|
func (r *Plugin) markJobTypeInFlight(jobType string) {
|
|
r.schedulerMu.Lock()
|
|
r.detectionInFlight[jobType] = true
|
|
r.schedulerMu.Unlock()
|
|
}
|
|
|
|
func (r *Plugin) finishDetection(jobType string) {
|
|
r.schedulerMu.Lock()
|
|
delete(r.detectionInFlight, jobType)
|
|
r.schedulerMu.Unlock()
|
|
}
|
|
|
|
func (r *Plugin) pruneSchedulerState(activeJobTypes map[string]struct{}) {
|
|
r.schedulerMu.Lock()
|
|
defer r.schedulerMu.Unlock()
|
|
|
|
for jobType := range r.nextDetectionAt {
|
|
if _, ok := activeJobTypes[jobType]; !ok {
|
|
delete(r.nextDetectionAt, jobType)
|
|
delete(r.detectionInFlight, jobType)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (r *Plugin) clearSchedulerJobType(jobType string) {
|
|
r.schedulerMu.Lock()
|
|
delete(r.nextDetectionAt, jobType)
|
|
delete(r.detectionInFlight, jobType)
|
|
r.schedulerMu.Unlock()
|
|
r.clearDetectorLease(jobType, "")
|
|
}
|
|
|
|
func (r *Plugin) pruneDetectorLeases(activeJobTypes map[string]struct{}) {
|
|
r.detectorLeaseMu.Lock()
|
|
defer r.detectorLeaseMu.Unlock()
|
|
|
|
for jobType := range r.detectorLeases {
|
|
if _, ok := activeJobTypes[jobType]; !ok {
|
|
delete(r.detectorLeases, jobType)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (r *Plugin) loadSchedulerClusterContext(ctx context.Context) (*plugin_pb.ClusterContext, error) {
|
|
if r.clusterContextProvider == nil {
|
|
return nil, fmt.Errorf("cluster context provider is not configured")
|
|
}
|
|
|
|
if ctx == nil {
|
|
ctx = context.Background()
|
|
}
|
|
clusterCtx, cancel := context.WithTimeout(ctx, defaultClusterContextTimeout)
|
|
defer cancel()
|
|
|
|
clusterContext, err := r.clusterContextProvider(clusterCtx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if clusterContext == nil {
|
|
return nil, fmt.Errorf("cluster context provider returned nil")
|
|
}
|
|
return clusterContext, nil
|
|
}
|
|
|
|
func (r *Plugin) dispatchScheduledProposals(
|
|
ctx context.Context,
|
|
jobType string,
|
|
proposals []*plugin_pb.JobProposal,
|
|
clusterContext *plugin_pb.ClusterContext,
|
|
policy schedulerPolicy,
|
|
) (int, int, int) {
|
|
if ctx == nil {
|
|
ctx = context.Background()
|
|
}
|
|
|
|
jobQueue := make(chan *plugin_pb.JobSpec, len(proposals))
|
|
for index, proposal := range proposals {
|
|
job := buildScheduledJobSpec(jobType, proposal, index)
|
|
r.trackExecutionQueued(job)
|
|
select {
|
|
case <-r.shutdownCh:
|
|
close(jobQueue)
|
|
return 0, 0, 0
|
|
default:
|
|
jobQueue <- job
|
|
}
|
|
}
|
|
close(jobQueue)
|
|
|
|
var wg sync.WaitGroup
|
|
var statsMu sync.Mutex
|
|
successCount := 0
|
|
errorCount := 0
|
|
canceledCount := 0
|
|
|
|
workerCount := policy.ExecutionConcurrency
|
|
if workerCount < 1 {
|
|
workerCount = 1
|
|
}
|
|
|
|
for i := 0; i < workerCount; i++ {
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
|
|
jobLoop:
|
|
for job := range jobQueue {
|
|
select {
|
|
case <-r.shutdownCh:
|
|
return
|
|
default:
|
|
}
|
|
|
|
if ctx.Err() != nil {
|
|
r.cancelQueuedJob(job, ctx.Err())
|
|
statsMu.Lock()
|
|
canceledCount++
|
|
statsMu.Unlock()
|
|
continue
|
|
}
|
|
|
|
for {
|
|
select {
|
|
case <-r.shutdownCh:
|
|
return
|
|
default:
|
|
}
|
|
if ctx.Err() != nil {
|
|
r.cancelQueuedJob(job, ctx.Err())
|
|
statsMu.Lock()
|
|
canceledCount++
|
|
statsMu.Unlock()
|
|
continue jobLoop
|
|
}
|
|
|
|
executor, release, reserveErr := r.reserveScheduledExecutor(ctx, jobType, policy)
|
|
if reserveErr != nil {
|
|
if ctx.Err() != nil {
|
|
r.cancelQueuedJob(job, ctx.Err())
|
|
statsMu.Lock()
|
|
canceledCount++
|
|
statsMu.Unlock()
|
|
continue jobLoop
|
|
}
|
|
statsMu.Lock()
|
|
errorCount++
|
|
statsMu.Unlock()
|
|
r.appendActivity(JobActivity{
|
|
JobType: jobType,
|
|
Source: "admin_scheduler",
|
|
Message: fmt.Sprintf("scheduled execution reservation failed: %v", reserveErr),
|
|
Stage: "failed",
|
|
OccurredAt: timeToPtr(time.Now().UTC()),
|
|
})
|
|
break
|
|
}
|
|
|
|
err := r.executeScheduledJobWithExecutor(ctx, executor, job, clusterContext, policy)
|
|
release()
|
|
if errors.Is(err, errExecutorAtCapacity) {
|
|
r.trackExecutionQueued(job)
|
|
if !waitForShutdownOrTimerWithContext(r.shutdownCh, ctx, policy.ExecutorReserveBackoff) {
|
|
return
|
|
}
|
|
continue
|
|
}
|
|
if err != nil {
|
|
if ctx.Err() != nil || errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
|
|
r.cancelQueuedJob(job, err)
|
|
statsMu.Lock()
|
|
canceledCount++
|
|
statsMu.Unlock()
|
|
continue jobLoop
|
|
}
|
|
statsMu.Lock()
|
|
errorCount++
|
|
statsMu.Unlock()
|
|
r.appendActivity(JobActivity{
|
|
JobID: job.JobId,
|
|
JobType: job.JobType,
|
|
Source: "admin_scheduler",
|
|
Message: fmt.Sprintf("scheduled execution failed: %v", err),
|
|
Stage: "failed",
|
|
OccurredAt: timeToPtr(time.Now().UTC()),
|
|
})
|
|
break
|
|
}
|
|
|
|
statsMu.Lock()
|
|
successCount++
|
|
statsMu.Unlock()
|
|
break
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
|
|
wg.Wait()
|
|
|
|
drainErr := ctx.Err()
|
|
if drainErr == nil {
|
|
drainErr = errSchedulerShutdown
|
|
}
|
|
for job := range jobQueue {
|
|
r.cancelQueuedJob(job, drainErr)
|
|
canceledCount++
|
|
}
|
|
|
|
return successCount, errorCount, canceledCount
|
|
}
|
|
|
|
func (r *Plugin) reserveScheduledExecutor(
|
|
ctx context.Context,
|
|
jobType string,
|
|
policy schedulerPolicy,
|
|
) (*WorkerSession, func(), error) {
|
|
if ctx == nil {
|
|
ctx = context.Background()
|
|
}
|
|
|
|
deadline := time.Now().Add(policy.ExecutionTimeout)
|
|
if policy.ExecutionTimeout <= 0 {
|
|
deadline = time.Now().Add(10 * time.Minute) // Default cap
|
|
}
|
|
if ctxDeadline, ok := ctx.Deadline(); ok && ctxDeadline.Before(deadline) {
|
|
deadline = ctxDeadline
|
|
}
|
|
|
|
for {
|
|
select {
|
|
case <-r.shutdownCh:
|
|
return nil, nil, fmt.Errorf("plugin is shutting down")
|
|
default:
|
|
}
|
|
if ctx.Err() != nil {
|
|
return nil, nil, ctx.Err()
|
|
}
|
|
|
|
if time.Now().After(deadline) {
|
|
return nil, nil, fmt.Errorf("timed out waiting for executor capacity for %s", jobType)
|
|
}
|
|
|
|
executors, err := r.registry.ListExecutors(jobType)
|
|
if err != nil {
|
|
if !waitForShutdownOrTimerWithContext(r.shutdownCh, ctx, policy.ExecutorReserveBackoff) {
|
|
if ctx.Err() != nil {
|
|
return nil, nil, ctx.Err()
|
|
}
|
|
return nil, nil, fmt.Errorf("plugin is shutting down")
|
|
}
|
|
continue
|
|
}
|
|
|
|
for _, executor := range executors {
|
|
release, ok := r.tryReserveExecutorCapacity(executor, jobType, policy)
|
|
if !ok {
|
|
continue
|
|
}
|
|
return executor, release, nil
|
|
}
|
|
|
|
if !waitForShutdownOrTimerWithContext(r.shutdownCh, ctx, policy.ExecutorReserveBackoff) {
|
|
if ctx.Err() != nil {
|
|
return nil, nil, ctx.Err()
|
|
}
|
|
return nil, nil, fmt.Errorf("plugin is shutting down")
|
|
}
|
|
}
|
|
}
|
|
|
|
func (r *Plugin) tryReserveExecutorCapacity(
|
|
executor *WorkerSession,
|
|
jobType string,
|
|
policy schedulerPolicy,
|
|
) (func(), bool) {
|
|
return r.tryReserveExecutorCapacityForLane(executor, jobType, policy, JobTypeLane(jobType))
|
|
}
|
|
|
|
// tryReserveExecutorCapacityForLane reserves an execution slot on the
|
|
// per-lane reservation pool so that lanes cannot starve each other.
|
|
func (r *Plugin) tryReserveExecutorCapacityForLane(
|
|
executor *WorkerSession,
|
|
jobType string,
|
|
policy schedulerPolicy,
|
|
lane SchedulerLane,
|
|
) (func(), bool) {
|
|
if executor == nil || strings.TrimSpace(executor.WorkerID) == "" {
|
|
return nil, false
|
|
}
|
|
|
|
limit := schedulerWorkerExecutionLimit(executor, jobType, policy)
|
|
if limit <= 0 {
|
|
return nil, false
|
|
}
|
|
heartbeatUsed := 0
|
|
if executor.Heartbeat != nil && executor.Heartbeat.ExecutionSlotsUsed > 0 {
|
|
heartbeatUsed = int(executor.Heartbeat.ExecutionSlotsUsed)
|
|
}
|
|
|
|
workerID := strings.TrimSpace(executor.WorkerID)
|
|
|
|
ls := r.lanes[lane]
|
|
if ls == nil {
|
|
// Fallback to global reservations if lane state is missing.
|
|
r.schedulerExecMu.Lock()
|
|
reserved := r.schedulerExecReservations[workerID]
|
|
if heartbeatUsed+reserved >= limit {
|
|
r.schedulerExecMu.Unlock()
|
|
return nil, false
|
|
}
|
|
r.schedulerExecReservations[workerID] = reserved + 1
|
|
r.schedulerExecMu.Unlock()
|
|
release := func() { r.releaseExecutorCapacity(workerID) }
|
|
return release, true
|
|
}
|
|
|
|
ls.execMu.Lock()
|
|
reserved := ls.execRes[workerID]
|
|
if heartbeatUsed+reserved >= limit {
|
|
ls.execMu.Unlock()
|
|
return nil, false
|
|
}
|
|
ls.execRes[workerID] = reserved + 1
|
|
ls.execMu.Unlock()
|
|
|
|
release := func() {
|
|
r.releaseExecutorCapacityForLane(workerID, lane)
|
|
}
|
|
return release, true
|
|
}
|
|
|
|
// releaseExecutorCapacityForLane releases a reservation from the per-lane pool.
|
|
func (r *Plugin) releaseExecutorCapacityForLane(workerID string, lane SchedulerLane) {
|
|
workerID = strings.TrimSpace(workerID)
|
|
if workerID == "" {
|
|
return
|
|
}
|
|
|
|
ls := r.lanes[lane]
|
|
if ls == nil {
|
|
r.releaseExecutorCapacity(workerID)
|
|
return
|
|
}
|
|
|
|
ls.execMu.Lock()
|
|
defer ls.execMu.Unlock()
|
|
|
|
current := ls.execRes[workerID]
|
|
if current <= 1 {
|
|
delete(ls.execRes, workerID)
|
|
return
|
|
}
|
|
ls.execRes[workerID] = current - 1
|
|
}
|
|
|
|
func (r *Plugin) releaseExecutorCapacity(workerID string) {
|
|
workerID = strings.TrimSpace(workerID)
|
|
if workerID == "" {
|
|
return
|
|
}
|
|
|
|
r.schedulerExecMu.Lock()
|
|
defer r.schedulerExecMu.Unlock()
|
|
|
|
current := r.schedulerExecReservations[workerID]
|
|
if current <= 1 {
|
|
delete(r.schedulerExecReservations, workerID)
|
|
return
|
|
}
|
|
r.schedulerExecReservations[workerID] = current - 1
|
|
}
|
|
|
|
func schedulerWorkerExecutionLimit(executor *WorkerSession, jobType string, policy schedulerPolicy) int {
|
|
limit := policy.PerWorkerConcurrency
|
|
if limit <= 0 {
|
|
limit = defaultScheduledPerWorkerConcurrency
|
|
}
|
|
|
|
if capability := executor.Capabilities[jobType]; capability != nil && capability.MaxExecutionConcurrency > 0 {
|
|
capLimit := int(capability.MaxExecutionConcurrency)
|
|
if capLimit < limit {
|
|
limit = capLimit
|
|
}
|
|
}
|
|
|
|
if executor.Heartbeat != nil && executor.Heartbeat.ExecutionSlotsTotal > 0 {
|
|
heartbeatLimit := int(executor.Heartbeat.ExecutionSlotsTotal)
|
|
if heartbeatLimit < limit {
|
|
limit = heartbeatLimit
|
|
}
|
|
}
|
|
|
|
if limit < 0 {
|
|
return 0
|
|
}
|
|
return limit
|
|
}
|
|
|
|
func (r *Plugin) executeScheduledJobWithExecutor(
|
|
ctx context.Context,
|
|
executor *WorkerSession,
|
|
job *plugin_pb.JobSpec,
|
|
clusterContext *plugin_pb.ClusterContext,
|
|
policy schedulerPolicy,
|
|
) error {
|
|
maxAttempts := policy.RetryLimit + 1
|
|
if maxAttempts < 1 {
|
|
maxAttempts = 1
|
|
}
|
|
|
|
var lastErr error
|
|
for attempt := 1; attempt <= maxAttempts; attempt++ {
|
|
select {
|
|
case <-r.shutdownCh:
|
|
return fmt.Errorf("plugin is shutting down")
|
|
default:
|
|
}
|
|
if ctx != nil && ctx.Err() != nil {
|
|
return ctx.Err()
|
|
}
|
|
|
|
parent := ctx
|
|
if parent == nil {
|
|
parent = context.Background()
|
|
}
|
|
// Use the job's estimated runtime if provided and larger than the
|
|
// default execution timeout. This lets handlers like vacuum scale
|
|
// the timeout based on volume size so large volumes are not killed.
|
|
timeout := policy.ExecutionTimeout
|
|
if job.Parameters != nil {
|
|
if est, ok := job.Parameters["estimated_runtime_seconds"]; ok {
|
|
if v := est.GetInt64Value(); v > 0 {
|
|
estimated := time.Duration(v) * time.Second
|
|
if estimated > maxEstimatedRuntimeCap {
|
|
estimated = maxEstimatedRuntimeCap
|
|
}
|
|
if estimated > timeout {
|
|
timeout = estimated
|
|
}
|
|
}
|
|
}
|
|
}
|
|
execCtx, cancel := context.WithTimeout(parent, timeout)
|
|
_, err := r.executeJobWithExecutor(execCtx, executor, job, clusterContext, int32(attempt))
|
|
cancel()
|
|
if err == nil {
|
|
return nil
|
|
}
|
|
if isExecutorAtCapacityError(err) {
|
|
return errExecutorAtCapacity
|
|
}
|
|
lastErr = err
|
|
|
|
if attempt < maxAttempts {
|
|
r.appendActivity(JobActivity{
|
|
JobID: job.JobId,
|
|
JobType: job.JobType,
|
|
Source: "admin_scheduler",
|
|
Message: fmt.Sprintf("retrying job attempt %d/%d after error: %v", attempt, maxAttempts, err),
|
|
Stage: "retry",
|
|
OccurredAt: timeToPtr(time.Now().UTC()),
|
|
})
|
|
if !waitForShutdownOrTimerWithContext(r.shutdownCh, ctx, policy.RetryBackoff) {
|
|
if ctx != nil && ctx.Err() != nil {
|
|
return ctx.Err()
|
|
}
|
|
return fmt.Errorf("plugin is shutting down")
|
|
}
|
|
}
|
|
}
|
|
|
|
if lastErr == nil {
|
|
lastErr = fmt.Errorf("execution failed without an explicit error")
|
|
}
|
|
return lastErr
|
|
}
|
|
|
|
func (r *Plugin) shouldSkipDetectionForWaitingJobs(jobType string, policy schedulerPolicy) (bool, int, int) {
|
|
waitingCount := r.countWaitingTrackedJobs(jobType)
|
|
threshold := waitingBacklogThreshold(policy)
|
|
if threshold <= 0 {
|
|
return false, waitingCount, threshold
|
|
}
|
|
return waitingCount >= threshold, waitingCount, threshold
|
|
}
|
|
|
|
func (r *Plugin) countWaitingTrackedJobs(jobType string) int {
|
|
normalizedJobType := strings.TrimSpace(jobType)
|
|
if normalizedJobType == "" {
|
|
return 0
|
|
}
|
|
|
|
waiting := 0
|
|
r.jobsMu.RLock()
|
|
for _, job := range r.jobs {
|
|
if job == nil {
|
|
continue
|
|
}
|
|
if strings.TrimSpace(job.JobType) != normalizedJobType {
|
|
continue
|
|
}
|
|
if !isWaitingTrackedJobState(job.State) {
|
|
continue
|
|
}
|
|
waiting++
|
|
}
|
|
r.jobsMu.RUnlock()
|
|
|
|
return waiting
|
|
}
|
|
|
|
func (r *Plugin) clearWaitingJobQueue(jobType string) int {
|
|
normalizedJobType := strings.TrimSpace(jobType)
|
|
if normalizedJobType == "" {
|
|
return 0
|
|
}
|
|
|
|
jobIDs := make([]string, 0)
|
|
seen := make(map[string]struct{})
|
|
|
|
r.jobsMu.RLock()
|
|
for _, job := range r.jobs {
|
|
if job == nil {
|
|
continue
|
|
}
|
|
if strings.TrimSpace(job.JobType) != normalizedJobType {
|
|
continue
|
|
}
|
|
if !isWaitingTrackedJobState(job.State) {
|
|
continue
|
|
}
|
|
jobID := strings.TrimSpace(job.JobID)
|
|
if jobID == "" {
|
|
continue
|
|
}
|
|
if _, ok := seen[jobID]; ok {
|
|
continue
|
|
}
|
|
seen[jobID] = struct{}{}
|
|
jobIDs = append(jobIDs, jobID)
|
|
}
|
|
r.jobsMu.RUnlock()
|
|
|
|
if len(jobIDs) == 0 {
|
|
return 0
|
|
}
|
|
|
|
reason := fmt.Sprintf("cleared queued job before %s run", normalizedJobType)
|
|
for _, jobID := range jobIDs {
|
|
r.markJobCanceled(&plugin_pb.JobSpec{
|
|
JobId: jobID,
|
|
JobType: normalizedJobType,
|
|
}, reason)
|
|
}
|
|
|
|
return len(jobIDs)
|
|
}
|
|
|
|
func waitingBacklogThreshold(policy schedulerPolicy) int {
|
|
concurrency := policy.ExecutionConcurrency
|
|
if concurrency <= 0 {
|
|
concurrency = defaultScheduledExecutionConcurrency
|
|
}
|
|
threshold := concurrency * defaultWaitingBacklogMultiplier
|
|
if threshold < defaultWaitingBacklogFloor {
|
|
threshold = defaultWaitingBacklogFloor
|
|
}
|
|
if policy.MaxResults > 0 && threshold > int(policy.MaxResults) {
|
|
threshold = int(policy.MaxResults)
|
|
}
|
|
return threshold
|
|
}
|
|
|
|
func isExecutorAtCapacityError(err error) bool {
|
|
if err == nil {
|
|
return false
|
|
}
|
|
if errors.Is(err, errExecutorAtCapacity) {
|
|
return true
|
|
}
|
|
return strings.Contains(strings.ToLower(err.Error()), "executor is at capacity")
|
|
}
|
|
|
|
func buildScheduledJobSpec(jobType string, proposal *plugin_pb.JobProposal, index int) *plugin_pb.JobSpec {
|
|
now := timestamppb.Now()
|
|
|
|
jobID := fmt.Sprintf("%s-scheduled-%d-%d", jobType, now.AsTime().UnixNano(), index)
|
|
|
|
job := &plugin_pb.JobSpec{
|
|
JobId: jobID,
|
|
JobType: jobType,
|
|
Priority: plugin_pb.JobPriority_JOB_PRIORITY_NORMAL,
|
|
Parameters: map[string]*plugin_pb.ConfigValue{},
|
|
Labels: map[string]string{},
|
|
CreatedAt: now,
|
|
ScheduledAt: now,
|
|
}
|
|
|
|
if proposal == nil {
|
|
return job
|
|
}
|
|
|
|
if proposal.JobType != "" {
|
|
job.JobType = proposal.JobType
|
|
}
|
|
job.Summary = proposal.Summary
|
|
job.Detail = proposal.Detail
|
|
if proposal.Priority != plugin_pb.JobPriority_JOB_PRIORITY_UNSPECIFIED {
|
|
job.Priority = proposal.Priority
|
|
}
|
|
job.DedupeKey = proposal.DedupeKey
|
|
job.Parameters = CloneConfigValueMap(proposal.Parameters)
|
|
if proposal.Labels != nil {
|
|
job.Labels = make(map[string]string, len(proposal.Labels))
|
|
for k, v := range proposal.Labels {
|
|
job.Labels[k] = v
|
|
}
|
|
}
|
|
if proposal.NotBefore != nil {
|
|
job.ScheduledAt = proposal.NotBefore
|
|
}
|
|
|
|
return job
|
|
}
|
|
|
|
// durationFromSeconds converts a positive number of seconds into a
// time.Duration. Zero or negative input yields defaultValue.
func durationFromSeconds(seconds int32, defaultValue time.Duration) time.Duration {
	if seconds > 0 {
		return time.Second * time.Duration(seconds)
	}
	return defaultValue
}
|
|
|
|
// secondsFromDuration converts duration to whole seconds, discarding any
// fractional second. Zero or negative durations yield 0. Durations longer than
// an int32 can hold in seconds are clamped to the largest int32 value instead
// of wrapping around to a negative number.
func secondsFromDuration(duration time.Duration) int32 {
	const maxSeconds = 1<<31 - 1 // largest value representable as int32

	if duration <= 0 {
		return 0
	}
	seconds := duration / time.Second
	if seconds > maxSeconds {
		return maxSeconds
	}
	return int32(seconds)
}
|
|
|
|
// waitForShutdownOrTimerWithContext blocks until duration elapses, shutdown
// becomes readable, or ctx is done, whichever happens first. It returns true
// only when the full duration elapsed. A non-positive duration returns true
// immediately without looking at shutdown or ctx. A nil ctx never cancels the
// wait.
func waitForShutdownOrTimerWithContext(shutdown <-chan struct{}, ctx context.Context, duration time.Duration) bool {
	if duration <= 0 {
		return true
	}

	// A nil channel is never ready, so a nil ctx simply drops out of the select.
	var ctxDone <-chan struct{}
	if ctx != nil {
		ctxDone = ctx.Done()
	}

	timer := time.NewTimer(duration)
	defer timer.Stop()

	select {
	case <-timer.C:
		return true
	case <-shutdown:
	case <-ctxDone:
	}
	return false
}
|
|
|
|
// filterProposalsWithActiveJobs removes proposals whose dedupe keys already have active jobs.
|
|
// It first expires stale tracked jobs via expireStaleJobs, which can mutate scheduler state,
|
|
// so callers should treat this method as a stateful operation.
|
|
func (r *Plugin) filterProposalsWithActiveJobs(jobType string, proposals []*plugin_pb.JobProposal) ([]*plugin_pb.JobProposal, int) {
|
|
if len(proposals) == 0 {
|
|
return proposals, 0
|
|
}
|
|
|
|
r.expireStaleJobs(time.Now().UTC())
|
|
|
|
activeKeys := make(map[string]struct{})
|
|
r.jobsMu.RLock()
|
|
for _, job := range r.jobs {
|
|
if job == nil {
|
|
continue
|
|
}
|
|
if strings.TrimSpace(job.JobType) != strings.TrimSpace(jobType) {
|
|
continue
|
|
}
|
|
if !isActiveTrackedJobState(job.State) {
|
|
continue
|
|
}
|
|
|
|
key := strings.TrimSpace(job.DedupeKey)
|
|
if key == "" {
|
|
key = strings.TrimSpace(job.JobID)
|
|
}
|
|
if key == "" {
|
|
continue
|
|
}
|
|
activeKeys[key] = struct{}{}
|
|
}
|
|
r.jobsMu.RUnlock()
|
|
|
|
if len(activeKeys) == 0 {
|
|
return proposals, 0
|
|
}
|
|
|
|
filtered := make([]*plugin_pb.JobProposal, 0, len(proposals))
|
|
skipped := 0
|
|
for _, proposal := range proposals {
|
|
if proposal == nil {
|
|
continue
|
|
}
|
|
key := proposalExecutionKey(proposal)
|
|
if key != "" {
|
|
if _, exists := activeKeys[key]; exists {
|
|
skipped++
|
|
continue
|
|
}
|
|
}
|
|
filtered = append(filtered, proposal)
|
|
}
|
|
|
|
return filtered, skipped
|
|
}
|
|
|
|
func proposalExecutionKey(proposal *plugin_pb.JobProposal) string {
|
|
if proposal == nil {
|
|
return ""
|
|
}
|
|
key := strings.TrimSpace(proposal.DedupeKey)
|
|
if key != "" {
|
|
return key
|
|
}
|
|
return strings.TrimSpace(proposal.ProposalId)
|
|
}
|
|
|
|
// isActiveTrackedJobState reports whether state names a pending, assigned or
// running job. Matching ignores case and surrounding whitespace and accepts
// both the short names and the "job_state_" prefixed forms listed below.
func isActiveTrackedJobState(state string) bool {
	switch strings.ToLower(strings.TrimSpace(state)) {
	case "pending", "job_state_pending":
		return true
	case "assigned", "job_state_assigned":
		return true
	case "running", "in_progress", "job_state_running":
		return true
	}
	return false
}
|
|
|
|
// isWaitingTrackedJobState reports whether state is "pending" or
// "job_state_pending", ignoring case and surrounding whitespace.
func isWaitingTrackedJobState(state string) bool {
	switch strings.ToLower(strings.TrimSpace(state)) {
	case "pending", "job_state_pending":
		return true
	default:
		return false
	}
}
|
|
|
|
// DispatchProposals dispatches a batch of proposals using the same capacity-aware
|
|
// dispatch logic as the scheduler loop: concurrent execution, executor reservation
|
|
// with backoff, and per-job retry on transient errors. The scheduler policy is
|
|
// loaded from the persisted job type config; if the job type has no config or is
|
|
// disabled a sensible default policy is used so manual runs always work.
|
|
func (r *Plugin) DispatchProposals(
|
|
ctx context.Context,
|
|
jobType string,
|
|
proposals []*plugin_pb.JobProposal,
|
|
clusterContext *plugin_pb.ClusterContext,
|
|
) (successCount, errorCount, canceledCount int) {
|
|
if len(proposals) == 0 {
|
|
return 0, 0, 0
|
|
}
|
|
|
|
policy, enabled, err := r.loadSchedulerPolicy(jobType)
|
|
if err != nil || !enabled {
|
|
policy = schedulerPolicy{
|
|
ExecutionConcurrency: defaultScheduledExecutionConcurrency,
|
|
PerWorkerConcurrency: defaultScheduledPerWorkerConcurrency,
|
|
ExecutionTimeout: defaultScheduledExecutionTimeout,
|
|
RetryBackoff: defaultScheduledRetryBackoff,
|
|
ExecutorReserveBackoff: 200 * time.Millisecond,
|
|
}
|
|
}
|
|
|
|
return r.dispatchScheduledProposals(ctx, jobType, proposals, clusterContext, policy)
|
|
}
|
|
|
|
func (r *Plugin) filterScheduledProposals(proposals []*plugin_pb.JobProposal) []*plugin_pb.JobProposal {
|
|
filtered := make([]*plugin_pb.JobProposal, 0, len(proposals))
|
|
seenInRun := make(map[string]struct{}, len(proposals))
|
|
|
|
for _, proposal := range proposals {
|
|
if proposal == nil {
|
|
continue
|
|
}
|
|
|
|
key := proposal.DedupeKey
|
|
if key == "" {
|
|
key = proposal.ProposalId
|
|
}
|
|
if key == "" {
|
|
filtered = append(filtered, proposal)
|
|
continue
|
|
}
|
|
|
|
if _, exists := seenInRun[key]; exists {
|
|
continue
|
|
}
|
|
|
|
seenInRun[key] = struct{}{}
|
|
filtered = append(filtered, proposal)
|
|
}
|
|
|
|
return filtered
|
|
}
|