plugin scheduler: run iceberg and lifecycle lanes concurrently (#8821)

* plugin scheduler: run iceberg and lifecycle lanes concurrently

The default lane serialises job types under a single admin lock
because volume-management operations share global state. Iceberg
and lifecycle lanes have no such constraint, so run each of their
job types independently in separate goroutines.

* Fix concurrent lane scheduler status

* plugin scheduler: address review feedback

- Extract collectDueJobTypes helper to deduplicate policy loading
  between locked and concurrent iteration paths.
- Use atomic.Bool instead of sync.Mutex for hadJobs in the concurrent
  path.
- Set lane loop state to "busy" before launching concurrent goroutines
  so the lane is not reported as idle while work runs.
- Convert TestLaneRequiresLock to table-driven style.
- Add TestRunLaneSchedulerIterationLockBehavior to verify the scheduler
  acquires the admin lock only for lanes that require it.
- Fix flaky TestGetLaneSchedulerStatusShowsActiveConcurrentLaneWork by
  not starting background scheduler goroutines that race with the
  direct runJobTypeIteration call.
This commit is contained in:
Chris Lu
2026-03-29 00:06:20 -07:00
committed by GitHub
parent e8a6fcaafb
commit a95b8396e4
6 changed files with 292 additions and 17 deletions

View File

@@ -6,6 +6,7 @@ import (
"fmt"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
@@ -106,6 +107,14 @@ func (r *Plugin) schedulerLoop() {
// runLaneSchedulerIteration runs one scheduling pass for a single lane,
// processing only the job types assigned to that lane.
//
// For lanes that require a lock (e.g. LaneDefault), all job types are
// processed sequentially under one admin lock because their volume
// management operations share global state.
//
// For lanes that do not require a lock (e.g. LaneIceberg, LaneLifecycle),
// each job type runs independently in its own goroutine so they do not
// block each other.
func (r *Plugin) runLaneSchedulerIteration(ls *schedulerLaneState) bool {
r.expireStaleJobs(time.Now().UTC())
@@ -122,21 +131,23 @@ func (r *Plugin) runLaneSchedulerIteration(ls *schedulerLaneState) bool {
return false
}
r.setLaneLoopState(ls, "", "waiting_for_lock")
lockName := fmt.Sprintf("plugin scheduler:%s", ls.lane)
releaseLock, err := r.acquireAdminLock(lockName)
if err != nil {
glog.Warningf("Plugin scheduler [%s] failed to acquire lock: %v", ls.lane, err)
r.setLaneLoopState(ls, "", "idle")
return false
}
if releaseLock != nil {
defer releaseLock()
if LaneRequiresLock(ls.lane) {
return r.runLaneSchedulerIterationLocked(ls, jobTypes)
}
return r.runLaneSchedulerIterationConcurrent(ls, jobTypes)
}
active := make(map[string]struct{}, len(jobTypes))
hadJobs := false
// dueJobType pairs a job type with its resolved scheduling policy.
// Values of this type are produced by collectDueJobTypes and consumed by
// the locked and concurrent lane-iteration paths.
type dueJobType struct {
	// jobType is the name of the job type whose detection interval elapsed.
	jobType string
	// policy is the scheduling policy resolved for jobType.
	policy schedulerPolicy
}
// collectDueJobTypes loads policies for all job types in the lane and
// returns those whose detection interval has elapsed. It also returns
// the full set of active job type names for later pruning.
func (r *Plugin) collectDueJobTypes(ls *schedulerLaneState, jobTypes []string) (active map[string]struct{}, due []dueJobType) {
active = make(map[string]struct{}, len(jobTypes))
for _, jobType := range jobTypes {
active[jobType] = struct{}{}
@@ -156,9 +167,31 @@ func (r *Plugin) runLaneSchedulerIteration(ls *schedulerLaneState) bool {
if !r.markDetectionDue(jobType, policy.DetectionInterval, initialDelay) {
continue
}
due = append(due, dueJobType{jobType: jobType, policy: policy})
}
return active, due
}
detected := r.runJobTypeIteration(jobType, policy)
if detected {
// runLaneSchedulerIterationLocked processes job types sequentially under a
// single admin lock. Used by the default lane where volume management
// operations must be serialised.
func (r *Plugin) runLaneSchedulerIterationLocked(ls *schedulerLaneState, jobTypes []string) bool {
r.setLaneLoopState(ls, "", "waiting_for_lock")
lockName := fmt.Sprintf("plugin scheduler:%s", ls.lane)
releaseLock, err := r.acquireAdminLock(lockName)
if err != nil {
glog.Warningf("Plugin scheduler [%s] failed to acquire lock: %v", ls.lane, err)
r.setLaneLoopState(ls, "", "idle")
return false
}
if releaseLock != nil {
defer releaseLock()
}
active, due := r.collectDueJobTypes(ls, jobTypes)
hadJobs := false
for _, w := range due {
if r.runJobTypeIteration(w.jobType, w.policy) {
hadJobs = true
}
}
@@ -169,6 +202,33 @@ func (r *Plugin) runLaneSchedulerIteration(ls *schedulerLaneState) bool {
return hadJobs
}
// runLaneSchedulerIterationConcurrent runs every due job type for the lane
// in parallel, one goroutine per job type. It is used by lanes (e.g. iceberg,
// lifecycle) whose job types share no global state, so none of them needs the
// admin lock or has to wait on a sibling job type.
//
// It reports whether at least one job type produced work during this pass.
func (r *Plugin) runLaneSchedulerIterationConcurrent(ls *schedulerLaneState, jobTypes []string) bool {
	active, due := r.collectDueJobTypes(ls, jobTypes)

	// Mark the lane busy before spawning workers so status queries never
	// observe an idle lane while goroutines are still running.
	r.setLaneLoopState(ls, "", "busy")

	var (
		wg      sync.WaitGroup
		hadJobs atomic.Bool
	)
	for _, d := range due {
		wg.Add(1)
		go func(jt string, p schedulerPolicy) {
			defer wg.Done()
			if r.runJobTypeIteration(jt, p) {
				hadJobs.Store(true)
			}
		}(d.jobType, d.policy)
	}
	wg.Wait()

	r.pruneSchedulerState(active)
	r.pruneDetectorLeases(active)
	r.setLaneLoopState(ls, "", "idle")
	return hadJobs.Load()
}
// runSchedulerIteration is kept for backward compatibility. It runs a
// single iteration across ALL job types (equivalent to the old single-loop
// behavior). It is only used by the legacy schedulerLoop() fallback.
@@ -267,7 +327,7 @@ func (r *Plugin) wakeScheduler() {
func (r *Plugin) runJobTypeIteration(jobType string, policy schedulerPolicy) bool {
r.recordSchedulerRunStart(jobType)
r.clearWaitingJobQueue(jobType)
r.setSchedulerLoopState(jobType, "detecting")
r.setSchedulerLoopStateForJobType(jobType, "detecting")
r.markJobTypeInFlight(jobType)
defer r.finishDetection(jobType)
@@ -399,7 +459,7 @@ func (r *Plugin) runJobTypeIteration(jobType string, policy schedulerPolicy) boo
return detected
}
r.setSchedulerLoopState(jobType, "executing")
r.setSchedulerLoopStateForJobType(jobType, "executing")
// Scan proposals for the maximum estimated_runtime_seconds so the
// execution phase gets enough time for large jobs (e.g. vacuum on