plugin worker: support job type categories (all, default, heavy) (#8547)

* plugin worker: add handler registry with job categories

Introduce a self-registration pattern for plugin worker job handlers.
Each handler can register itself via init() with a HandlerFactory that
declares its job type, category (default/heavy), CLI aliases, and a
builder function.

ResolveHandlerFactories accepts a mix of category names ("all",
"default", "heavy") and explicit job type names/aliases, returning the
matching factories. This enables workers to be configured by resource
profile rather than requiring explicit job type enumeration.

* plugin worker: register all handlers via init()

Each job handler now self-registers into the global handler registry
with its canonical job type, category, CLI aliases, and build function:

  - vacuum:              category=default
  - volume_balance:      category=default
  - admin_script:        category=default
  - erasure_coding:      category=heavy
  - iceberg_maintenance: category=heavy

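Adding a new job type now only requires adding the init() call in the
handler file itself. For handlers inside the pluginworker package no other
files need to be touched; a handler that lives in its own subpackage must
also be blank-imported somewhere so that its init() runs.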

* plugin worker: replace hardcoded job type switch with registry

Remove buildPluginWorkerHandler, parsePluginWorkerJobTypes, and
canonicalPluginWorkerJobType from worker_runtime.go. The simplified
buildPluginWorkerHandlers now delegates to
pluginworker.ResolveHandlerFactories, which resolves category names
("all", "default", "heavy") and explicit job type names/aliases.

The default job type is changed from an explicit list to "all", so new
handlers registered via init() are automatically picked up.

Update all tests to use the new API.

* plugin worker: update CLI help text for job categories

Update the -jobType flag description and command examples to document
category support (all, default, heavy) alongside explicit job type names.

* plugin worker: address review feedback

- Add CategoryAll constant; use typed constants in tokenAsCategory
- Pre-allocate result slice in ResolveHandlerFactories
- Add vacuum aliases (vol.vacuum, volume.vacuum)
- List alias examples (ec, balance, iceberg) in -jobType flag help
- Create handlers aggregator package for subpackage blank imports so
  new handler subpackages only need to be added in one place
- Make category tests relationship-based (subset/union checks) instead
  of asserting exact handler counts
- Add clarifying comments to worker_test.go and mini_plugin_test.go
  listing expected handler names next to count assertions

---------

Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
Chris Lu
2026-03-07 18:30:58 -08:00
committed by GitHub
parent f249fb7e63
commit 587c24ec89
12 changed files with 350 additions and 176 deletions

View File

@@ -33,6 +33,17 @@ s3.clean.uploads -timeAgo=24h`
// adminScriptTokenRegex matches one token at a time: a single-quoted span, a
// double-quoted span (both matched non-greedily, with the quotes included in
// the match), or a run of non-whitespace characters.
var adminScriptTokenRegex = regexp.MustCompile(`'.*?'|".*?"|\S+`)
func init() {
RegisterHandler(HandlerFactory{
JobType: "admin_script",
Category: CategoryDefault,
Aliases: []string{"admin-script", "admin.script", "script", "admin"},
Build: func(opts HandlerBuildOptions) (JobHandler, error) {
return NewAdminScriptHandler(opts.GrpcDialOption), nil
},
})
}
// AdminScriptHandler holds a gRPC dial option.
// NOTE(review): presumably this is the handler that NewAdminScriptHandler
// builds for the "admin_script" job type — confirm against the constructor.
type AdminScriptHandler struct {
grpcDialOption grpc.DialOption
}

View File

@@ -20,6 +20,17 @@ import (
"google.golang.org/protobuf/proto"
)
func init() {
RegisterHandler(HandlerFactory{
JobType: "erasure_coding",
Category: CategoryHeavy,
Aliases: []string{"erasure-coding", "erasure.coding", "ec"},
Build: func(opts HandlerBuildOptions) (JobHandler, error) {
return NewErasureCodingHandler(opts.GrpcDialOption, opts.WorkingDir), nil
},
})
}
type erasureCodingWorkerConfig struct {
TaskConfig *erasurecodingtask.Config
MinIntervalSeconds int

View File

@@ -0,0 +1,130 @@
package pluginworker
import (
"fmt"
"strings"
"sync"
"google.golang.org/grpc"
)
// JobCategory is a resource-profile label for job types. It lets a worker be
// configured with a category name instead of an explicit list of job types.
type JobCategory string

const (
	// CategoryAll is a pseudo-category that matches every handler.
	CategoryAll JobCategory = "all"
	// CategoryDefault marks lightweight jobs that are safe for any worker.
	CategoryDefault JobCategory = "default"
	// CategoryHeavy marks resource-intensive jobs.
	CategoryHeavy JobCategory = "heavy"
)
// HandlerFactory describes how to build a JobHandler for a single job type.
// Factories are added to the package registry with RegisterHandler and
// selected by ResolveHandlerFactories.
type HandlerFactory struct {
// JobType is the canonical job type string (e.g. "vacuum"). It is also the
// key ResolveHandlerFactories uses to deduplicate its result.
JobType string
// Category controls which category label selects this handler. The "all"
// category selects every handler regardless of this value.
Category JobCategory
// Aliases are alternative CLI names that resolve to this job type
// (e.g. "ec" for "erasure_coding"). They are matched case-insensitively.
Aliases []string
// Build constructs the JobHandler from the supplied build options.
Build func(opts HandlerBuildOptions) (JobHandler, error)
}
// HandlerBuildOptions carries parameters forwarded from the CLI to handler
// constructors. Each factory's Build function reads only the fields its
// handler needs.
type HandlerBuildOptions struct {
// GrpcDialOption is the gRPC dial option handed to handler constructors.
GrpcDialOption grpc.DialOption
// MaxExecute is converted to int32 and passed to the vacuum handler
// constructor. NOTE(review): presumably a cap on concurrent executions —
// confirm against the handler.
MaxExecute int
// WorkingDir is passed to the erasure coding handler constructor.
// NOTE(review): presumably a scratch directory for job data — confirm.
WorkingDir string
}
var (
// registryMu guards registry; it is held while appending to the registry and
// while copying it for resolution.
registryMu sync.Mutex
// registry holds the registered handler factories in registration order.
registry []HandlerFactory
)
// RegisterHandler appends f to the package-level handler registry while
// holding the registry lock. Handler files are expected to call it from their
// init() functions.
func RegisterHandler(f HandlerFactory) {
	registryMu.Lock()
	registry = append(registry, f)
	registryMu.Unlock()
}
// ResolveHandlerFactories resolves a comma-separated token list into handler
// factories. Each token is trimmed and lower-cased, and empty tokens are
// skipped. A token is either a category keyword ("all", "default", "heavy"),
// which selects every registered factory in that category, or a job type
// name or alias, which selects a single factory.
//
// Factories are returned in the order they were first selected, and each job
// type appears at most once. An unknown job type name, or a token list that
// selects nothing, yields an error.
func ResolveHandlerFactories(tokens string) ([]HandlerFactory, error) {
	// Resolve against a private copy so the lock is not held during lookup.
	registryMu.Lock()
	available := make([]HandlerFactory, len(registry))
	copy(available, registry)
	registryMu.Unlock()

	selected := make([]HandlerFactory, 0, len(available))
	picked := make(map[string]bool)

	// add appends f unless a factory with the same job type was already chosen.
	add := func(f HandlerFactory) {
		if picked[f.JobType] {
			return
		}
		picked[f.JobType] = true
		selected = append(selected, f)
	}

	for _, field := range strings.Split(tokens, ",") {
		name := strings.ToLower(strings.TrimSpace(field))
		if name == "" {
			continue
		}

		category, isCategory := tokenAsCategory(name)
		if !isCategory {
			// Explicit job type name or alias: must match exactly one factory.
			factory, err := findFactory(available, name)
			if err != nil {
				return nil, err
			}
			add(factory)
			continue
		}

		for _, f := range available {
			if category == CategoryAll || f.Category == category {
				add(f)
			}
		}
	}

	if len(selected) == 0 {
		return nil, fmt.Errorf("no job types resolved from %q", tokens)
	}
	return selected, nil
}
// tokenAsCategory reports whether tok is one of the category keywords ("all",
// "default", "heavy") and, if so, returns it as a JobCategory. The comparison
// is exact (case-sensitive); ResolveHandlerFactories lower-cases tokens before
// calling. "all" is a pseudo-category that matches every registered handler.
func tokenAsCategory(tok string) (JobCategory, bool) {
	switch cat := JobCategory(tok); cat {
	case CategoryAll, CategoryDefault, CategoryHeavy:
		return cat, true
	}
	return "", false
}
// findFactory returns the first factory in factories whose canonical job type
// or any alias equals tok, compared case-insensitively. Factories are scanned
// in slice order, and within a factory the job type is checked before the
// aliases. If nothing matches, it returns a zero HandlerFactory and an
// "unknown job type" error.
func findFactory(factories []HandlerFactory, tok string) (HandlerFactory, error) {
	for i := range factories {
		candidate := factories[i]

		matched := strings.EqualFold(candidate.JobType, tok)
		for j := 0; !matched && j < len(candidate.Aliases); j++ {
			matched = strings.EqualFold(candidate.Aliases[j], tok)
		}
		if matched {
			return candidate, nil
		}
	}
	return HandlerFactory{}, fmt.Errorf("unknown job type %q", tok)
}

View File

@@ -0,0 +1,9 @@
// Package handlers is an aggregator that blank-imports every plugin worker
// handler subpackage so their init() functions register with the handler
// registry. Import this package instead of individual subpackages when you
// need all handlers available.
package handlers
import (
_ "github.com/seaweedfs/seaweedfs/weed/plugin/worker/iceberg" // register iceberg_maintenance handler
)

View File

@@ -14,6 +14,17 @@ import (
"google.golang.org/protobuf/types/known/timestamppb"
)
// init registers the Iceberg maintenance handler factory with the plugin
// worker registry in the heavy category, together with its CLI aliases.
func init() {
	build := func(opts pluginworker.HandlerBuildOptions) (pluginworker.JobHandler, error) {
		return NewHandler(opts.GrpcDialOption), nil
	}

	pluginworker.RegisterHandler(pluginworker.HandlerFactory{
		JobType:  jobType,
		Category: pluginworker.CategoryHeavy,
		Aliases:  []string{"iceberg-maintenance", "iceberg.maintenance", "iceberg"},
		Build:    build,
	})
}
// Handler implements the JobHandler interface for Iceberg table maintenance:
// snapshot expiration, orphan file removal, and manifest rewriting.
type Handler struct {

View File

@@ -23,6 +23,17 @@ const (
DefaultMaxExecutionConcurrency = int32(2)
)
func init() {
RegisterHandler(HandlerFactory{
JobType: "vacuum",
Category: CategoryDefault,
Aliases: []string{"vol.vacuum", "volume.vacuum"},
Build: func(opts HandlerBuildOptions) (JobHandler, error) {
return NewVacuumHandler(opts.GrpcDialOption, int32(opts.MaxExecute)), nil
},
})
}
// VacuumHandler is the plugin job handler for vacuum job type.
type VacuumHandler struct {
grpcDialOption grpc.DialOption

View File

@@ -21,6 +21,17 @@ const (
defaultBalanceTimeoutSeconds = int32(10 * 60)
)
func init() {
RegisterHandler(HandlerFactory{
JobType: "volume_balance",
Category: CategoryDefault,
Aliases: []string{"balance", "volume.balance", "volume-balance"},
Build: func(opts HandlerBuildOptions) (JobHandler, error) {
return NewVolumeBalanceHandler(opts.GrpcDialOption), nil
},
})
}
type volumeBalanceWorkerConfig struct {
TaskConfig *balancetask.Config
MinIntervalSeconds int