add admin script worker (#8491)
* admin: add plugin lock coordination * shell: allow bypassing lock checks * plugin worker: add admin script handler * mini: include admin_script in plugin defaults * admin script UI: drop name and enlarge text * admin script: add default script * admin_script: make run interval configurable * plugin: gate other jobs during admin_script runs * plugin: use last completed admin_script run * admin: backfill plugin config defaults * templ Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * comparable to default version Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * default to run Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * format Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * shell: respect pre-set noLock for fix.replication * shell: add force no-lock mode for admin scripts * volume balance worker already exists Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * admin: expose scheduler status JSON * shell: add sleep command * shell: restrict sleep syntax * Revert "shell: respect pre-set noLock for fix.replication" This reverts commit 2b14e8b82602a740d3a473c085e3b3a14f1ddbb3. * templ Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * fix import Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * less logs Co-Authored-By: Copilot <223556219+Copilot@users.noreply.github.com> * Reduce master client logs on canceled contexts * Update mini default job type count --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -98,6 +98,7 @@ type AdminServer struct {
|
||||
// Maintenance system
|
||||
maintenanceManager *maintenance.MaintenanceManager
|
||||
plugin *adminplugin.Plugin
|
||||
pluginLock *AdminLockManager
|
||||
expireJobHandler func(jobID string, reason string) (*adminplugin.TrackedJob, bool, error)
|
||||
|
||||
// Topic retention purger
|
||||
@@ -135,6 +136,8 @@ func NewAdminServer(masters string, templateFS http.FileSystem, dataDir string,
|
||||
ctx := context.Background()
|
||||
go masterClient.KeepConnectedToMaster(ctx)
|
||||
|
||||
lockManager := NewAdminLockManager(masterClient, adminLockClientName)
|
||||
|
||||
server := &AdminServer{
|
||||
masterClient: masterClient,
|
||||
templateFS: templateFS,
|
||||
@@ -146,6 +149,7 @@ func NewAdminServer(masters string, templateFS http.FileSystem, dataDir string,
|
||||
collectionStatsCacheThreshold: defaultStatsCacheTimeout,
|
||||
s3TablesManager: newS3TablesManager(),
|
||||
icebergPort: icebergPort,
|
||||
pluginLock: lockManager,
|
||||
}
|
||||
|
||||
// Initialize topic retention purger
|
||||
@@ -229,6 +233,7 @@ func NewAdminServer(masters string, templateFS http.FileSystem, dataDir string,
|
||||
ClusterContextProvider: func(_ context.Context) (*plugin_pb.ClusterContext, error) {
|
||||
return server.buildDefaultPluginClusterContext(), nil
|
||||
},
|
||||
LockManager: lockManager,
|
||||
})
|
||||
if err != nil && dataDir != "" {
|
||||
glog.Warningf("Failed to initialize plugin with dataDir=%q: %v. Falling back to in-memory plugin state.", dataDir, err)
|
||||
@@ -237,6 +242,7 @@ func NewAdminServer(masters string, templateFS http.FileSystem, dataDir string,
|
||||
ClusterContextProvider: func(_ context.Context) (*plugin_pb.ClusterContext, error) {
|
||||
return server.buildDefaultPluginClusterContext(), nil
|
||||
},
|
||||
LockManager: lockManager,
|
||||
})
|
||||
}
|
||||
if err != nil {
|
||||
@@ -890,6 +896,13 @@ func (s *AdminServer) GetPlugin() *adminplugin.Plugin {
|
||||
return s.plugin
|
||||
}
|
||||
|
||||
// acquirePluginLock acquires the admin plugin lock for the given reason and
// returns a release function together with any acquisition error.
//
// When the receiver is nil or no pluginLock is configured, it returns a no-op
// release function and a nil error, so callers can always invoke (or defer)
// the returned function on the success path without a nil check.
// Otherwise the call is delegated to s.pluginLock.Acquire(reason) and its
// results are returned unchanged.
func (s *AdminServer) acquirePluginLock(reason string) (func(), error) {
|
||||
// No lock manager available: nothing to acquire, hand back a no-op release.
if s == nil || s.pluginLock == nil {
|
||||
return func() {}, nil
|
||||
}
|
||||
return s.pluginLock.Acquire(reason)
|
||||
}
|
||||
|
||||
// RequestPluginJobTypeDescriptor asks one worker for job type schema and returns the descriptor.
|
||||
func (s *AdminServer) RequestPluginJobTypeDescriptor(ctx context.Context, jobType string, forceRefresh bool) (*plugin_pb.JobTypeDescriptor, error) {
|
||||
if s.plugin == nil {
|
||||
@@ -932,6 +945,13 @@ func (s *AdminServer) RunPluginDetection(
|
||||
if s.plugin == nil {
|
||||
return nil, fmt.Errorf("plugin is not enabled")
|
||||
}
|
||||
releaseLock, err := s.acquirePluginLock(fmt.Sprintf("plugin detection %s", jobType))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if releaseLock != nil {
|
||||
defer releaseLock()
|
||||
}
|
||||
return s.plugin.RunDetection(ctx, jobType, clusterContext, maxResults)
|
||||
}
|
||||
|
||||
@@ -957,6 +977,13 @@ func (s *AdminServer) RunPluginDetectionWithReport(
|
||||
if s.plugin == nil {
|
||||
return nil, fmt.Errorf("plugin is not enabled")
|
||||
}
|
||||
releaseLock, err := s.acquirePluginLock(fmt.Sprintf("plugin detection %s", jobType))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if releaseLock != nil {
|
||||
defer releaseLock()
|
||||
}
|
||||
return s.plugin.RunDetectionWithReport(ctx, jobType, clusterContext, maxResults)
|
||||
}
|
||||
|
||||
@@ -970,6 +997,17 @@ func (s *AdminServer) ExecutePluginJob(
|
||||
if s.plugin == nil {
|
||||
return nil, fmt.Errorf("plugin is not enabled")
|
||||
}
|
||||
jobType := ""
|
||||
if job != nil {
|
||||
jobType = strings.TrimSpace(job.JobType)
|
||||
}
|
||||
releaseLock, err := s.acquirePluginLock(fmt.Sprintf("plugin execution %s", jobType))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if releaseLock != nil {
|
||||
defer releaseLock()
|
||||
}
|
||||
return s.plugin.ExecuteJob(ctx, job, clusterContext, attempt)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user