Fix Maintenance Task Sorting and Refactor Log Persistence (#8199)
* fix float stepping * do not auto refresh * only logs when non 200 status * fix maintenance task sorting and cleanup redundant handler logic * Refactor log retrieval to persist to disk and fix slowness - Move log retrieval to disk-based persistence in GetMaintenanceTaskDetail - Implement background log fetching on task completion in worker_grpc_server.go - Implement async background refresh for in-progress tasks - Completely remove blocking gRPC calls from the UI path to fix 10s timeouts - Cleanup debug logs and performance profiling code * Ensure consistent deterministic sorting in config_persistence cleanup * Replace magic numbers with constants and remove debug logs - Added descriptive constants for truncation limits and timeouts in admin_server.go and worker_grpc_server.go - Replaced magic numbers with these constants throughout the codebase - Verified removal of stdout debug printing - Ensured consistent truncation logic during log persistence * Address code review feedback on history truncation and logging logic - Fix AssignmentHistory double-serialization by copying task in GetMaintenanceTaskDetail - Fix handleTaskCompletion logging logic (mutually exclusive success/failure logs) - Remove unused Timeout field from LogRequestContext and sync select timeouts with constants - Ensure AssignmentHistory is only provided in the top-level field for better JSON structure * Implement goroutine leak protection and request deduplication - Add request deduplication in RequestTaskLogs to prevent multiple concurrent fetches for the same task - Implement safe cleanup in timeout handlers to avoid race conditions in pendingLogRequests map - Add a 10s cooldown for background log refreshes in GetMaintenanceTaskDetail to prevent spamming - Ensure all persistent log-fetching goroutines are bounded and efficiently managed * Fix potential nil pointer panics in maintenance handlers - Add nil checks for adminServer in ShowTaskDetail, ShowMaintenanceWorkers, and UpdateTaskConfig - 
Update getMaintenanceQueueData to return a descriptive error instead of nil when adminServer is uninitialized - Ensure internal helper methods consistently check for adminServer initialization before use * Strictly enforce disk-only log reading - Remove background log fetching from GetMaintenanceTaskDetail to prevent timeouts and network calls during page view - Remove unused lastLogFetch tracking fields to clean up dead code - Ensure logs are only updated upon task completion via handleTaskCompletion * Refactor GetWorkerLogs to read from disk - Update /api/maintenance/workers/:id/logs endpoint to use configPersistence.LoadTaskExecutionLogs - Remove synchronous gRPC call RequestTaskLogs to prevent timeouts and bad gateway errors - Ensure consistent log retrieval behavior across the application (disk-only) * Fix timestamp parsing in log viewer - Update task_detail.templ JS to handle both ISO 8601 strings and Unix timestamps - Fix "Invalid time value" error when displaying logs fetched from disk - Regenerate templates * master: fallback to HDD if SSD volumes are full in Assign * worker: improve EC detection logging and fix skip counters * worker: add Sync method to TaskLogger interface * worker: implement Sync and ensure logs are flushed before task completion * admin: improve task log retrieval with retries and better timeouts * admin: robust timestamp parsing in task detail view
This commit is contained in:
@@ -5,7 +5,6 @@ import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -33,6 +32,17 @@ import (
|
||||
_ "github.com/seaweedfs/seaweedfs/weed/credential/grpc" // Register gRPC credential store
|
||||
)
|
||||
|
||||
const (
|
||||
maxAssignmentHistoryDisplay = 50
|
||||
maxLogMessageLength = 2000
|
||||
maxLogFields = 20
|
||||
maxRelatedTasksDisplay = 50
|
||||
maxRecentTasksDisplay = 10
|
||||
defaultCacheTimeout = 10 * time.Second
|
||||
defaultFilerCacheTimeout = 30 * time.Second
|
||||
defaultStatsCacheTimeout = 30 * time.Second
|
||||
)
|
||||
|
||||
// FilerConfig holds filer configuration needed for bucket operations
|
||||
type FilerConfig struct {
|
||||
BucketsPath string
|
||||
@@ -132,10 +142,10 @@ func NewAdminServer(masters string, templateFS http.FileSystem, dataDir string,
|
||||
templateFS: templateFS,
|
||||
dataDir: dataDir,
|
||||
grpcDialOption: grpcDialOption,
|
||||
cacheExpiration: 10 * time.Second,
|
||||
filerCacheExpiration: 30 * time.Second, // Cache filers for 30 seconds
|
||||
cacheExpiration: defaultCacheTimeout,
|
||||
filerCacheExpiration: defaultFilerCacheTimeout,
|
||||
configPersistence: NewConfigPersistence(dataDir),
|
||||
collectionStatsCacheThreshold: 30 * time.Second,
|
||||
collectionStatsCacheThreshold: defaultStatsCacheTimeout,
|
||||
s3TablesManager: newS3TablesManager(),
|
||||
icebergPort: icebergPort,
|
||||
}
|
||||
@@ -779,7 +789,7 @@ func (s *AdminServer) GetClusterBrokers() (*ClusterBrokersData, error) {
|
||||
|
||||
// ShowMaintenanceQueue displays the maintenance queue page
|
||||
func (as *AdminServer) ShowMaintenanceQueue(c *gin.Context) {
|
||||
data, err := as.getMaintenanceQueueData()
|
||||
data, err := as.GetMaintenanceQueueData()
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
@@ -868,7 +878,7 @@ func (as *AdminServer) TriggerMaintenanceScan(c *gin.Context) {
|
||||
|
||||
// GetMaintenanceTasks returns all maintenance tasks
|
||||
func (as *AdminServer) GetMaintenanceTasks(c *gin.Context) {
|
||||
tasks, err := as.getMaintenanceTasks()
|
||||
tasks, err := as.GetAllMaintenanceTasks()
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
@@ -1032,9 +1042,9 @@ func (as *AdminServer) UpdateMaintenanceConfigData(config *maintenance.Maintenan
|
||||
|
||||
// Helper methods for maintenance operations
|
||||
|
||||
// getMaintenanceQueueData returns data for the maintenance queue UI
|
||||
func (as *AdminServer) getMaintenanceQueueData() (*maintenance.MaintenanceQueueData, error) {
|
||||
tasks, err := as.getMaintenanceTasks()
|
||||
// GetMaintenanceQueueData returns data for the maintenance queue UI
|
||||
func (as *AdminServer) GetMaintenanceQueueData() (*maintenance.MaintenanceQueueData, error) {
|
||||
tasks, err := as.GetAllMaintenanceTasks()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -1089,14 +1099,16 @@ func (as *AdminServer) getMaintenanceQueueStats() (*maintenance.QueueStats, erro
|
||||
return queueStats, nil
|
||||
}
|
||||
|
||||
// getMaintenanceTasks returns all maintenance tasks
|
||||
func (as *AdminServer) getMaintenanceTasks() ([]*maintenance.MaintenanceTask, error) {
|
||||
// GetAllMaintenanceTasks returns all maintenance tasks
|
||||
func (as *AdminServer) GetAllMaintenanceTasks() ([]*maintenance.MaintenanceTask, error) {
|
||||
if as.maintenanceManager == nil {
|
||||
return []*maintenance.MaintenanceTask{}, nil
|
||||
}
|
||||
|
||||
// Collect all tasks from memory across all statuses
|
||||
allTasks := []*maintenance.MaintenanceTask{}
|
||||
// 1. Collect all tasks from memory
|
||||
tasksMap := make(map[string]*maintenance.MaintenanceTask)
|
||||
|
||||
// Collect from memory via GetTasks loop to ensure we catch everything
|
||||
statuses := []maintenance.MaintenanceTaskStatus{
|
||||
maintenance.TaskStatusPending,
|
||||
maintenance.TaskStatusAssigned,
|
||||
@@ -1108,29 +1120,92 @@ func (as *AdminServer) getMaintenanceTasks() ([]*maintenance.MaintenanceTask, er
|
||||
|
||||
for _, status := range statuses {
|
||||
tasks := as.maintenanceManager.GetTasks(status, "", 0)
|
||||
allTasks = append(allTasks, tasks...)
|
||||
for _, t := range tasks {
|
||||
tasksMap[t.ID] = t
|
||||
}
|
||||
}
|
||||
|
||||
// Also load any persisted tasks that might not be in memory
|
||||
// 2. Merge persisted tasks
|
||||
if as.configPersistence != nil {
|
||||
persistedTasks, err := as.configPersistence.LoadAllTaskStates()
|
||||
if err == nil {
|
||||
// Add any persisted tasks not already in memory
|
||||
for _, persistedTask := range persistedTasks {
|
||||
found := false
|
||||
for _, memoryTask := range allTasks {
|
||||
if memoryTask.ID == persistedTask.ID {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
allTasks = append(allTasks, persistedTask)
|
||||
for _, t := range persistedTasks {
|
||||
if _, exists := tasksMap[t.ID]; !exists {
|
||||
tasksMap[t.ID] = t
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Group tasks into buckets by status
|
||||
var pendingTasks, activeTasks, finishedTasks []*maintenance.MaintenanceTask
|
||||
|
||||
for _, t := range tasksMap {
|
||||
switch t.Status {
|
||||
case maintenance.TaskStatusPending:
|
||||
pendingTasks = append(pendingTasks, t)
|
||||
case maintenance.TaskStatusAssigned, maintenance.TaskStatusInProgress:
|
||||
activeTasks = append(activeTasks, t)
|
||||
case maintenance.TaskStatusCompleted, maintenance.TaskStatusFailed, maintenance.TaskStatusCancelled:
|
||||
finishedTasks = append(finishedTasks, t)
|
||||
default:
|
||||
// Treat unknown as finished/archived? Or pending?
|
||||
// Safest to add to finished so they appear somewhere
|
||||
finishedTasks = append(finishedTasks, t)
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Sort buckets
|
||||
// Pending: Newest Created First
|
||||
sort.Slice(pendingTasks, func(i, j int) bool {
|
||||
return pendingTasks[i].CreatedAt.After(pendingTasks[j].CreatedAt)
|
||||
})
|
||||
|
||||
// Active: Newest Created First (or StartedAt?)
|
||||
sort.Slice(activeTasks, func(i, j int) bool {
|
||||
return activeTasks[i].CreatedAt.After(activeTasks[j].CreatedAt)
|
||||
})
|
||||
|
||||
// Finished: Newest Completed First
|
||||
sort.Slice(finishedTasks, func(i, j int) bool {
|
||||
t1 := finishedTasks[i].CompletedAt
|
||||
t2 := finishedTasks[j].CompletedAt
|
||||
|
||||
// Handle nil completion times
|
||||
if t1 == nil && t2 == nil {
|
||||
// Both nil, fallback to CreatedAt
|
||||
if !finishedTasks[i].CreatedAt.Equal(finishedTasks[j].CreatedAt) {
|
||||
return finishedTasks[i].CreatedAt.After(finishedTasks[j].CreatedAt)
|
||||
}
|
||||
return finishedTasks[i].ID > finishedTasks[j].ID
|
||||
}
|
||||
if t1 == nil {
|
||||
return false // t1 (nil) goes to bottom
|
||||
}
|
||||
if t2 == nil {
|
||||
return true // t2 (nil) goes to bottom
|
||||
}
|
||||
|
||||
// Compare completion times
|
||||
if !t1.Equal(*t2) {
|
||||
return t1.After(*t2)
|
||||
}
|
||||
|
||||
// Fallback to CreatedAt if completion times are identical
|
||||
if !finishedTasks[i].CreatedAt.Equal(finishedTasks[j].CreatedAt) {
|
||||
return finishedTasks[i].CreatedAt.After(finishedTasks[j].CreatedAt)
|
||||
}
|
||||
|
||||
// Final tie-breaker: ID
|
||||
return finishedTasks[i].ID > finishedTasks[j].ID
|
||||
})
|
||||
|
||||
// 5. Recombine
|
||||
allTasks := make([]*maintenance.MaintenanceTask, 0, len(tasksMap))
|
||||
allTasks = append(allTasks, pendingTasks...)
|
||||
allTasks = append(allTasks, activeTasks...)
|
||||
allTasks = append(allTasks, finishedTasks...)
|
||||
|
||||
return allTasks, nil
|
||||
}
|
||||
|
||||
@@ -1181,15 +1256,25 @@ func (as *AdminServer) GetMaintenanceTaskDetail(taskID string) (*maintenance.Tas
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Copy task and truncate assignment history for display
|
||||
displayTask := *task
|
||||
displayTask.AssignmentHistory = nil // History is provided separately in taskDetail
|
||||
|
||||
// Create task detail structure from the loaded task
|
||||
taskDetail := &maintenance.TaskDetailData{
|
||||
Task: task,
|
||||
Task: &displayTask,
|
||||
AssignmentHistory: task.AssignmentHistory, // Use assignment history from persisted task
|
||||
ExecutionLogs: []*maintenance.TaskExecutionLog{},
|
||||
RelatedTasks: []*maintenance.MaintenanceTask{},
|
||||
LastUpdated: time.Now(),
|
||||
}
|
||||
|
||||
// Truncate assignment history if it's too long (display last N only)
|
||||
if len(taskDetail.AssignmentHistory) > maxAssignmentHistoryDisplay {
|
||||
startIdx := len(taskDetail.AssignmentHistory) - maxAssignmentHistoryDisplay
|
||||
taskDetail.AssignmentHistory = taskDetail.AssignmentHistory[startIdx:]
|
||||
}
|
||||
|
||||
if taskDetail.AssignmentHistory == nil {
|
||||
taskDetail.AssignmentHistory = []*maintenance.TaskAssignmentRecord{}
|
||||
}
|
||||
@@ -1205,72 +1290,19 @@ func (as *AdminServer) GetMaintenanceTaskDetail(taskID string) (*maintenance.Tas
|
||||
}
|
||||
}
|
||||
|
||||
// Get execution logs from worker if task is active/completed and worker is connected
|
||||
if task.Status == maintenance.TaskStatusInProgress || task.Status == maintenance.TaskStatusCompleted {
|
||||
if as.workerGrpcServer != nil && task.WorkerID != "" {
|
||||
workerLogs, err := as.workerGrpcServer.RequestTaskLogs(task.WorkerID, taskID, 100, "")
|
||||
if err == nil && len(workerLogs) > 0 {
|
||||
// Convert worker logs to maintenance logs
|
||||
for _, workerLog := range workerLogs {
|
||||
maintenanceLog := &maintenance.TaskExecutionLog{
|
||||
Timestamp: time.Unix(workerLog.Timestamp, 0),
|
||||
Level: workerLog.Level,
|
||||
Message: workerLog.Message,
|
||||
Source: "worker",
|
||||
TaskID: taskID,
|
||||
WorkerID: task.WorkerID,
|
||||
}
|
||||
// carry structured fields if present
|
||||
if len(workerLog.Fields) > 0 {
|
||||
maintenanceLog.Fields = make(map[string]string, len(workerLog.Fields))
|
||||
for k, v := range workerLog.Fields {
|
||||
maintenanceLog.Fields[k] = v
|
||||
}
|
||||
}
|
||||
// carry optional progress/status
|
||||
if workerLog.Progress != 0 {
|
||||
p := float64(workerLog.Progress)
|
||||
maintenanceLog.Progress = &p
|
||||
}
|
||||
if workerLog.Status != "" {
|
||||
maintenanceLog.Status = workerLog.Status
|
||||
}
|
||||
taskDetail.ExecutionLogs = append(taskDetail.ExecutionLogs, maintenanceLog)
|
||||
}
|
||||
} else if err != nil {
|
||||
// Add a diagnostic log entry when worker logs cannot be retrieved
|
||||
diagnosticLog := &maintenance.TaskExecutionLog{
|
||||
Timestamp: time.Now(),
|
||||
Level: "WARNING",
|
||||
Message: fmt.Sprintf("Failed to retrieve worker logs: %v", err),
|
||||
Source: "admin",
|
||||
TaskID: taskID,
|
||||
WorkerID: task.WorkerID,
|
||||
}
|
||||
taskDetail.ExecutionLogs = append(taskDetail.ExecutionLogs, diagnosticLog)
|
||||
glog.V(1).Infof("Failed to get worker logs for task %s from worker %s: %v", taskID, task.WorkerID, err)
|
||||
}
|
||||
// Load execution logs from disk
|
||||
if as.configPersistence != nil {
|
||||
logs, err := as.configPersistence.LoadTaskExecutionLogs(taskID)
|
||||
if err == nil {
|
||||
taskDetail.ExecutionLogs = logs
|
||||
} else {
|
||||
// Add diagnostic information when worker is not available
|
||||
reason := "worker gRPC server not available"
|
||||
if task.WorkerID == "" {
|
||||
reason = "no worker assigned to task"
|
||||
}
|
||||
diagnosticLog := &maintenance.TaskExecutionLog{
|
||||
Timestamp: time.Now(),
|
||||
Level: "INFO",
|
||||
Message: fmt.Sprintf("Worker logs not available: %s", reason),
|
||||
Source: "admin",
|
||||
TaskID: taskID,
|
||||
WorkerID: task.WorkerID,
|
||||
}
|
||||
taskDetail.ExecutionLogs = append(taskDetail.ExecutionLogs, diagnosticLog)
|
||||
glog.V(2).Infof("No execution logs found on disk for task %s", taskID)
|
||||
}
|
||||
}
|
||||
|
||||
// Get related tasks (other tasks on same volume/server)
|
||||
if task.VolumeID != 0 || task.Server != "" {
|
||||
allTasks := as.maintenanceManager.GetTasks("", "", 50) // Get recent tasks
|
||||
allTasks := as.maintenanceManager.GetTasks("", "", maxRelatedTasksDisplay) // Get recent tasks
|
||||
for _, relatedTask := range allTasks {
|
||||
if relatedTask.ID != taskID &&
|
||||
(relatedTask.VolumeID == task.VolumeID || relatedTask.Server == task.Server) {
|
||||
@@ -1324,7 +1356,7 @@ func (as *AdminServer) getMaintenanceWorkerDetails(workerID string) (*WorkerDeta
|
||||
}
|
||||
|
||||
// Get recent tasks for this worker
|
||||
recentTasks := as.maintenanceManager.GetTasks(TaskStatusCompleted, "", 10)
|
||||
recentTasks := as.maintenanceManager.GetTasks(TaskStatusCompleted, "", maxRecentTasksDisplay)
|
||||
var workerRecentTasks []*MaintenanceTask
|
||||
for _, task := range recentTasks {
|
||||
if task.WorkerID == workerID {
|
||||
@@ -1336,12 +1368,13 @@ func (as *AdminServer) getMaintenanceWorkerDetails(workerID string) (*WorkerDeta
|
||||
var totalDuration time.Duration
|
||||
var completedTasks, failedTasks int
|
||||
for _, task := range workerRecentTasks {
|
||||
if task.Status == TaskStatusCompleted {
|
||||
switch task.Status {
|
||||
case TaskStatusCompleted:
|
||||
completedTasks++
|
||||
if task.StartedAt != nil && task.CompletedAt != nil {
|
||||
totalDuration += task.CompletedAt.Sub(*task.StartedAt)
|
||||
}
|
||||
} else if task.Status == TaskStatusFailed {
|
||||
case TaskStatusFailed:
|
||||
failedTasks++
|
||||
}
|
||||
}
|
||||
@@ -1370,31 +1403,29 @@ func (as *AdminServer) getMaintenanceWorkerDetails(workerID string) (*WorkerDeta
|
||||
}, nil
|
||||
}
|
||||
|
||||
// GetWorkerLogs fetches logs from a specific worker for a task
|
||||
// GetWorkerLogs fetches logs from a specific worker for a task (now reads from disk)
|
||||
func (as *AdminServer) GetWorkerLogs(c *gin.Context) {
|
||||
workerID := c.Param("id")
|
||||
taskID := c.Query("taskId")
|
||||
maxEntriesStr := c.DefaultQuery("maxEntries", "100")
|
||||
logLevel := c.DefaultQuery("logLevel", "")
|
||||
|
||||
maxEntries := int32(100)
|
||||
if maxEntriesStr != "" {
|
||||
if parsed, err := strconv.ParseInt(maxEntriesStr, 10, 32); err == nil {
|
||||
maxEntries = int32(parsed)
|
||||
}
|
||||
}
|
||||
|
||||
if as.workerGrpcServer == nil {
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "Worker gRPC server not available"})
|
||||
// Check config persistence first
|
||||
if as.configPersistence == nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Config persistence not available"})
|
||||
return
|
||||
}
|
||||
|
||||
logs, err := as.workerGrpcServer.RequestTaskLogs(workerID, taskID, maxEntries, logLevel)
|
||||
// Load logs strictly from disk to avoid timeouts and network dependency
|
||||
// This matches the behavior of the Task Detail page
|
||||
logs, err := as.configPersistence.LoadTaskExecutionLogs(taskID)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadGateway, gin.H{"error": fmt.Sprintf("Failed to get logs from worker: %v", err)})
|
||||
return
|
||||
glog.V(2).Infof("No execution logs found on disk for task %s: %v", taskID, err)
|
||||
logs = []*maintenance.TaskExecutionLog{}
|
||||
}
|
||||
|
||||
// Filter logs by workerID if strictly needed, but usually task logs are what we want
|
||||
// The persistent logs struct (TaskExecutionLog) matches what the frontend expects for the detail view
|
||||
// ensuring consistent display.
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{"worker_id": workerID, "task_id": taskID, "logs": logs, "count": len(logs)})
|
||||
}
|
||||
|
||||
|
||||
@@ -962,7 +962,36 @@ func (cp *ConfigPersistence) CleanupCompletedTasks() error {
|
||||
|
||||
// Sort by completion time (most recent first)
|
||||
sort.Slice(completedTasks, func(i, j int) bool {
|
||||
return completedTasks[i].CompletedAt.After(*completedTasks[j].CompletedAt)
|
||||
t1 := completedTasks[i].CompletedAt
|
||||
t2 := completedTasks[j].CompletedAt
|
||||
|
||||
// Handle nil completion times
|
||||
if t1 == nil && t2 == nil {
|
||||
// Both nil, fallback to CreatedAt
|
||||
if !completedTasks[i].CreatedAt.Equal(completedTasks[j].CreatedAt) {
|
||||
return completedTasks[i].CreatedAt.After(completedTasks[j].CreatedAt)
|
||||
}
|
||||
return completedTasks[i].ID > completedTasks[j].ID
|
||||
}
|
||||
if t1 == nil {
|
||||
return false // t1 (nil) goes to bottom
|
||||
}
|
||||
if t2 == nil {
|
||||
return true // t2 (nil) goes to bottom
|
||||
}
|
||||
|
||||
// Compare completion times
|
||||
if !t1.Equal(*t2) {
|
||||
return t1.After(*t2)
|
||||
}
|
||||
|
||||
// Fallback to CreatedAt if completion times are identical
|
||||
if !completedTasks[i].CreatedAt.Equal(completedTasks[j].CreatedAt) {
|
||||
return completedTasks[i].CreatedAt.After(completedTasks[j].CreatedAt)
|
||||
}
|
||||
|
||||
// Final tie-breaker: ID
|
||||
return completedTasks[i].ID > completedTasks[j].ID
|
||||
})
|
||||
|
||||
// Keep only the most recent MaxCompletedTasks, delete the rest
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/seaweedfs/seaweedfs/weed/admin/maintenance"
|
||||
"github.com/seaweedfs/seaweedfs/weed/glog"
|
||||
"github.com/seaweedfs/seaweedfs/weed/pb"
|
||||
"github.com/seaweedfs/seaweedfs/weed/pb/worker_pb"
|
||||
@@ -17,6 +18,15 @@ import (
|
||||
"google.golang.org/grpc/peer"
|
||||
)
|
||||
|
||||
const (
|
||||
maxLogFetchLimit = 1000
|
||||
maxLogMessageSize = 2000
|
||||
maxLogFieldsCount = 20
|
||||
logRequestTimeout = 10 * time.Second
|
||||
logResponseTimeout = 30 * time.Second
|
||||
logSendTimeout = 10 * time.Second
|
||||
)
|
||||
|
||||
// WorkerGrpcServer implements the WorkerService gRPC interface
|
||||
type WorkerGrpcServer struct {
|
||||
worker_pb.UnimplementedWorkerServiceServer
|
||||
@@ -42,7 +52,6 @@ type LogRequestContext struct {
|
||||
TaskID string
|
||||
WorkerID string
|
||||
ResponseCh chan *worker_pb.TaskLogResponse
|
||||
Timeout time.Time
|
||||
}
|
||||
|
||||
// WorkerConnection represents an active worker connection
|
||||
@@ -89,8 +98,9 @@ func (s *WorkerGrpcServer) StartWithTLS(port int) error {
|
||||
s.listener = listener
|
||||
s.running = true
|
||||
|
||||
// Start cleanup routine
|
||||
// Start background routines
|
||||
go s.cleanupRoutine()
|
||||
go s.activeLogFetchLoop()
|
||||
|
||||
// Start serving in a goroutine
|
||||
go func() {
|
||||
@@ -437,9 +447,90 @@ func (s *WorkerGrpcServer) handleTaskCompletion(conn *WorkerConnection, completi
|
||||
} else {
|
||||
glog.Errorf("Worker %s failed task %s: %s", conn.workerID, completion.TaskId, completion.ErrorMessage)
|
||||
}
|
||||
|
||||
// Fetch and persist logs
|
||||
go s.FetchAndSaveLogs(conn.workerID, completion.TaskId)
|
||||
}
|
||||
}
|
||||
|
||||
// FetchAndSaveLogs retrieves logs from a worker and saves them to disk
|
||||
func (s *WorkerGrpcServer) FetchAndSaveLogs(workerID, taskID string) error {
|
||||
// Add a small initial delay to allow worker to finalize and sync logs
|
||||
// especially when this is called immediately after TaskComplete
|
||||
time.Sleep(300 * time.Millisecond)
|
||||
|
||||
var workerLogs []*worker_pb.TaskLogEntry
|
||||
var err error
|
||||
|
||||
// Retry a few times if fetch fails, as logs might be in the middle of a terminal sync
|
||||
for attempt := 1; attempt <= 3; attempt++ {
|
||||
workerLogs, err = s.RequestTaskLogs(workerID, taskID, maxLogFetchLimit, "")
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
if attempt < 3 {
|
||||
glog.V(1).Infof("Fetch logs attempt %d failed for task %s: %v. Retrying in 1s...", attempt, taskID, err)
|
||||
time.Sleep(1 * time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
glog.Warningf("Failed to fetch logs for task %s after 3 attempts: %v", taskID, err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Convert logs
|
||||
var maintenanceLogs []*maintenance.TaskExecutionLog
|
||||
for _, workerLog := range workerLogs {
|
||||
maintenanceLog := &maintenance.TaskExecutionLog{
|
||||
Timestamp: time.Unix(workerLog.Timestamp, 0),
|
||||
Level: workerLog.Level,
|
||||
Message: workerLog.Message,
|
||||
Source: "worker",
|
||||
TaskID: taskID,
|
||||
WorkerID: workerID,
|
||||
}
|
||||
|
||||
// Truncate very long messages to prevent rendering issues and disk bloat
|
||||
if len(maintenanceLog.Message) > maxLogMessageSize {
|
||||
maintenanceLog.Message = maintenanceLog.Message[:maxLogMessageSize] + "... (truncated)"
|
||||
}
|
||||
|
||||
// carry structured fields if present
|
||||
if len(workerLog.Fields) > 0 {
|
||||
maintenanceLog.Fields = make(map[string]string)
|
||||
fieldCount := 0
|
||||
for k, v := range workerLog.Fields {
|
||||
if fieldCount >= maxLogFieldsCount {
|
||||
maintenanceLog.Fields["..."] = fmt.Sprintf("(%d more fields truncated)", len(workerLog.Fields)-maxLogFieldsCount)
|
||||
break
|
||||
}
|
||||
maintenanceLog.Fields[k] = v
|
||||
fieldCount++
|
||||
}
|
||||
}
|
||||
|
||||
// carry optional progress/status
|
||||
if workerLog.Progress != 0 {
|
||||
p := float64(workerLog.Progress)
|
||||
maintenanceLog.Progress = &p
|
||||
}
|
||||
if workerLog.Status != "" {
|
||||
maintenanceLog.Status = workerLog.Status
|
||||
}
|
||||
maintenanceLogs = append(maintenanceLogs, maintenanceLog)
|
||||
}
|
||||
|
||||
// Persist logs
|
||||
if s.adminServer.configPersistence != nil {
|
||||
if err := s.adminServer.configPersistence.SaveTaskExecutionLogs(taskID, maintenanceLogs); err != nil {
|
||||
glog.Errorf("Failed to persist logs for task %s: %v", taskID, err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// handleTaskLogResponse processes task log responses from workers
|
||||
func (s *WorkerGrpcServer) handleTaskLogResponse(conn *WorkerConnection, response *worker_pb.TaskLogResponse) {
|
||||
requestKey := fmt.Sprintf("%s:%s", response.WorkerId, response.TaskId)
|
||||
@@ -575,10 +666,13 @@ func (s *WorkerGrpcServer) RequestTaskLogs(workerID, taskID string, maxEntries i
|
||||
TaskID: taskID,
|
||||
WorkerID: workerID,
|
||||
ResponseCh: responseCh,
|
||||
Timeout: time.Now().Add(10 * time.Second),
|
||||
}
|
||||
|
||||
s.logRequestsMutex.Lock()
|
||||
if _, exists := s.pendingLogRequests[requestKey]; exists {
|
||||
s.logRequestsMutex.Unlock()
|
||||
return nil, fmt.Errorf("a log request for task %s is already in progress", taskID)
|
||||
}
|
||||
s.pendingLogRequests[requestKey] = requestContext
|
||||
s.logRequestsMutex.Unlock()
|
||||
|
||||
@@ -601,10 +695,12 @@ func (s *WorkerGrpcServer) RequestTaskLogs(workerID, taskID string, maxEntries i
|
||||
select {
|
||||
case conn.outgoing <- logRequest:
|
||||
glog.V(1).Infof("Log request sent to worker %s for task %s", workerID, taskID)
|
||||
case <-time.After(5 * time.Second):
|
||||
case <-time.After(logSendTimeout):
|
||||
// Clean up pending request on timeout
|
||||
s.logRequestsMutex.Lock()
|
||||
delete(s.pendingLogRequests, requestKey)
|
||||
if s.pendingLogRequests[requestKey] == requestContext {
|
||||
delete(s.pendingLogRequests, requestKey)
|
||||
}
|
||||
s.logRequestsMutex.Unlock()
|
||||
return nil, fmt.Errorf("timeout sending log request to worker %s", workerID)
|
||||
}
|
||||
@@ -617,10 +713,12 @@ func (s *WorkerGrpcServer) RequestTaskLogs(workerID, taskID string, maxEntries i
|
||||
}
|
||||
glog.V(1).Infof("Received %d log entries for task %s from worker %s", len(response.LogEntries), taskID, workerID)
|
||||
return response.LogEntries, nil
|
||||
case <-time.After(10 * time.Second):
|
||||
case <-time.After(logResponseTimeout):
|
||||
// Clean up pending request on timeout
|
||||
s.logRequestsMutex.Lock()
|
||||
delete(s.pendingLogRequests, requestKey)
|
||||
if s.pendingLogRequests[requestKey] == requestContext {
|
||||
delete(s.pendingLogRequests, requestKey)
|
||||
}
|
||||
s.logRequestsMutex.Unlock()
|
||||
return nil, fmt.Errorf("timeout waiting for log response from worker %s", workerID)
|
||||
}
|
||||
@@ -684,3 +782,38 @@ func findClientAddress(ctx context.Context) string {
|
||||
}
|
||||
return pr.Addr.String()
|
||||
}
|
||||
|
||||
// activeLogFetchLoop periodically fetches logs for all in-progress tasks
|
||||
func (s *WorkerGrpcServer) activeLogFetchLoop() {
|
||||
ticker := time.NewTicker(30 * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-s.stopChan:
|
||||
return
|
||||
case <-ticker.C:
|
||||
if !s.running || s.adminServer == nil || s.adminServer.maintenanceManager == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Get all in-progress tasks
|
||||
tasks := s.adminServer.maintenanceManager.GetTasks(maintenance.TaskStatusInProgress, "", 0)
|
||||
if len(tasks) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
glog.V(2).Infof("Background log fetcher: found %d in-progress tasks", len(tasks))
|
||||
for _, task := range tasks {
|
||||
if task.WorkerID != "" {
|
||||
// Use a goroutine to avoid blocking the loop
|
||||
go func(wID, tID string) {
|
||||
if err := s.FetchAndSaveLogs(wID, tID); err != nil {
|
||||
glog.V(2).Infof("Background log fetch failed for task %s on worker %s: %v", tID, wID, err)
|
||||
}
|
||||
}(task.WorkerID, task.ID)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39,6 +39,11 @@ func NewMaintenanceHandlers(adminServer *dash.AdminServer) *MaintenanceHandlers
|
||||
func (h *MaintenanceHandlers) ShowTaskDetail(c *gin.Context) {
|
||||
taskID := c.Param("id")
|
||||
|
||||
if h.adminServer == nil {
|
||||
c.String(http.StatusInternalServerError, "Admin server not initialized")
|
||||
return
|
||||
}
|
||||
|
||||
taskDetail, err := h.adminServer.GetMaintenanceTaskDetail(taskID)
|
||||
if err != nil {
|
||||
glog.Errorf("DEBUG ShowTaskDetail: error getting task detail for %s: %v", taskID, err)
|
||||
@@ -111,6 +116,10 @@ func (h *MaintenanceHandlers) ShowMaintenanceQueue(c *gin.Context) {
|
||||
|
||||
// ShowMaintenanceWorkers displays the maintenance workers page
|
||||
func (h *MaintenanceHandlers) ShowMaintenanceWorkers(c *gin.Context) {
|
||||
if h.adminServer == nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Admin server not initialized"})
|
||||
return
|
||||
}
|
||||
workersData, err := h.adminServer.GetMaintenanceWorkersData()
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
@@ -339,6 +348,8 @@ func (h *MaintenanceHandlers) UpdateTaskConfig(c *gin.Context) {
|
||||
glog.Warningf("Failed to save task config to protobuf file: %v", err)
|
||||
// Don't fail the request, just log the warning
|
||||
}
|
||||
} else if h.adminServer == nil {
|
||||
glog.Warningf("Failed to save task config: admin server not initialized")
|
||||
}
|
||||
|
||||
// Trigger a configuration reload in the maintenance manager
|
||||
@@ -492,74 +503,25 @@ func (h *MaintenanceHandlers) UpdateMaintenanceConfig(c *gin.Context) {
|
||||
// Helper methods that delegate to AdminServer
|
||||
|
||||
func (h *MaintenanceHandlers) getMaintenanceQueueData() (*maintenance.MaintenanceQueueData, error) {
|
||||
tasks, err := h.getMaintenanceTasks()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
workers, err := h.getMaintenanceWorkers()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
stats, err := h.getMaintenanceQueueStats()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
data := &maintenance.MaintenanceQueueData{
|
||||
Tasks: tasks,
|
||||
Workers: workers,
|
||||
Stats: stats,
|
||||
LastUpdated: time.Now(),
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func (h *MaintenanceHandlers) getMaintenanceQueueStats() (*maintenance.QueueStats, error) {
|
||||
// Use the exported method from AdminServer
|
||||
return h.adminServer.GetMaintenanceQueueStats()
|
||||
}
|
||||
|
||||
func (h *MaintenanceHandlers) getMaintenanceTasks() ([]*maintenance.MaintenanceTask, error) {
|
||||
// Call the maintenance manager directly to get recent tasks (limit for performance)
|
||||
if h.adminServer == nil {
|
||||
return []*maintenance.MaintenanceTask{}, nil
|
||||
return nil, fmt.Errorf("admin server not initialized")
|
||||
}
|
||||
|
||||
manager := h.adminServer.GetMaintenanceManager()
|
||||
if manager == nil {
|
||||
return []*maintenance.MaintenanceTask{}, nil
|
||||
}
|
||||
|
||||
// Get recent tasks only (last 100) to prevent slow page loads
|
||||
// Users can view more tasks via pagination if needed
|
||||
allTasks := manager.GetTasks("", "", 100)
|
||||
return allTasks, nil
|
||||
}
|
||||
|
||||
func (h *MaintenanceHandlers) getMaintenanceWorkers() ([]*maintenance.MaintenanceWorker, error) {
|
||||
// Get workers from the admin server's maintenance manager
|
||||
if h.adminServer == nil {
|
||||
return []*maintenance.MaintenanceWorker{}, nil
|
||||
}
|
||||
|
||||
if h.adminServer.GetMaintenanceManager() == nil {
|
||||
return []*maintenance.MaintenanceWorker{}, nil
|
||||
}
|
||||
|
||||
// Get workers from the maintenance manager
|
||||
workers := h.adminServer.GetMaintenanceManager().GetWorkers()
|
||||
return workers, nil
|
||||
// Use the exported method from AdminServer used by the JSON API
|
||||
return h.adminServer.GetMaintenanceQueueData()
|
||||
}
|
||||
|
||||
func (h *MaintenanceHandlers) getMaintenanceConfig() (*maintenance.MaintenanceConfigData, error) {
|
||||
if h.adminServer == nil {
|
||||
return nil, fmt.Errorf("admin server not initialized")
|
||||
}
|
||||
// Delegate to AdminServer's real persistence method
|
||||
return h.adminServer.GetMaintenanceConfigData()
|
||||
}
|
||||
|
||||
func (h *MaintenanceHandlers) updateMaintenanceConfig(config *maintenance.MaintenanceConfig) error {
|
||||
if h.adminServer == nil {
|
||||
return fmt.Errorf("admin server not initialized")
|
||||
}
|
||||
// Delegate to AdminServer's real persistence method
|
||||
return h.adminServer.UpdateMaintenanceConfigData(config)
|
||||
}
|
||||
|
||||
@@ -587,15 +587,35 @@ func (mq *MaintenanceQueue) GetTasks(status MaintenanceTaskStatus, taskType Main
|
||||
continue
|
||||
}
|
||||
tasks = append(tasks, task)
|
||||
if limit > 0 && len(tasks) >= limit {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by creation time (newest first)
|
||||
sort.Slice(tasks, func(i, j int) bool {
|
||||
return tasks[i].CreatedAt.After(tasks[j].CreatedAt)
|
||||
})
|
||||
// Sort based on status
|
||||
if status == TaskStatusCompleted || status == TaskStatusFailed || status == TaskStatusCancelled {
|
||||
sort.Slice(tasks, func(i, j int) bool {
|
||||
t1 := tasks[i].CompletedAt
|
||||
t2 := tasks[j].CompletedAt
|
||||
if t1 == nil && t2 == nil {
|
||||
return tasks[i].CreatedAt.After(tasks[j].CreatedAt)
|
||||
}
|
||||
if t1 == nil {
|
||||
return false
|
||||
}
|
||||
if t2 == nil {
|
||||
return true
|
||||
}
|
||||
return t1.After(*t2)
|
||||
})
|
||||
} else {
|
||||
// Default to creation time (newest first)
|
||||
sort.Slice(tasks, func(i, j int) bool {
|
||||
return tasks[i].CreatedAt.After(tasks[j].CreatedAt)
|
||||
})
|
||||
}
|
||||
|
||||
// Apply limit after sorting
|
||||
if limit > 0 && len(tasks) > limit {
|
||||
tasks = tasks[:limit]
|
||||
}
|
||||
|
||||
return tasks
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -302,12 +302,7 @@ templ MaintenanceQueue(data *maintenance.MaintenanceQueueData) {
|
||||
// Debug output to browser console
|
||||
console.log("DEBUG: Maintenance Queue Template loaded");
|
||||
|
||||
// Auto-refresh every 10 seconds
|
||||
setInterval(function() {
|
||||
if (!document.hidden) {
|
||||
window.location.reload();
|
||||
}
|
||||
}, 10000);
|
||||
|
||||
|
||||
window.triggerScan = function() {
|
||||
console.log("triggerScan called");
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
@@ -610,7 +610,7 @@ func MaintenanceQueue(data *maintenance.MaintenanceQueueData) templ.Component {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
}
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 63, "</div></div></div></div></div><script>\n // Debug output to browser console\n console.log(\"DEBUG: Maintenance Queue Template loaded\");\n \n // Auto-refresh every 10 seconds\n setInterval(function() {\n if (!document.hidden) {\n window.location.reload();\n }\n }, 10000);\n\n window.triggerScan = function() {\n console.log(\"triggerScan called\");\n fetch('/api/maintenance/scan', {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n }\n })\n .then(response => response.json())\n .then(data => {\n if (data.success) {\n showToast('Success', 'Maintenance scan triggered successfully', 'success');\n setTimeout(() => window.location.reload(), 2000);\n } else {\n showToast('Error', 'Failed to trigger scan: ' + (data.error || 'Unknown error'), 'danger');\n }\n })\n .catch(error => {\n showToast('Error', 'Error: ' + error.message, 'danger');\n });\n };\n\n window.refreshPage = function() {\n console.log(\"refreshPage called\");\n window.location.reload();\n };\n\n window.navigateToTask = function(element) {\n const taskId = element.getAttribute('data-task-id');\n if (taskId) {\n window.location.href = '/maintenance/tasks/' + taskId;\n }\n };\n </script>")
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 63, "</div></div></div></div></div><script>\n // Debug output to browser console\n console.log(\"DEBUG: Maintenance Queue Template loaded\");\n \n\n\n window.triggerScan = function() {\n console.log(\"triggerScan called\");\n fetch('/api/maintenance/scan', {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n }\n })\n .then(response => response.json())\n .then(data => {\n if (data.success) {\n showToast('Success', 'Maintenance scan triggered successfully', 'success');\n setTimeout(() => window.location.reload(), 2000);\n } else {\n showToast('Error', 'Failed to trigger scan: ' + (data.error || 'Unknown error'), 'danger');\n }\n })\n .catch(error => {\n showToast('Error', 'Error: ' + error.message, 'danger');\n });\n };\n\n window.refreshPage = function() {\n console.log(\"refreshPage called\");\n window.location.reload();\n };\n\n window.navigateToTask = function(element) {\n const taskId = element.getAttribute('data-task-id');\n if (taskId) {\n window.location.href = '/maintenance/tasks/' + taskId;\n }\n };\n </script>")
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
@@ -809,7 +809,7 @@ func ProgressBar(progress float64, status maintenance.MaintenanceTaskStatus) tem
|
||||
var templ_7745c5c3_Var35 string
|
||||
templ_7745c5c3_Var35, templ_7745c5c3_Err = templruntime.SanitizeStyleAttributeValues(fmt.Sprintf("width: %.1f%%", progress))
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `view/app/maintenance_queue.templ`, Line: 390, Col: 102}
|
||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `view/app/maintenance_queue.templ`, Line: 385, Col: 102}
|
||||
}
|
||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var35))
|
||||
if templ_7745c5c3_Err != nil {
|
||||
@@ -822,7 +822,7 @@ func ProgressBar(progress float64, status maintenance.MaintenanceTaskStatus) tem
|
||||
var templ_7745c5c3_Var36 string
|
||||
templ_7745c5c3_Var36, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("%.1f%%", progress))
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `view/app/maintenance_queue.templ`, Line: 393, Col: 66}
|
||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `view/app/maintenance_queue.templ`, Line: 388, Col: 66}
|
||||
}
|
||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var36))
|
||||
if templ_7745c5c3_Err != nil {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -430,7 +430,7 @@ func getTaskConfigStringField(config interface{}, fieldName string) string {
|
||||
|
||||
func getTaskNumberStep(field *config.Field) string {
|
||||
if field.Type == config.FieldTypeFloat {
|
||||
return "0.01"
|
||||
return "any"
|
||||
}
|
||||
return "1"
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
@@ -891,7 +891,7 @@ func getTaskConfigStringField(config interface{}, fieldName string) string {
|
||||
|
||||
func getTaskNumberStep(field *config.Field) string {
|
||||
if field.Type == config.FieldTypeFloat {
|
||||
return "0.01"
|
||||
return "any"
|
||||
}
|
||||
return "1"
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -942,10 +942,48 @@ templ TaskDetail(data *maintenance.TaskDetailData) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Format and display logs with structured fields
|
||||
let logText = '';
|
||||
|
||||
// Helper function to format timestamps robustly
|
||||
function formatTimestamp(timestamp) {
|
||||
if (!timestamp) {
|
||||
return 'N/A';
|
||||
}
|
||||
|
||||
let date;
|
||||
|
||||
// Check if timestamp is a numeric string (e.g., "1738652668")
|
||||
if (typeof timestamp === 'string' && /^\d+$/.test(timestamp)) {
|
||||
const numericTimestamp = parseInt(timestamp, 10);
|
||||
// Treat values > 10^10 as milliseconds, otherwise as seconds
|
||||
date = numericTimestamp > 10000000000
|
||||
? new Date(numericTimestamp)
|
||||
: new Date(numericTimestamp * 1000);
|
||||
} else if (typeof timestamp === 'string') {
|
||||
// ISO date string
|
||||
date = new Date(timestamp);
|
||||
} else if (typeof timestamp === 'number') {
|
||||
// Numeric timestamp (seconds or milliseconds)
|
||||
date = timestamp > 10000000000
|
||||
? new Date(timestamp)
|
||||
: new Date(timestamp * 1000);
|
||||
} else {
|
||||
return 'N/A';
|
||||
}
|
||||
|
||||
// Validate the date
|
||||
if (isNaN(date.getTime())) {
|
||||
return 'N/A';
|
||||
}
|
||||
|
||||
return date.toISOString();
|
||||
}
|
||||
|
||||
logs.forEach(entry => {
|
||||
const timestamp = entry.timestamp ? new Date(entry.timestamp * 1000).toISOString() : 'N/A';
|
||||
const timestamp = formatTimestamp(entry.timestamp);
|
||||
|
||||
const level = entry.level || 'INFO';
|
||||
const message = entry.message || '';
|
||||
|
||||
@@ -1011,7 +1049,12 @@ templ TaskDetail(data *maintenance.TaskDetailData) {
|
||||
let logContent = '';
|
||||
if (data.logs && data.logs.length > 0) {
|
||||
data.logs.forEach(entry => {
|
||||
const timestamp = entry.timestamp ? new Date(entry.timestamp * 1000).toISOString() : 'N/A';
|
||||
let timestamp;
|
||||
if (typeof entry.timestamp === 'string') {
|
||||
timestamp = new Date(entry.timestamp).toISOString();
|
||||
} else {
|
||||
timestamp = entry.timestamp ? new Date(entry.timestamp * 1000).toISOString() : 'N/A';
|
||||
}
|
||||
const level = entry.level || 'INFO';
|
||||
const message = entry.message || '';
|
||||
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package app
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package components
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package components
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.960
|
||||
// templ: version: v0.3.977
|
||||
package layout
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
@@ -230,7 +230,20 @@ func startAdminServer(ctx context.Context, options AdminOptions, enableUI bool,
|
||||
|
||||
// Create router
|
||||
r := gin.New()
|
||||
r.Use(gin.Logger(), gin.Recovery())
|
||||
r.Use(gin.LoggerWithFormatter(func(param gin.LogFormatterParams) string {
|
||||
if param.StatusCode == 200 {
|
||||
return ""
|
||||
}
|
||||
return fmt.Sprintf("[GIN] %v | %3d | %13v | %15s | %-7s %s\n%s",
|
||||
param.TimeStamp.Format("2006/01/02 - 15:04:05"),
|
||||
param.StatusCode,
|
||||
param.Latency,
|
||||
param.ClientIP,
|
||||
param.Method,
|
||||
param.Path,
|
||||
param.ErrorMessage,
|
||||
)
|
||||
}), gin.Recovery())
|
||||
|
||||
// Create data directory first if specified (needed for session key storage)
|
||||
var dataDir string
|
||||
|
||||
@@ -79,6 +79,16 @@ func (ms *MasterServer) Assign(ctx context.Context, req *master_pb.AssignRequest
|
||||
}
|
||||
|
||||
vl := ms.Topo.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl, option.DiskType)
|
||||
if req.DiskType == "" {
|
||||
if writable, _ := vl.GetWritableVolumeCount(); writable == 0 {
|
||||
if hddVl := ms.Topo.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl, types.ToDiskType(types.HddType)); hddVl != nil {
|
||||
if writable, _ := hddVl.GetWritableVolumeCount(); writable > 0 {
|
||||
option.DiskType = types.ToDiskType(types.HddType)
|
||||
vl = hddVl
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
vl.SetLastGrowCount(req.WritableVolumeCount)
|
||||
|
||||
var (
|
||||
|
||||
@@ -241,13 +241,15 @@ func Detection(metrics []*types.VolumeHealthMetrics, clusterInfo *types.ClusterI
|
||||
results = append(results, result)
|
||||
} else {
|
||||
// Count debug reasons
|
||||
if metric.Age < quietThreshold {
|
||||
skippedQuietTime++
|
||||
}
|
||||
if metric.FullnessRatio < ecConfig.FullnessRatio {
|
||||
skippedFullness++
|
||||
}
|
||||
|
||||
if debugCount < 5 { // Limit to avoid spam
|
||||
if metric.Age < quietThreshold {
|
||||
skippedQuietTime++
|
||||
}
|
||||
if metric.FullnessRatio < ecConfig.FullnessRatio {
|
||||
skippedFullness++
|
||||
}
|
||||
// Logic moved outside
|
||||
}
|
||||
debugCount++
|
||||
}
|
||||
@@ -256,7 +258,7 @@ func Detection(metrics []*types.VolumeHealthMetrics, clusterInfo *types.ClusterI
|
||||
// Log debug summary if no tasks were created
|
||||
if len(results) == 0 && len(metrics) > 0 {
|
||||
totalVolumes := len(metrics)
|
||||
glog.V(1).Infof("EC detection: No tasks created for %d volumes (skipped: %d already EC, %d too small, %d filtered, %d not quiet, %d not full)",
|
||||
glog.Infof("EC detection: No tasks created for %d volumes (skipped: %d already EC, %d too small, %d filtered, %d not quiet, %d not full)",
|
||||
totalVolumes, skippedAlreadyEC, skippedTooSmall, skippedCollectionFilter, skippedQuietTime, skippedFullness)
|
||||
|
||||
// Show details for first few volumes
|
||||
|
||||
@@ -30,6 +30,7 @@ type TaskLogger interface {
|
||||
LogWithFields(level string, message string, fields map[string]interface{})
|
||||
|
||||
// Lifecycle
|
||||
Sync() error
|
||||
Close() error
|
||||
GetLogDir() string
|
||||
}
|
||||
@@ -230,6 +231,17 @@ func (l *FileTaskLogger) LogWithFields(level string, message string, fields map[
|
||||
l.writeLogEntry(entry)
|
||||
}
|
||||
|
||||
// Sync flushes buffered data to disk
|
||||
func (l *FileTaskLogger) Sync() error {
|
||||
l.mutex.Lock()
|
||||
defer l.mutex.Unlock()
|
||||
|
||||
if l.logFile != nil {
|
||||
return l.logFile.Sync()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close closes the logger and finalizes metadata
|
||||
func (l *FileTaskLogger) Close() error {
|
||||
l.Info("Task logger closed for %s", l.taskID)
|
||||
@@ -423,7 +435,10 @@ func ReadTaskLogs(logDir string) ([]TaskLogEntry, error) {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return nil, fmt.Errorf("failed to decode log entry: %w", err)
|
||||
// If we fail to decode an entry, it might be a partial write at the end of the file
|
||||
// Return what we have so far instead of failing the entire request
|
||||
glog.V(1).Infof("Failed to decode log entry in %s: %v (returning %d partial logs)", logPath, err, len(entries))
|
||||
break
|
||||
}
|
||||
entries = append(entries, entry)
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@ type TaskLogger interface {
|
||||
Error(message string, args ...interface{})
|
||||
Debug(message string, args ...interface{})
|
||||
LogWithFields(level string, message string, fields map[string]interface{})
|
||||
Sync() error
|
||||
Close() error
|
||||
}
|
||||
|
||||
|
||||
@@ -707,6 +707,9 @@ func (w *Worker) executeTask(task *types.TaskInput) {
|
||||
err = taskInstance.Execute(ctx, task.TypedParams)
|
||||
|
||||
// Report completion
|
||||
if fileLogger != nil {
|
||||
fileLogger.Sync()
|
||||
}
|
||||
if err != nil {
|
||||
w.completeTask(task.ID, false, err.Error())
|
||||
w.cmds <- workerCommand{
|
||||
@@ -718,14 +721,15 @@ func (w *Worker) executeTask(task *types.TaskInput) {
|
||||
fileLogger.Error("Task %s failed: %v", task.ID, err)
|
||||
}
|
||||
} else {
|
||||
if fileLogger != nil {
|
||||
fileLogger.Info("Task %s completed successfully", task.ID)
|
||||
fileLogger.Sync()
|
||||
}
|
||||
w.completeTask(task.ID, true, "")
|
||||
w.cmds <- workerCommand{
|
||||
action: ActionIncTaskComplete,
|
||||
}
|
||||
glog.Infof("Worker %s completed task %s successfully", w.id, task.ID)
|
||||
if fileLogger != nil {
|
||||
fileLogger.Info("Task %s completed successfully", task.ID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user