Fix Maintenance Task Sorting and Refactor Log Persistence (#8199)
* fix float stepping
* do not auto refresh
* only logs when non 200 status
* fix maintenance task sorting and cleanup redundant handler logic
* Refactor log retrieval to persist to disk and fix slowness
  - Move log retrieval to disk-based persistence in GetMaintenanceTaskDetail
  - Implement background log fetching on task completion in worker_grpc_server.go
  - Implement async background refresh for in-progress tasks
  - Completely remove blocking gRPC calls from the UI path to fix 10s timeouts
  - Cleanup debug logs and performance profiling code
* Ensure consistent deterministic sorting in config_persistence cleanup
* Replace magic numbers with constants and remove debug logs
  - Added descriptive constants for truncation limits and timeouts in admin_server.go and worker_grpc_server.go
  - Replaced magic numbers with these constants throughout the codebase
  - Verified removal of stdout debug printing
  - Ensured consistent truncation logic during log persistence
* Address code review feedback on history truncation and logging logic
  - Fix AssignmentHistory double-serialization by copying task in GetMaintenanceTaskDetail
  - Fix handleTaskCompletion logging logic (mutually exclusive success/failure logs)
  - Remove unused Timeout field from LogRequestContext and sync select timeouts with constants
  - Ensure AssignmentHistory is only provided in the top-level field for better JSON structure
* Implement goroutine leak protection and request deduplication
  - Add request deduplication in RequestTaskLogs to prevent multiple concurrent fetches for the same task
  - Implement safe cleanup in timeout handlers to avoid race conditions in pendingLogRequests map
  - Add a 10s cooldown for background log refreshes in GetMaintenanceTaskDetail to prevent spamming
  - Ensure all persistent log-fetching goroutines are bounded and efficiently managed
* Fix potential nil pointer panics in maintenance handlers
  - Add nil checks for adminServer in ShowTaskDetail, ShowMaintenanceWorkers, and UpdateTaskConfig
  - Update getMaintenanceQueueData to return a descriptive error instead of nil when adminServer is uninitialized
  - Ensure internal helper methods consistently check for adminServer initialization before use
* Strictly enforce disk-only log reading
  - Remove background log fetching from GetMaintenanceTaskDetail to prevent timeouts and network calls during page view
  - Remove unused lastLogFetch tracking fields to clean up dead code
  - Ensure logs are only updated upon task completion via handleTaskCompletion
* Refactor GetWorkerLogs to read from disk
  - Update /api/maintenance/workers/:id/logs endpoint to use configPersistence.LoadTaskExecutionLogs
  - Remove synchronous gRPC call RequestTaskLogs to prevent timeouts and bad gateway errors
  - Ensure consistent log retrieval behavior across the application (disk-only)
* Fix timestamp parsing in log viewer
  - Update task_detail.templ JS to handle both ISO 8601 strings and Unix timestamps
  - Fix "Invalid time value" error when displaying logs fetched from disk
  - Regenerate templates
* master: fallback to HDD if SSD volumes are full in Assign
* worker: improve EC detection logging and fix skip counters
* worker: add Sync method to TaskLogger interface
* worker: implement Sync and ensure logs are flushed before task completion
* admin: improve task log retrieval with retries and better timeouts
* admin: robust timestamp parsing in task detail view
This commit is contained in:
@@ -39,6 +39,11 @@ func NewMaintenanceHandlers(adminServer *dash.AdminServer) *MaintenanceHandlers
|
||||
func (h *MaintenanceHandlers) ShowTaskDetail(c *gin.Context) {
|
||||
taskID := c.Param("id")
|
||||
|
||||
if h.adminServer == nil {
|
||||
c.String(http.StatusInternalServerError, "Admin server not initialized")
|
||||
return
|
||||
}
|
||||
|
||||
taskDetail, err := h.adminServer.GetMaintenanceTaskDetail(taskID)
|
||||
if err != nil {
|
||||
glog.Errorf("DEBUG ShowTaskDetail: error getting task detail for %s: %v", taskID, err)
|
||||
@@ -111,6 +116,10 @@ func (h *MaintenanceHandlers) ShowMaintenanceQueue(c *gin.Context) {
|
||||
|
||||
// ShowMaintenanceWorkers displays the maintenance workers page
|
||||
func (h *MaintenanceHandlers) ShowMaintenanceWorkers(c *gin.Context) {
|
||||
if h.adminServer == nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Admin server not initialized"})
|
||||
return
|
||||
}
|
||||
workersData, err := h.adminServer.GetMaintenanceWorkersData()
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
@@ -339,6 +348,8 @@ func (h *MaintenanceHandlers) UpdateTaskConfig(c *gin.Context) {
|
||||
glog.Warningf("Failed to save task config to protobuf file: %v", err)
|
||||
// Don't fail the request, just log the warning
|
||||
}
|
||||
} else if h.adminServer == nil {
|
||||
glog.Warningf("Failed to save task config: admin server not initialized")
|
||||
}
|
||||
|
||||
// Trigger a configuration reload in the maintenance manager
|
||||
@@ -492,74 +503,25 @@ func (h *MaintenanceHandlers) UpdateMaintenanceConfig(c *gin.Context) {
|
||||
// Helper methods that delegate to AdminServer
|
||||
|
||||
func (h *MaintenanceHandlers) getMaintenanceQueueData() (*maintenance.MaintenanceQueueData, error) {
|
||||
tasks, err := h.getMaintenanceTasks()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
workers, err := h.getMaintenanceWorkers()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
stats, err := h.getMaintenanceQueueStats()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
data := &maintenance.MaintenanceQueueData{
|
||||
Tasks: tasks,
|
||||
Workers: workers,
|
||||
Stats: stats,
|
||||
LastUpdated: time.Now(),
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func (h *MaintenanceHandlers) getMaintenanceQueueStats() (*maintenance.QueueStats, error) {
|
||||
// Use the exported method from AdminServer
|
||||
return h.adminServer.GetMaintenanceQueueStats()
|
||||
}
|
||||
|
||||
func (h *MaintenanceHandlers) getMaintenanceTasks() ([]*maintenance.MaintenanceTask, error) {
|
||||
// Call the maintenance manager directly to get recent tasks (limit for performance)
|
||||
if h.adminServer == nil {
|
||||
return []*maintenance.MaintenanceTask{}, nil
|
||||
return nil, fmt.Errorf("admin server not initialized")
|
||||
}
|
||||
|
||||
manager := h.adminServer.GetMaintenanceManager()
|
||||
if manager == nil {
|
||||
return []*maintenance.MaintenanceTask{}, nil
|
||||
}
|
||||
|
||||
// Get recent tasks only (last 100) to prevent slow page loads
|
||||
// Users can view more tasks via pagination if needed
|
||||
allTasks := manager.GetTasks("", "", 100)
|
||||
return allTasks, nil
|
||||
}
|
||||
|
||||
func (h *MaintenanceHandlers) getMaintenanceWorkers() ([]*maintenance.MaintenanceWorker, error) {
|
||||
// Get workers from the admin server's maintenance manager
|
||||
if h.adminServer == nil {
|
||||
return []*maintenance.MaintenanceWorker{}, nil
|
||||
}
|
||||
|
||||
if h.adminServer.GetMaintenanceManager() == nil {
|
||||
return []*maintenance.MaintenanceWorker{}, nil
|
||||
}
|
||||
|
||||
// Get workers from the maintenance manager
|
||||
workers := h.adminServer.GetMaintenanceManager().GetWorkers()
|
||||
return workers, nil
|
||||
// Use the exported method from AdminServer used by the JSON API
|
||||
return h.adminServer.GetMaintenanceQueueData()
|
||||
}
|
||||
|
||||
func (h *MaintenanceHandlers) getMaintenanceConfig() (*maintenance.MaintenanceConfigData, error) {
|
||||
if h.adminServer == nil {
|
||||
return nil, fmt.Errorf("admin server not initialized")
|
||||
}
|
||||
// Delegate to AdminServer's real persistence method
|
||||
return h.adminServer.GetMaintenanceConfigData()
|
||||
}
|
||||
|
||||
func (h *MaintenanceHandlers) updateMaintenanceConfig(config *maintenance.MaintenanceConfig) error {
|
||||
if h.adminServer == nil {
|
||||
return fmt.Errorf("admin server not initialized")
|
||||
}
|
||||
// Delegate to AdminServer's real persistence method
|
||||
return h.adminServer.UpdateMaintenanceConfigData(config)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user