Fix Maintenance Task Sorting and Refactor Log Persistence (#8199)

* fix float stepping

* do not auto refresh

* only log when the status is non-200

* fix maintenance task sorting and clean up redundant handler logic

* Refactor log retrieval to persist to disk and fix slowness

- Move log retrieval to disk-based persistence in GetMaintenanceTaskDetail
- Implement background log fetching on task completion in worker_grpc_server.go
- Implement async background refresh for in-progress tasks
- Completely remove blocking gRPC calls from the UI path to fix 10s timeouts
- Clean up debug logs and performance profiling code

* Ensure consistent deterministic sorting in config_persistence cleanup

* Replace magic numbers with constants and remove debug logs

- Added descriptive constants for truncation limits and timeouts in admin_server.go and worker_grpc_server.go
- Replaced magic numbers with these constants throughout the codebase
- Verified removal of stdout debug printing
- Ensured consistent truncation logic during log persistence

* Address code review feedback on history truncation and logging logic

- Fix AssignmentHistory double-serialization by copying task in GetMaintenanceTaskDetail
- Fix handleTaskCompletion logging logic (mutually exclusive success/failure logs)
- Remove unused Timeout field from LogRequestContext and sync select timeouts with constants
- Ensure AssignmentHistory is only provided in the top-level field for better JSON structure

* Implement goroutine leak protection and request deduplication

- Add request deduplication in RequestTaskLogs to prevent multiple concurrent fetches for the same task
- Implement safe cleanup in timeout handlers to avoid race conditions in pendingLogRequests map
- Add a 10s cooldown for background log refreshes in GetMaintenanceTaskDetail to prevent spamming
- Ensure all persistent log-fetching goroutines are bounded and efficiently managed

* Fix potential nil pointer panics in maintenance handlers

- Add nil checks for adminServer in ShowTaskDetail, ShowMaintenanceWorkers, and UpdateTaskConfig
- Update getMaintenanceQueueData to return a descriptive error instead of nil when adminServer is uninitialized
- Ensure internal helper methods consistently check for adminServer initialization before use

* Strictly enforce disk-only log reading

- Remove background log fetching from GetMaintenanceTaskDetail to prevent timeouts and network calls during page view
- Remove unused lastLogFetch tracking fields to clean up dead code
- Ensure logs are only updated upon task completion via handleTaskCompletion

* Refactor GetWorkerLogs to read from disk

- Update /api/maintenance/workers/:id/logs endpoint to use configPersistence.LoadTaskExecutionLogs
- Remove synchronous gRPC call RequestTaskLogs to prevent timeouts and bad gateway errors
- Ensure consistent log retrieval behavior across the application (disk-only)

* Fix timestamp parsing in log viewer

- Update task_detail.templ JS to handle both ISO 8601 strings and Unix timestamps
- Fix "Invalid time value" error when displaying logs fetched from disk
- Regenerate templates

* master: fallback to HDD if SSD volumes are full in Assign

* worker: improve EC detection logging and fix skip counters

* worker: add Sync method to TaskLogger interface

* worker: implement Sync and ensure logs are flushed before task completion

* admin: improve task log retrieval with retries and better timeouts

* admin: robust timestamp parsing in task detail view
This commit is contained in:
Chris Lu
2026-02-04 08:48:55 -08:00
committed by GitHub
parent 2ff1cd9fc9
commit 72a8f598f2
51 changed files with 499 additions and 241 deletions

View File

@@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.960
// templ: version: v0.3.977
package app
//lint:file-ignore SA4006 This context is only used if a nested component is present.
@@ -610,7 +610,7 @@ func MaintenanceQueue(data *maintenance.MaintenanceQueueData) templ.Component {
return templ_7745c5c3_Err
}
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 63, "</div></div></div></div></div><script>\n // Debug output to browser console\n console.log(\"DEBUG: Maintenance Queue Template loaded\");\n \n // Auto-refresh every 10 seconds\n setInterval(function() {\n if (!document.hidden) {\n window.location.reload();\n }\n }, 10000);\n\n window.triggerScan = function() {\n console.log(\"triggerScan called\");\n fetch('/api/maintenance/scan', {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n }\n })\n .then(response => response.json())\n .then(data => {\n if (data.success) {\n showToast('Success', 'Maintenance scan triggered successfully', 'success');\n setTimeout(() => window.location.reload(), 2000);\n } else {\n showToast('Error', 'Failed to trigger scan: ' + (data.error || 'Unknown error'), 'danger');\n }\n })\n .catch(error => {\n showToast('Error', 'Error: ' + error.message, 'danger');\n });\n };\n\n window.refreshPage = function() {\n console.log(\"refreshPage called\");\n window.location.reload();\n };\n\n window.navigateToTask = function(element) {\n const taskId = element.getAttribute('data-task-id');\n if (taskId) {\n window.location.href = '/maintenance/tasks/' + taskId;\n }\n };\n </script>")
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 63, "</div></div></div></div></div><script>\n // Debug output to browser console\n console.log(\"DEBUG: Maintenance Queue Template loaded\");\n \n\n\n window.triggerScan = function() {\n console.log(\"triggerScan called\");\n fetch('/api/maintenance/scan', {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n }\n })\n .then(response => response.json())\n .then(data => {\n if (data.success) {\n showToast('Success', 'Maintenance scan triggered successfully', 'success');\n setTimeout(() => window.location.reload(), 2000);\n } else {\n showToast('Error', 'Failed to trigger scan: ' + (data.error || 'Unknown error'), 'danger');\n }\n })\n .catch(error => {\n showToast('Error', 'Error: ' + error.message, 'danger');\n });\n };\n\n window.refreshPage = function() {\n console.log(\"refreshPage called\");\n window.location.reload();\n };\n\n window.navigateToTask = function(element) {\n const taskId = element.getAttribute('data-task-id');\n if (taskId) {\n window.location.href = '/maintenance/tasks/' + taskId;\n }\n };\n </script>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
@@ -809,7 +809,7 @@ func ProgressBar(progress float64, status maintenance.MaintenanceTaskStatus) tem
var templ_7745c5c3_Var35 string
templ_7745c5c3_Var35, templ_7745c5c3_Err = templruntime.SanitizeStyleAttributeValues(fmt.Sprintf("width: %.1f%%", progress))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `view/app/maintenance_queue.templ`, Line: 390, Col: 102}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `view/app/maintenance_queue.templ`, Line: 385, Col: 102}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var35))
if templ_7745c5c3_Err != nil {
@@ -822,7 +822,7 @@ func ProgressBar(progress float64, status maintenance.MaintenanceTaskStatus) tem
var templ_7745c5c3_Var36 string
templ_7745c5c3_Var36, templ_7745c5c3_Err = templ.JoinStringErrs(fmt.Sprintf("%.1f%%", progress))
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `view/app/maintenance_queue.templ`, Line: 393, Col: 66}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `view/app/maintenance_queue.templ`, Line: 388, Col: 66}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var36))
if templ_7745c5c3_Err != nil {