* fix(admin): reduce memory usage and verbose logging for large clusters (#8919) The admin server used excessive memory and produced thousands of log lines on clusters with many volumes (e.g., 33k volumes). Three root causes: 1. Scanner duplicated all volume metrics: getVolumeHealthMetrics() created VolumeHealthMetrics objects, then convertToTaskMetrics() copied them all into identical types.VolumeHealthMetrics. Now uses the task-system type directly, eliminating the duplicate allocation and removing convertToTaskMetrics. 2. All previous task states loaded at startup: LoadTasksFromPersistence read and deserialized every .pb file from disk, logging each one. With thousands of balance tasks persisted, this caused massive startup I/O, memory usage, and log noise (including unguarded DEBUG glog.Infof per task). Now starts with an empty queue — the scanner re-detects current needs from live cluster state. Terminal tasks are purged from memory and disk when new scan results arrive. 3. Verbose per-volume/per-node logging: V(2) and V(3) logs produced thousands of lines per scan. Per-volume logs bumped to V(4), per-node/rack/disk logs bumped to V(3). Topology summary now logs counts instead of full node ID arrays. Also removes lastTopologyInfo field from MaintenanceScanner — the raw protobuf topology is returned as a local value and not retained between 30-minute scans. * fix(admin): delete stale task files at startup, add DeleteAllTaskStates Old task .pb files from previous runs were left on disk. The periodic CleanupCompletedTasks still loads all files to find completed ones — the same expensive 4GB path from the pprof profile. Now at startup, DeleteAllTaskStates removes all .pb files by scanning the directory without reading or deserializing them. The scanner will re-detect any tasks still needed from live cluster state. * fix(admin): don't persist terminal tasks to disk CompleteTask was saving failed/completed tasks to disk where they'd accumulate. The periodic cleanup only triggered for completed tasks, not failed ones. Now terminal tasks are deleted from disk immediately and only kept in memory for the current session's UI. * fix(admin): cap in-memory tasks to 100 per job type Without a limit, the task map grows unbounded — balance could create thousands of pending tasks for a cluster with many imbalanced volumes. Now AddTask rejects new tasks when a job type already has 100 in the queue. The scanner will re-detect skipped volumes on the next scan. * fix(admin): address PR review - memory-only purge, active-only capacity - purgeTerminalTasks now only cleans in-memory map (terminal tasks are already deleted from disk by CompleteTask) - Per-type capacity limit counts only active tasks (pending/assigned/ in_progress), not terminal ones - When at capacity, purge terminal tasks first before rejecting * fix(admin): fix orphaned comment, add TaskStatusCancelled to terminal switch - Move hasQueuedOrActiveTaskForVolume comment to its function definition - Add TaskStatusCancelled to the terminal state switch in CompleteTask so cancelled task files are deleted from disk
198 lines
7.1 KiB
Go
198 lines
7.1 KiB
Go
package maintenance
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/worker/types"
|
|
)
|
|
|
|
// NewMaintenanceScanner creates a new maintenance scanner
|
|
func NewMaintenanceScanner(adminClient AdminClient, policy *MaintenancePolicy, queue *MaintenanceQueue) *MaintenanceScanner {
|
|
scanner := &MaintenanceScanner{
|
|
adminClient: adminClient,
|
|
policy: policy,
|
|
queue: queue,
|
|
lastScan: make(map[MaintenanceTaskType]time.Time),
|
|
}
|
|
|
|
// Initialize integration
|
|
scanner.integration = NewMaintenanceIntegration(queue, policy)
|
|
|
|
// Set up bidirectional relationship
|
|
queue.SetIntegration(scanner.integration)
|
|
|
|
glog.V(1).Infof("Initialized maintenance scanner with task system")
|
|
|
|
return scanner
|
|
}
|
|
|
|
// ScanForMaintenanceTasks analyzes the cluster and generates maintenance tasks
|
|
func (ms *MaintenanceScanner) ScanForMaintenanceTasks() ([]*TaskDetectionResult, error) {
|
|
// Get volume health metrics directly in task-system format, along with topology info
|
|
taskMetrics, topologyInfo, err := ms.getVolumeHealthMetrics()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get volume health metrics: %w", err)
|
|
}
|
|
|
|
// Use task system for all task types
|
|
if ms.integration != nil {
|
|
// Update topology information for complete cluster view (including empty servers)
|
|
// This must happen before task detection to ensure EC placement can consider all servers
|
|
if topologyInfo != nil {
|
|
if err := ms.integration.UpdateTopologyInfo(topologyInfo); err != nil {
|
|
glog.Errorf("Failed to update topology info for empty servers: %v", err)
|
|
// Don't fail the scan - continue with just volume-bearing servers
|
|
} else {
|
|
glog.V(1).Infof("Updated topology info for complete cluster view including empty servers")
|
|
}
|
|
}
|
|
|
|
// Use task detection system with complete cluster information
|
|
results, err := ms.integration.ScanWithTaskDetectors(taskMetrics)
|
|
if err != nil {
|
|
glog.Errorf("Task scanning failed: %v", err)
|
|
return nil, err
|
|
}
|
|
|
|
glog.V(1).Infof("Maintenance scan completed: found %d tasks", len(results))
|
|
return results, nil
|
|
}
|
|
|
|
// No integration available
|
|
glog.Warningf("No integration available, no tasks will be scheduled")
|
|
return []*TaskDetectionResult{}, nil
|
|
}
|
|
|
|
// getVolumeHealthMetrics collects health information for all volumes.
|
|
// Returns metrics in task-system format directly (no intermediate copy) and
|
|
// the topology info for updating the active topology.
|
|
func (ms *MaintenanceScanner) getVolumeHealthMetrics() ([]*types.VolumeHealthMetrics, *master_pb.TopologyInfo, error) {
|
|
var metrics []*types.VolumeHealthMetrics
|
|
var topologyInfo *master_pb.TopologyInfo
|
|
|
|
glog.V(1).Infof("Collecting volume health metrics from master")
|
|
err := ms.adminClient.WithMasterClient(func(client master_pb.SeaweedClient) error {
|
|
|
|
resp, err := client.VolumeList(context.Background(), &master_pb.VolumeListRequest{})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if resp.TopologyInfo == nil {
|
|
glog.Warningf("No topology info received from master")
|
|
return nil
|
|
}
|
|
|
|
volumeSizeLimitBytes := uint64(resp.VolumeSizeLimitMb) * 1024 * 1024 // Convert MB to bytes
|
|
|
|
// Track node counts for summary logging (avoid accumulating full ID slices)
|
|
var totalNodes, nodesWithVolumes, nodesWithoutVolumes int
|
|
|
|
for _, dc := range resp.TopologyInfo.DataCenterInfos {
|
|
glog.V(3).Infof("Processing datacenter: %s", dc.Id)
|
|
for _, rack := range dc.RackInfos {
|
|
glog.V(3).Infof("Processing rack: %s in datacenter: %s", rack.Id, dc.Id)
|
|
for _, node := range rack.DataNodeInfos {
|
|
totalNodes++
|
|
glog.V(3).Infof("Found volume server in topology: %s (disks: %d)", node.Id, len(node.DiskInfos))
|
|
|
|
hasVolumes := false
|
|
// Process each disk on this node
|
|
for diskType, diskInfo := range node.DiskInfos {
|
|
if len(diskInfo.VolumeInfos) > 0 {
|
|
hasVolumes = true
|
|
glog.V(3).Infof("Volume server %s disk %s has %d volumes", node.Id, diskType, len(diskInfo.VolumeInfos))
|
|
}
|
|
|
|
// Process volumes on this specific disk
|
|
for _, volInfo := range diskInfo.VolumeInfos {
|
|
metric := &types.VolumeHealthMetrics{
|
|
VolumeID: volInfo.Id,
|
|
Server: node.Id,
|
|
ServerAddress: node.Address,
|
|
DiskType: diskType, // Track which disk this volume is on
|
|
DiskId: volInfo.DiskId, // Use disk ID from volume info
|
|
DataCenter: dc.Id, // Data center from current loop
|
|
Rack: rack.Id, // Rack from current loop
|
|
Collection: volInfo.Collection,
|
|
Size: volInfo.Size,
|
|
DeletedBytes: volInfo.DeletedByteCount,
|
|
LastModified: time.Unix(int64(volInfo.ModifiedAtSecond), 0),
|
|
IsReadOnly: volInfo.ReadOnly,
|
|
IsECVolume: false, // Will be determined from volume structure
|
|
ReplicaCount: 1, // Will be counted
|
|
ExpectedReplicas: int(volInfo.ReplicaPlacement),
|
|
}
|
|
|
|
// Calculate derived metrics
|
|
if metric.Size > 0 {
|
|
metric.GarbageRatio = float64(metric.DeletedBytes) / float64(metric.Size)
|
|
// Calculate fullness ratio using actual volume size limit from master
|
|
metric.FullnessRatio = float64(metric.Size) / float64(volumeSizeLimitBytes)
|
|
}
|
|
metric.Age = time.Since(metric.LastModified)
|
|
|
|
glog.V(4).Infof("Volume %d on %s:%s (ID %d): size=%d, limit=%d, fullness=%.2f",
|
|
metric.VolumeID, metric.Server, metric.DiskType, metric.DiskId, metric.Size, volumeSizeLimitBytes, metric.FullnessRatio)
|
|
|
|
metrics = append(metrics, metric)
|
|
}
|
|
}
|
|
|
|
if hasVolumes {
|
|
nodesWithVolumes++
|
|
} else {
|
|
nodesWithoutVolumes++
|
|
glog.V(1).Infof("Volume server %s found in topology but has no volumes", node.Id)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
glog.Infof("Topology discovery: %d volume servers (%d with volumes, %d without)",
|
|
totalNodes, nodesWithVolumes, nodesWithoutVolumes)
|
|
|
|
// Return topology info as a local value (not retained on the scanner struct)
|
|
topologyInfo = resp.TopologyInfo
|
|
|
|
return nil
|
|
})
|
|
|
|
if err != nil {
|
|
glog.Errorf("Failed to get volume health metrics: %v", err)
|
|
return nil, nil, err
|
|
}
|
|
|
|
glog.V(1).Infof("Successfully collected metrics for %d actual volumes with disk ID information", len(metrics))
|
|
|
|
// Count actual replicas and identify EC volumes
|
|
ms.enrichVolumeMetrics(metrics)
|
|
|
|
return metrics, topologyInfo, nil
|
|
}
|
|
|
|
// enrichVolumeMetrics adds additional information like replica counts
|
|
func (ms *MaintenanceScanner) enrichVolumeMetrics(metrics []*types.VolumeHealthMetrics) {
|
|
// Group volumes by ID to count replicas
|
|
volumeGroups := make(map[uint32][]*types.VolumeHealthMetrics)
|
|
for _, metric := range metrics {
|
|
volumeGroups[metric.VolumeID] = append(volumeGroups[metric.VolumeID], metric)
|
|
}
|
|
|
|
// Update replica counts for actual volumes
|
|
for volumeID, replicas := range volumeGroups {
|
|
replicaCount := len(replicas)
|
|
for _, replica := range replicas {
|
|
replica.ReplicaCount = replicaCount
|
|
}
|
|
glog.V(4).Infof("Volume %d has %d replicas", volumeID, replicaCount)
|
|
}
|
|
|
|
// TODO: Identify EC volumes by checking volume structure
|
|
// This would require querying volume servers for EC shard information
|
|
}
|