* fix float stepping * do not auto refresh * only logs when non 200 status * fix maintenance task sorting and cleanup redundant handler logic * Refactor log retrieval to persist to disk and fix slowness - Move log retrieval to disk-based persistence in GetMaintenanceTaskDetail - Implement background log fetching on task completion in worker_grpc_server.go - Implement async background refresh for in-progress tasks - Completely remove blocking gRPC calls from the UI path to fix 10s timeouts - Cleanup debug logs and performance profiling code * Ensure consistent deterministic sorting in config_persistence cleanup * Replace magic numbers with constants and remove debug logs - Added descriptive constants for truncation limits and timeouts in admin_server.go and worker_grpc_server.go - Replaced magic numbers with these constants throughout the codebase - Verified removal of stdout debug printing - Ensured consistent truncation logic during log persistence * Address code review feedback on history truncation and logging logic - Fix AssignmentHistory double-serialization by copying task in GetMaintenanceTaskDetail - Fix handleTaskCompletion logging logic (mutually exclusive success/failure logs) - Remove unused Timeout field from LogRequestContext and sync select timeouts with constants - Ensure AssignmentHistory is only provided in the top-level field for better JSON structure * Implement goroutine leak protection and request deduplication - Add request deduplication in RequestTaskLogs to prevent multiple concurrent fetches for the same task - Implement safe cleanup in timeout handlers to avoid race conditions in pendingLogRequests map - Add a 10s cooldown for background log refreshes in GetMaintenanceTaskDetail to prevent spamming - Ensure all persistent log-fetching goroutines are bounded and efficiently managed * Fix potential nil pointer panics in maintenance handlers - Add nil checks for adminServer in ShowTaskDetail, ShowMaintenanceWorkers, and UpdateTaskConfig - Update getMaintenanceQueueData to return a descriptive error instead of nil when adminServer is uninitialized - Ensure internal helper methods consistently check for adminServer initialization before use * Strictly enforce disk-only log reading - Remove background log fetching from GetMaintenanceTaskDetail to prevent timeouts and network calls during page view - Remove unused lastLogFetch tracking fields to clean up dead code - Ensure logs are only updated upon task completion via handleTaskCompletion * Refactor GetWorkerLogs to read from disk - Update /api/maintenance/workers/:id/logs endpoint to use configPersistence.LoadTaskExecutionLogs - Remove synchronous gRPC call RequestTaskLogs to prevent timeouts and bad gateway errors - Ensure consistent log retrieval behavior across the application (disk-only) * Fix timestamp parsing in log viewer - Update task_detail.templ JS to handle both ISO 8601 strings and Unix timestamps - Fix "Invalid time value" error when displaying logs fetched from disk - Regenerate templates * master: fallback to HDD if SSD volumes are full in Assign * worker: improve EC detection logging and fix skip counters * worker: add Sync method to TaskLogger interface * worker: implement Sync and ensure logs are flushed before task completion * admin: improve task log retrieval with retries and better timeouts * admin: robust timestamp parsing in task detail view
154 lines
4.3 KiB
Go
154 lines
4.3 KiB
Go
package weed_server
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/stats"
|
|
|
|
"github.com/seaweedfs/raft"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/security"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/super_block"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/types"
|
|
"github.com/seaweedfs/seaweedfs/weed/topology"
|
|
)
|
|
|
|
func (ms *MasterServer) StreamAssign(server master_pb.Seaweed_StreamAssignServer) error {
|
|
for {
|
|
req, err := server.Recv()
|
|
if err != nil {
|
|
glog.Errorf("StreamAssign failed to receive: %v", err)
|
|
return err
|
|
}
|
|
resp, err := ms.Assign(context.Background(), req)
|
|
if err != nil {
|
|
glog.Errorf("StreamAssign failed to assign: %v", err)
|
|
return err
|
|
}
|
|
if err = server.Send(resp); err != nil {
|
|
glog.Errorf("StreamAssign failed to send: %v", err)
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
func (ms *MasterServer) Assign(ctx context.Context, req *master_pb.AssignRequest) (*master_pb.AssignResponse, error) {
|
|
|
|
if !ms.Topo.IsLeader() {
|
|
return nil, raft.NotLeaderError
|
|
}
|
|
|
|
if req.Count == 0 {
|
|
req.Count = 1
|
|
}
|
|
|
|
if req.Replication == "" {
|
|
req.Replication = ms.option.DefaultReplicaPlacement
|
|
}
|
|
replicaPlacement, err := super_block.NewReplicaPlacementFromString(req.Replication)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
ttl, err := needle.ReadTTL(req.Ttl)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
diskType := types.ToDiskType(req.DiskType)
|
|
|
|
ver := needle.GetCurrentVersion()
|
|
option := &topology.VolumeGrowOption{
|
|
Collection: req.Collection,
|
|
ReplicaPlacement: replicaPlacement,
|
|
Ttl: ttl,
|
|
DiskType: diskType,
|
|
Preallocate: ms.preallocateSize,
|
|
DataCenter: req.DataCenter,
|
|
Rack: req.Rack,
|
|
DataNode: req.DataNode,
|
|
MemoryMapMaxSizeMb: req.MemoryMapMaxSizeMb,
|
|
Version: uint32(ver),
|
|
}
|
|
|
|
if !ms.Topo.DataCenterExists(option.DataCenter) {
|
|
return nil, fmt.Errorf("data center %v not found in topology", option.DataCenter)
|
|
}
|
|
|
|
vl := ms.Topo.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl, option.DiskType)
|
|
if req.DiskType == "" {
|
|
if writable, _ := vl.GetWritableVolumeCount(); writable == 0 {
|
|
if hddVl := ms.Topo.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl, types.ToDiskType(types.HddType)); hddVl != nil {
|
|
if writable, _ := hddVl.GetWritableVolumeCount(); writable > 0 {
|
|
option.DiskType = types.ToDiskType(types.HddType)
|
|
vl = hddVl
|
|
}
|
|
}
|
|
}
|
|
}
|
|
vl.SetLastGrowCount(req.WritableVolumeCount)
|
|
|
|
var (
|
|
lastErr error
|
|
maxTimeout = time.Second * 10
|
|
startTime = time.Now()
|
|
)
|
|
|
|
for time.Now().Sub(startTime) < maxTimeout {
|
|
fid, count, dnList, shouldGrow, err := ms.Topo.PickForWrite(req.Count, option, vl)
|
|
if shouldGrow && !vl.HasGrowRequest() && !ms.option.VolumeGrowthDisabled {
|
|
if err != nil && ms.Topo.AvailableSpaceFor(option) <= 0 {
|
|
err = fmt.Errorf("%s and no free volumes left for %s", err.Error(), option.String())
|
|
}
|
|
vl.AddGrowRequest()
|
|
ms.volumeGrowthRequestChan <- &topology.VolumeGrowRequest{
|
|
Option: option,
|
|
Count: req.WritableVolumeCount,
|
|
Reason: "grpc assign",
|
|
}
|
|
}
|
|
if err != nil {
|
|
glog.V(1).Infof("assign %v %v: %v", req, option.String(), err)
|
|
stats.MasterPickForWriteErrorCounter.Inc()
|
|
lastErr = err
|
|
if (req.DataCenter != "" || req.Rack != "") && strings.Contains(err.Error(), topology.NoWritableVolumes) {
|
|
break
|
|
}
|
|
time.Sleep(200 * time.Millisecond)
|
|
continue
|
|
}
|
|
dn := dnList.Head()
|
|
if dn == nil {
|
|
continue
|
|
}
|
|
var replicas []*master_pb.Location
|
|
for _, r := range dnList.Rest() {
|
|
replicas = append(replicas, &master_pb.Location{
|
|
Url: r.Url(),
|
|
PublicUrl: r.PublicUrl,
|
|
GrpcPort: uint32(r.GrpcPort),
|
|
DataCenter: r.GetDataCenterId(),
|
|
})
|
|
}
|
|
return &master_pb.AssignResponse{
|
|
Fid: fid,
|
|
Location: &master_pb.Location{
|
|
Url: dn.Url(),
|
|
PublicUrl: dn.PublicUrl,
|
|
GrpcPort: uint32(dn.GrpcPort),
|
|
DataCenter: dn.GetDataCenterId(),
|
|
},
|
|
Count: count,
|
|
Auth: string(security.GenJwtForVolumeServer(ms.guard.SigningKey, ms.guard.ExpiresAfterSec, fid)),
|
|
Replicas: replicas,
|
|
}, nil
|
|
}
|
|
if lastErr != nil {
|
|
glog.V(0).Infof("assign %v %v: %v", req, option.String(), lastErr)
|
|
}
|
|
return nil, lastErr
|
|
}
|