filer.sync: show active chunk transfers when sync progress stalls (#8889)
* filer.sync: show active chunk transfers when sync progress stalls

  When the sync watermark is not advancing, print each in-progress chunk transfer with its file path, bytes received so far, and current status (downloading, uploading, or waiting with backoff duration). This helps diagnose which files are blocking progress during replication.

  Closes #8542

* filer.sync: include last error in stall diagnostics

* filer.sync: fix data races in ChunkTransferStatus

  Add sync.RWMutex to ChunkTransferStatus and lock around all field mutations in fetchAndWrite. ActiveTransfers now returns value copies under RLock so callers get immutable snapshots.
This commit is contained in:
@@ -359,6 +359,7 @@ func doSubscribeFilerMetaChanges(clientId int32, clientEpoch int32, sourceGrpcDi
|
||||
}
|
||||
|
||||
var lastLogTsNs = time.Now().UnixNano()
|
||||
var lastProgressedTsNs int64
|
||||
var clientName = fmt.Sprintf("syncFrom_%s_To_%s", string(sourceFiler), string(targetFiler))
|
||||
processEventFnWithOffset := pb.AddOffsetFunc(func(resp *filer_pb.SubscribeMetadataResponse) error {
|
||||
processor.AddSyncJob(resp)
|
||||
@@ -372,6 +373,18 @@ func doSubscribeFilerMetaChanges(clientId int32, clientEpoch int32, sourceGrpcDi
|
||||
now := time.Now().UnixNano()
|
||||
glog.V(0).Infof("sync %s to %s progressed to %v %0.2f/sec", sourceFiler, targetFiler, time.Unix(0, offsetTsNs), float64(counter)/(float64(now-lastLogTsNs)/1e9))
|
||||
lastLogTsNs = now
|
||||
if offsetTsNs == lastProgressedTsNs {
|
||||
for _, t := range filerSink.ActiveTransfers() {
|
||||
if t.LastErr != "" {
|
||||
glog.V(0).Infof(" %s %s: %d bytes received, %s, last error: %s",
|
||||
t.ChunkFileId, t.Path, t.BytesReceived, t.Status, t.LastErr)
|
||||
} else {
|
||||
glog.V(0).Infof(" %s %s: %d bytes received, %s",
|
||||
t.ChunkFileId, t.Path, t.BytesReceived, t.Status)
|
||||
}
|
||||
}
|
||||
}
|
||||
lastProgressedTsNs = offsetTsNs
|
||||
// collect synchronous offset
|
||||
statsCollect.FilerSyncOffsetGauge.WithLabelValues(sourceFiler.String(), targetFiler.String(), clientName, sourcePath).Set(float64(offsetTsNs))
|
||||
return setOffset(targetGrpcDialOption, targetFiler, getSignaturePrefixByPath(sourcePath), sourceFilerSignature, offsetTsNs)
|
||||
|
||||
Reference in New Issue
Block a user