filer.sync: include last error in stall diagnostics

This commit is contained in:
Chris Lu
2026-04-02 12:18:56 -07:00
parent 2d4ea8c665
commit b5cdd71600
3 changed files with 9 additions and 2 deletions

View File

@@ -375,10 +375,15 @@ func doSubscribeFilerMetaChanges(clientId int32, clientEpoch int32, sourceGrpcDi
lastLogTsNs = now
if offsetTsNs == lastProgressedTsNs {
for _, t := range filerSink.ActiveTransfers() {
if t.LastErr != "" {
glog.V(0).Infof(" %s %s: %d bytes received, %s, last error: %s",
t.ChunkFileId, t.Path, t.BytesReceived, t.Status, t.LastErr)
} else {
glog.V(0).Infof(" %s %s: %d bytes received, %s",
t.ChunkFileId, t.Path, t.BytesReceived, t.Status)
}
}
}
lastProgressedTsNs = offsetTsNs
// export the current sync offset as a gauge metric
statsCollect.FilerSyncOffsetGauge.WithLabelValues(sourceFiler.String(), targetFiler.String(), clientName, sourcePath).Set(float64(offsetTsNs))

View File

@@ -335,6 +335,7 @@ func (fs *FilerSink) fetchAndWrite(sourceChunk *filer_pb.FileChunk, path string,
glog.V(1).Infof("skip retrying stale source %s for %s: %v", sourceChunk.GetFileIdString(), path, retryErr)
return false
}
transferStatus.LastErr = retryErr.Error()
if isEofError(retryErr) {
eofBackoff = nextEofBackoff(eofBackoff)
transferStatus.BytesReceived = int64(len(partialData))

View File

@@ -27,6 +27,7 @@ type ChunkTransferStatus struct {
Path string
BytesReceived int64
Status string // "downloading", "uploading", or "waiting 10s" etc.
LastErr string
}
type FilerSink struct {