filer.sync: show active chunk transfers when sync progress stalls
When the sync watermark is not advancing, print each in-progress chunk transfer with its file path, the bytes received so far, and its current status (downloading, uploading, or waiting, together with the backoff duration). This helps diagnose which files are blocking progress during replication. Closes #8542.
This commit is contained in:
@@ -241,6 +241,14 @@ func (fs *FilerSink) fetchAndWrite(sourceChunk *filer_pb.FileChunk, path string,
|
||||
return "", fmt.Errorf("upload data: %w", err)
|
||||
}
|
||||
|
||||
transferStatus := &ChunkTransferStatus{
|
||||
ChunkFileId: sourceChunk.GetFileIdString(),
|
||||
Path: path,
|
||||
Status: "downloading",
|
||||
}
|
||||
fs.activeTransfers.Store(sourceChunk.GetFileIdString(), transferStatus)
|
||||
defer fs.activeTransfers.Delete(sourceChunk.GetFileIdString())
|
||||
|
||||
eofBackoff := time.Duration(0)
|
||||
var partialData []byte
|
||||
var savedFilename string
|
||||
@@ -282,6 +290,9 @@ func (fs *FilerSink) fetchAndWrite(sourceChunk *filer_pb.FileChunk, path string,
|
||||
fullData = data
|
||||
}
|
||||
|
||||
transferStatus.BytesReceived = int64(len(fullData))
|
||||
transferStatus.Status = "uploading"
|
||||
|
||||
currentFileId, uploadResult, uploadErr, _ := uploader.UploadWithRetry(
|
||||
fs,
|
||||
&filer_pb.AssignVolumeRequest{
|
||||
@@ -326,9 +337,12 @@ func (fs *FilerSink) fetchAndWrite(sourceChunk *filer_pb.FileChunk, path string,
|
||||
}
|
||||
if isEofError(retryErr) {
|
||||
eofBackoff = nextEofBackoff(eofBackoff)
|
||||
transferStatus.BytesReceived = int64(len(partialData))
|
||||
transferStatus.Status = fmt.Sprintf("waiting %v", eofBackoff)
|
||||
glog.V(0).Infof("source connection interrupted while replicating %s for %s (%d bytes received so far), backing off %v: %v",
|
||||
sourceChunk.GetFileIdString(), path, len(partialData), eofBackoff, retryErr)
|
||||
time.Sleep(eofBackoff)
|
||||
transferStatus.Status = "downloading"
|
||||
} else {
|
||||
glog.V(0).Infof("replicate %s for %s: %v", sourceChunk.GetFileIdString(), path, retryErr)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user