filer.sync: fix data races in ChunkTransferStatus

Add sync.RWMutex to ChunkTransferStatus and lock around all field
mutations in fetchAndWrite. ActiveTransfers now returns value copies
under RLock so callers get immutable snapshots.
This commit is contained in:
Chris Lu
2026-04-02 13:04:21 -07:00
parent b5cdd71600
commit 597d383ca4
2 changed files with 25 additions and 4 deletions

View File

@@ -290,8 +290,10 @@ func (fs *FilerSink) fetchAndWrite(sourceChunk *filer_pb.FileChunk, path string,
fullData = data fullData = data
} }
transferStatus.mu.Lock()
transferStatus.BytesReceived = int64(len(fullData)) transferStatus.BytesReceived = int64(len(fullData))
transferStatus.Status = "uploading" transferStatus.Status = "uploading"
transferStatus.mu.Unlock()
currentFileId, uploadResult, uploadErr, _ := uploader.UploadWithRetry( currentFileId, uploadResult, uploadErr, _ := uploader.UploadWithRetry(
fs, fs,
@@ -335,15 +337,21 @@ func (fs *FilerSink) fetchAndWrite(sourceChunk *filer_pb.FileChunk, path string,
glog.V(1).Infof("skip retrying stale source %s for %s: %v", sourceChunk.GetFileIdString(), path, retryErr) glog.V(1).Infof("skip retrying stale source %s for %s: %v", sourceChunk.GetFileIdString(), path, retryErr)
return false return false
} }
transferStatus.mu.Lock()
transferStatus.LastErr = retryErr.Error() transferStatus.LastErr = retryErr.Error()
transferStatus.mu.Unlock()
if isEofError(retryErr) { if isEofError(retryErr) {
eofBackoff = nextEofBackoff(eofBackoff) eofBackoff = nextEofBackoff(eofBackoff)
transferStatus.mu.Lock()
transferStatus.BytesReceived = int64(len(partialData)) transferStatus.BytesReceived = int64(len(partialData))
transferStatus.Status = fmt.Sprintf("waiting %v", eofBackoff) transferStatus.Status = fmt.Sprintf("waiting %v", eofBackoff)
transferStatus.mu.Unlock()
glog.V(0).Infof("source connection interrupted while replicating %s for %s (%d bytes received so far), backing off %v: %v", glog.V(0).Infof("source connection interrupted while replicating %s for %s (%d bytes received so far), backing off %v: %v",
sourceChunk.GetFileIdString(), path, len(partialData), eofBackoff, retryErr) sourceChunk.GetFileIdString(), path, len(partialData), eofBackoff, retryErr)
time.Sleep(eofBackoff) time.Sleep(eofBackoff)
transferStatus.mu.Lock()
transferStatus.Status = "downloading" transferStatus.Status = "downloading"
transferStatus.mu.Unlock()
} else { } else {
glog.V(0).Infof("replicate %s for %s: %v", sourceChunk.GetFileIdString(), path, retryErr) glog.V(0).Infof("replicate %s for %s: %v", sourceChunk.GetFileIdString(), path, retryErr)
} }

View File

@@ -22,7 +22,11 @@ import (
) )
// ChunkTransferStatus tracks the progress of a single chunk being replicated. // ChunkTransferStatus tracks the progress of a single chunk being replicated.
// Mutable fields are guarded by mu: ChunkFileId and Path are immutable after creation,
// while BytesReceived, Status, and LastErr are updated by fetchAndWrite and
// read by ActiveTransfers.
type ChunkTransferStatus struct { type ChunkTransferStatus struct {
mu sync.RWMutex
ChunkFileId string ChunkFileId string
Path string Path string
BytesReceived int64 BytesReceived int64
@@ -112,11 +116,20 @@ func (fs *FilerSink) SetChunkConcurrency(concurrency int) {
} }
} }
// ActiveTransfers returns a snapshot of all in-progress chunk transfers. // ActiveTransfers returns an immutable snapshot of all in-progress chunk transfers.
func (fs *FilerSink) ActiveTransfers() []*ChunkTransferStatus { func (fs *FilerSink) ActiveTransfers() []ChunkTransferStatus {
var transfers []*ChunkTransferStatus var transfers []ChunkTransferStatus
fs.activeTransfers.Range(func(key, value any) bool { fs.activeTransfers.Range(func(key, value any) bool {
transfers = append(transfers, value.(*ChunkTransferStatus)) t := value.(*ChunkTransferStatus)
t.mu.RLock()
transfers = append(transfers, ChunkTransferStatus{
ChunkFileId: t.ChunkFileId,
Path: t.Path,
BytesReceived: t.BytesReceived,
Status: t.Status,
LastErr: t.LastErr,
})
t.mu.RUnlock()
return true return true
}) })
return transfers return transfers