fix: skip log files with deleted volumes in filer backup (#7692)

fix: skip log files with deleted volumes in filer backup (#3720)

When filer.backup or filer.meta.backup resumes after being stopped, it may
encounter persisted log files stored on volumes that have since been deleted
(via volume.deleteEmpty -force). Previously, this caused the backup to get
stuck in an infinite retry loop with 'volume X not found' errors.

This fix catches 'volume not found' errors when reading log files and skips
the problematic file instead of failing. The backup will now:
- Log a warning about the missing volume
- Skip the problematic log file
- Continue with the next log file, allowing progress

The VolumeNotFoundPattern regex was already defined but never used - this
change puts it to use.

Fixes #3720
This commit is contained in:
Chris Lu
2025-12-09 19:03:15 -08:00
committed by GitHub
parent 4f382b77c8
commit 1b13324fb7
3 changed files with 78 additions and 15 deletions

View File

@@ -152,9 +152,21 @@ func (f *Filer) logFlushFunc(logBuffer *log_buffer.LogBuffer, startTime, stopTim
}
// Precompiled patterns applied to error message text in order to recognize
// read failures caused by data that no longer exists.
var (
	// chunkNotFoundPattern matches messages containing either
	// "urls not found" or "File Not Found" (case-sensitive).
	chunkNotFoundPattern = regexp.MustCompile(`(urls not found|File Not Found)`)

	// volumeNotFoundPattern matches messages of the form
	// "volume <digits> not found".
	volumeNotFoundPattern = regexp.MustCompile(`volume \d+? not found`)

	// VolumeNotFoundPattern is the exported pattern; it is compiled from the
	// same expression as volumeNotFoundPattern.
	VolumeNotFoundPattern = regexp.MustCompile(`volume \d+? not found`)
)
// isChunkNotFoundError reports whether err's message matches
// volumeNotFoundPattern or chunkNotFoundPattern, i.e. whether it looks like a
// read that failed because the backing volume or chunk has been deleted.
// Such errors can be skipped when reading persisted log files, since the data
// is unrecoverable. A nil err is never a match.
func isChunkNotFoundError(err error) bool {
	if err != nil {
		text := err.Error()
		switch {
		case volumeNotFoundPattern.MatchString(text):
			return true
		case chunkNotFoundPattern.MatchString(text):
			return true
		}
	}
	return false
}
func (f *Filer) ReadPersistedLogBuffer(startPosition log_buffer.MessagePosition, stopTsNs int64, eachLogEntryFn log_buffer.EachLogEntryFuncType) (lastTsNs int64, isDone bool, err error) {
visitor, visitErr := f.collectPersistedLogBuffer(startPosition, stopTsNs)