volume.fsck: increase default cutoffTimeAgo from 5 minutes to 5 hours (#7730)

* volume.fsck: increase default cutoffTimeAgo from 5 minutes to 5 hours

This change makes the fsck check more conservative by only considering
chunks older than 5 hours as potential orphans. A 5-minute window was
too aggressive and could incorrectly flag recently written chunks as
orphans, especially in busy systems or during backup operations.

Addresses #7649

* Update command_volume_fsck.go

* volume.fsck: add help text explaining cutoffTimeAgo parameter

* Update command_volume_fsck.go
This commit is contained in:
Chris Lu
2025-12-12 23:42:27 -08:00
committed by GitHub
parent 72853a3bbf
commit 93cca3a96b

View File

@@ -76,6 +76,12 @@ func (c *commandVolumeFsck) Help() string {
2. collect all file ids from the filer, as set B
3. find out the set B subtract A
-cutoffTimeAgo is used to only check chunks older than the cutoff time.
This is important because:
Chunks are uploaded to volume servers before metadata is committed to filer.
A newly uploaded chunk may appear as orphan if metadata commit is still pending.
The default 5h cutoff provides sufficient buffer for metadata commits.
`
}
@@ -94,7 +100,7 @@ func (c *commandVolumeFsck) Do(args []string, commandEnv *CommandEnv, writer io.
c.forcePurging = fsckCommand.Bool("forcePurging", false, "delete missing data from volumes in one replica used together with applyPurging")
purgeAbsent := fsckCommand.Bool("reallyDeleteFilerEntries", false, "<expert only!> delete missing file entries from filer if the corresponding volume is missing for any reason, please ensure all still existing/expected volumes are connected! used together with findMissingChunksInFiler")
tempPath := fsckCommand.String("tempPath", path.Join(os.TempDir()), "path for temporary idx files")
cutoffTimeAgo := fsckCommand.Duration("cutoffTimeAgo", 5*time.Minute, "only include entries on volume servers before this cutoff time to check orphan chunks")
cutoffTimeAgo := fsckCommand.Duration("cutoffTimeAgo", 5*time.Hour, "only include entries on volume servers before this cutoff time to check orphan chunks")
modifyTimeAgo := fsckCommand.Duration("modifyTimeAgo", 0, "only include entries after this modify time to check orphan chunks")
c.verifyNeedle = fsckCommand.Bool("verifyNeedles", false, "check needles status from volume server")