Fix disk error handling in vacuum compaction (#8244)

When a disk reports I/O errors during vacuum compaction (e.g., 'read /mnt/d1/weed/oc_xyz.dat: input/output error'), the vacuum task should signal the error to the master so that the master can:
1. Drop the faulty volume replica
2. Rebuild the replica from healthy copies

Changes:
- Add checkReadWriteError() calls in vacuum read paths (ReadNeedleBlob, ReadData, ScanVolumeFile) to flag EIO errors in volume.lastIoError
- Wrap errors with the %w verb instead of %v so the underlying EIO stays in the error chain (and remains detectable via errors.Is/errors.As) as it propagates to callers
- The existing heartbeat logic will detect lastIoError and remove the bad volume

Fixes issue #8237
This commit is contained in:
Chris Lu
2026-02-07 21:33:02 -08:00
committed by GitHub
parent e6ee293c17
commit 330ba7d9dc
2 changed files with 8 additions and 5 deletions

View File

@@ -219,10 +219,10 @@ func ScanVolumeFile(dirname string, collection string, id needle.VolumeId,
volumeFileScanner VolumeFileScanner) (err error) {
var v *Volume
if v, err = loadVolumeWithoutIndex(dirname, collection, id, needleMapKind, needle.GetCurrentVersion()); err != nil {
return fmt.Errorf("failed to load volume %d: %v", id, err)
return fmt.Errorf("failed to load volume %d: %w", id, err)
}
if err = volumeFileScanner.VisitSuperBlock(v.SuperBlock); err != nil {
return fmt.Errorf("failed to process volume %d super block: %v", id, err)
return fmt.Errorf("failed to process volume %d super block: %w", id, err)
}
defer v.Close()
@@ -239,7 +239,7 @@ func ScanVolumeFileFrom(version needle.Version, datBackend backend.BackendStorag
if e == io.EOF {
return nil
}
return fmt.Errorf("cannot read %s at offset %d: %v", datBackend.Name(), offset, e)
return fmt.Errorf("cannot read %s at offset %d: %w", datBackend.Name(), offset, e)
}
for n != nil {
var needleBody []byte