shell: volume.check.disk adds retries in case the volumes are just moved

related to https://github.com/chrislusf/seaweedfs/issues/2194
This commit is contained in:
Chris Lu
2021-07-13 11:19:56 -07:00
parent 49c66e88a0
commit 6103649ffb

View File

@@ -89,25 +89,28 @@ func (c *commandVolumeCheckDisk) Do(args []string, commandEnv *CommandEnv, write
continue continue
} }
// reset index db aHasChanges, bHasChanges := true, true
aDB.Close() for aHasChanges || bHasChanges {
bDB.Close() // reset index db
aDB, bDB = needle_map.NewMemDb(), needle_map.NewMemDb() aDB.Close()
bDB.Close()
aDB, bDB = needle_map.NewMemDb(), needle_map.NewMemDb()
// read index db // read index db
if err := c.readIndexDatabase(aDB, a.info.Collection, a.info.Id, a.location.dataNode.Id, *verbose, writer); err != nil { if err := c.readIndexDatabase(aDB, a.info.Collection, a.info.Id, a.location.dataNode.Id, *verbose, writer); err != nil {
return err return err
} }
if err := c.readIndexDatabase(bDB, b.info.Collection, b.info.Id, b.location.dataNode.Id, *verbose, writer); err != nil { if err := c.readIndexDatabase(bDB, b.info.Collection, b.info.Id, b.location.dataNode.Id, *verbose, writer); err != nil {
return err return err
} }
// find and make up the differnces // find and make up the differences
if err := c.doVolumeCheckDisk(aDB, bDB, a, b, *verbose, writer, *applyChanges, *nonRepairThreshold); err != nil { if aHasChanges, err = c.doVolumeCheckDisk(aDB, bDB, a, b, *verbose, writer, *applyChanges, *nonRepairThreshold); err != nil {
return err return err
} }
if err := c.doVolumeCheckDisk(bDB, aDB, b, a, *verbose, writer, *applyChanges, *nonRepairThreshold); err != nil { if bHasChanges, err = c.doVolumeCheckDisk(bDB, aDB, b, a, *verbose, writer, *applyChanges, *nonRepairThreshold); err != nil {
return err return err
}
} }
replicas = replicas[1:] replicas = replicas[1:]
} }
@@ -116,7 +119,7 @@ func (c *commandVolumeCheckDisk) Do(args []string, commandEnv *CommandEnv, write
return nil return nil
} }
func (c *commandVolumeCheckDisk) doVolumeCheckDisk(subtrahend, minuend *needle_map.MemDb, source, target *VolumeReplica, verbose bool, writer io.Writer, applyChanges bool, nonRepairThreshold float64) error { func (c *commandVolumeCheckDisk) doVolumeCheckDisk(subtrahend, minuend *needle_map.MemDb, source, target *VolumeReplica, verbose bool, writer io.Writer, applyChanges bool, nonRepairThreshold float64) (hasChanges bool, err error) {
// find missing keys // find missing keys
// hash join, can be more efficient // hash join, can be more efficient
@@ -133,12 +136,12 @@ func (c *commandVolumeCheckDisk) doVolumeCheckDisk(subtrahend, minuend *needle_m
fmt.Fprintf(writer, "volume %d %s has %d entries, %s missed %d entries\n", source.info.Id, source.location.dataNode.Id, counter, target.location.dataNode.Id, len(missingNeedles)) fmt.Fprintf(writer, "volume %d %s has %d entries, %s missed %d entries\n", source.info.Id, source.location.dataNode.Id, counter, target.location.dataNode.Id, len(missingNeedles))
if counter == 0 || len(missingNeedles) == 0 { if counter == 0 || len(missingNeedles) == 0 {
return nil return false, nil
} }
missingNeedlesFraction := float64(len(missingNeedles)) / float64(counter) missingNeedlesFraction := float64(len(missingNeedles)) / float64(counter)
if missingNeedlesFraction > nonRepairThreshold { if missingNeedlesFraction > nonRepairThreshold {
return fmt.Errorf( return false, fmt.Errorf(
"failed to start repair volume %d, percentage of missing keys is greater than the threshold: %.2f > %.2f", "failed to start repair volume %d, percentage of missing keys is greater than the threshold: %.2f > %.2f",
source.info.Id, missingNeedlesFraction, nonRepairThreshold) source.info.Id, missingNeedlesFraction, nonRepairThreshold)
} }
@@ -147,7 +150,7 @@ func (c *commandVolumeCheckDisk) doVolumeCheckDisk(subtrahend, minuend *needle_m
needleBlob, err := c.readSourceNeedleBlob(source.location.dataNode.Id, source.info.Id, needleValue) needleBlob, err := c.readSourceNeedleBlob(source.location.dataNode.Id, source.info.Id, needleValue)
if err != nil { if err != nil {
return err return
} }
if !applyChanges { if !applyChanges {
@@ -158,13 +161,15 @@ func (c *commandVolumeCheckDisk) doVolumeCheckDisk(subtrahend, minuend *needle_m
fmt.Fprintf(writer, "read %d,%x %s => %s \n", source.info.Id, needleValue.Key, source.location.dataNode.Id, target.location.dataNode.Id) fmt.Fprintf(writer, "read %d,%x %s => %s \n", source.info.Id, needleValue.Key, source.location.dataNode.Id, target.location.dataNode.Id)
} }
if err := c.writeNeedleBlobToTarget(target.location.dataNode.Id, source.info.Id, needleValue, needleBlob); err != nil { hasChanges = true
return err
if err = c.writeNeedleBlobToTarget(target.location.dataNode.Id, source.info.Id, needleValue, needleBlob); err != nil {
return
} }
} }
return nil return
} }
func (c *commandVolumeCheckDisk) readSourceNeedleBlob(sourceVolumeServer string, volumeId uint32, needleValue needle_map.NeedleValue) (needleBlob []byte, err error) { func (c *commandVolumeCheckDisk) readSourceNeedleBlob(sourceVolumeServer string, volumeId uint32, needleValue needle_map.NeedleValue) (needleBlob []byte, err error) {