shell: volume.check.disk adds retries in case the volumes have just been moved
related to https://github.com/chrislusf/seaweedfs/issues/2194
This commit is contained in:
@@ -89,25 +89,28 @@ func (c *commandVolumeCheckDisk) Do(args []string, commandEnv *CommandEnv, write
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// reset index db
|
aHasChanges, bHasChanges := true, true
|
||||||
aDB.Close()
|
for aHasChanges || bHasChanges {
|
||||||
bDB.Close()
|
// reset index db
|
||||||
aDB, bDB = needle_map.NewMemDb(), needle_map.NewMemDb()
|
aDB.Close()
|
||||||
|
bDB.Close()
|
||||||
|
aDB, bDB = needle_map.NewMemDb(), needle_map.NewMemDb()
|
||||||
|
|
||||||
// read index db
|
// read index db
|
||||||
if err := c.readIndexDatabase(aDB, a.info.Collection, a.info.Id, a.location.dataNode.Id, *verbose, writer); err != nil {
|
if err := c.readIndexDatabase(aDB, a.info.Collection, a.info.Id, a.location.dataNode.Id, *verbose, writer); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if err := c.readIndexDatabase(bDB, b.info.Collection, b.info.Id, b.location.dataNode.Id, *verbose, writer); err != nil {
|
if err := c.readIndexDatabase(bDB, b.info.Collection, b.info.Id, b.location.dataNode.Id, *verbose, writer); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// find and make up the differnces
|
// find and make up the differences
|
||||||
if err := c.doVolumeCheckDisk(aDB, bDB, a, b, *verbose, writer, *applyChanges, *nonRepairThreshold); err != nil {
|
if aHasChanges, err = c.doVolumeCheckDisk(aDB, bDB, a, b, *verbose, writer, *applyChanges, *nonRepairThreshold); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if err := c.doVolumeCheckDisk(bDB, aDB, b, a, *verbose, writer, *applyChanges, *nonRepairThreshold); err != nil {
|
if bHasChanges, err = c.doVolumeCheckDisk(bDB, aDB, b, a, *verbose, writer, *applyChanges, *nonRepairThreshold); err != nil {
|
||||||
return err
|
return err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
replicas = replicas[1:]
|
replicas = replicas[1:]
|
||||||
}
|
}
|
||||||
@@ -116,7 +119,7 @@ func (c *commandVolumeCheckDisk) Do(args []string, commandEnv *CommandEnv, write
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *commandVolumeCheckDisk) doVolumeCheckDisk(subtrahend, minuend *needle_map.MemDb, source, target *VolumeReplica, verbose bool, writer io.Writer, applyChanges bool, nonRepairThreshold float64) error {
|
func (c *commandVolumeCheckDisk) doVolumeCheckDisk(subtrahend, minuend *needle_map.MemDb, source, target *VolumeReplica, verbose bool, writer io.Writer, applyChanges bool, nonRepairThreshold float64) (hasChanges bool, err error) {
|
||||||
|
|
||||||
// find missing keys
|
// find missing keys
|
||||||
// hash join, can be more efficient
|
// hash join, can be more efficient
|
||||||
@@ -133,12 +136,12 @@ func (c *commandVolumeCheckDisk) doVolumeCheckDisk(subtrahend, minuend *needle_m
|
|||||||
fmt.Fprintf(writer, "volume %d %s has %d entries, %s missed %d entries\n", source.info.Id, source.location.dataNode.Id, counter, target.location.dataNode.Id, len(missingNeedles))
|
fmt.Fprintf(writer, "volume %d %s has %d entries, %s missed %d entries\n", source.info.Id, source.location.dataNode.Id, counter, target.location.dataNode.Id, len(missingNeedles))
|
||||||
|
|
||||||
if counter == 0 || len(missingNeedles) == 0 {
|
if counter == 0 || len(missingNeedles) == 0 {
|
||||||
return nil
|
return false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
missingNeedlesFraction := float64(len(missingNeedles)) / float64(counter)
|
missingNeedlesFraction := float64(len(missingNeedles)) / float64(counter)
|
||||||
if missingNeedlesFraction > nonRepairThreshold {
|
if missingNeedlesFraction > nonRepairThreshold {
|
||||||
return fmt.Errorf(
|
return false, fmt.Errorf(
|
||||||
"failed to start repair volume %d, percentage of missing keys is greater than the threshold: %.2f > %.2f",
|
"failed to start repair volume %d, percentage of missing keys is greater than the threshold: %.2f > %.2f",
|
||||||
source.info.Id, missingNeedlesFraction, nonRepairThreshold)
|
source.info.Id, missingNeedlesFraction, nonRepairThreshold)
|
||||||
}
|
}
|
||||||
@@ -147,7 +150,7 @@ func (c *commandVolumeCheckDisk) doVolumeCheckDisk(subtrahend, minuend *needle_m
|
|||||||
|
|
||||||
needleBlob, err := c.readSourceNeedleBlob(source.location.dataNode.Id, source.info.Id, needleValue)
|
needleBlob, err := c.readSourceNeedleBlob(source.location.dataNode.Id, source.info.Id, needleValue)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if !applyChanges {
|
if !applyChanges {
|
||||||
@@ -158,13 +161,15 @@ func (c *commandVolumeCheckDisk) doVolumeCheckDisk(subtrahend, minuend *needle_m
|
|||||||
fmt.Fprintf(writer, "read %d,%x %s => %s \n", source.info.Id, needleValue.Key, source.location.dataNode.Id, target.location.dataNode.Id)
|
fmt.Fprintf(writer, "read %d,%x %s => %s \n", source.info.Id, needleValue.Key, source.location.dataNode.Id, target.location.dataNode.Id)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := c.writeNeedleBlobToTarget(target.location.dataNode.Id, source.info.Id, needleValue, needleBlob); err != nil {
|
hasChanges = true
|
||||||
return err
|
|
||||||
|
if err = c.writeNeedleBlobToTarget(target.location.dataNode.Id, source.info.Id, needleValue, needleBlob); err != nil {
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *commandVolumeCheckDisk) readSourceNeedleBlob(sourceVolumeServer string, volumeId uint32, needleValue needle_map.NeedleValue) (needleBlob []byte, err error) {
|
func (c *commandVolumeCheckDisk) readSourceNeedleBlob(sourceVolumeServer string, volumeId uint32, needleValue needle_map.NeedleValue) (needleBlob []byte, err error) {
|
||||||
|
|||||||
Reference in New Issue
Block a user