Fix handling of fixed read-only volumes for volume.check.disk. (#7612)
There's unfortunatley no way to tell whether a volume is flagged read-only because it got full, or because it is faulty. To address this, modify the check logic so all read-only volumes are processed; if no changes are written (i.e. the volume is healthy) it is kept as read-only. Volumes which are modified in this process are deemed fixed, and switched to writable.
This commit is contained in:
@@ -64,9 +64,9 @@ func (c *commandVolumeCheckDisk) Help() string {
|
|||||||
append entries in B and not in A to A
|
append entries in B and not in A to A
|
||||||
|
|
||||||
optionally, for each non-writable volume replica A
|
optionally, for each non-writable volume replica A
|
||||||
if volume is not full
|
select a writable volume replica B
|
||||||
|
if entries in A don't match B
|
||||||
prune late volume entries not matching its index file
|
prune late volume entries not matching its index file
|
||||||
select a writable volume replica B
|
|
||||||
append missing entries from B into A
|
append missing entries from B into A
|
||||||
mark the volume as writable (healthy)
|
mark the volume as writable (healthy)
|
||||||
|
|
||||||
@@ -179,9 +179,16 @@ func (vcd *volumeCheckDisk) checkWritableVolumes(volumeReplicas map[uint32][]*Vo
|
|||||||
writableReplicas = append(writableReplicas[:1], writableReplicas[2:]...)
|
writableReplicas = append(writableReplicas[:1], writableReplicas[2:]...)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if err := vcd.syncTwoReplicas(a, b, true); err != nil {
|
|
||||||
vcd.write("sync volume %d on %s and %s: %v", a.info.Id, a.location.dataNode.Id, b.location.dataNode.Id, err)
|
modified, err := vcd.syncTwoReplicas(a, b, true)
|
||||||
|
if err != nil {
|
||||||
|
vcd.write("failed to sync volumes %d on %s and %s: %v", a.info.Id, a.location.dataNode.Id, b.location.dataNode.Id, err)
|
||||||
|
} else {
|
||||||
|
if modified {
|
||||||
|
vcd.write("synced %s and %s for volume %d", a.location.dataNode.Id, b.location.dataNode.Id, a.info.Id)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// always choose the larger volume to be the source
|
// always choose the larger volume to be the source
|
||||||
if a.info.FileCount > b.info.FileCount {
|
if a.info.FileCount > b.info.FileCount {
|
||||||
writableReplicas = append(writableReplicas[:1], writableReplicas[2:]...)
|
writableReplicas = append(writableReplicas[:1], writableReplicas[2:]...)
|
||||||
@@ -280,19 +287,25 @@ func (vcd *volumeCheckDisk) checkReadOnlyVolumes(volumeReplicas map[uint32][]*Vo
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// ...fix it...
|
// ...try to fix it...
|
||||||
// TODO: test whether syncTwoReplicas() is enough to prune garbage entries on broken volumes.
|
// TODO: test whether syncTwoReplicas() is enough to prune garbage entries on broken volumes...
|
||||||
if err := vcd.syncTwoReplicas(source, r, false); err != nil {
|
modified, err := vcd.syncTwoReplicas(source, r, false)
|
||||||
vcd.write("sync read-only volume %d on %s from %s: %v\n", vid, r.location.dataNode.Id, source.location.dataNode.Id, err)
|
if err != nil {
|
||||||
|
vcd.write("sync read-only volume %d on %s from %s: %v", vid, r.location.dataNode.Id, source.location.dataNode.Id, err)
|
||||||
|
|
||||||
// ...or revert it back to read-only, if something went wrong.
|
|
||||||
// TODO: we should keep unchanged volumes as read-only, so we don't modify valid volumes which are full.
|
|
||||||
if roErr := vcd.makeVolumeReadonly(vid, r); roErr != nil {
|
if roErr := vcd.makeVolumeReadonly(vid, r); roErr != nil {
|
||||||
return fmt.Errorf("failed to make volume %d on %s readonly after: %v: %v", vid, r.location.dataNode.Id, err, roErr)
|
return fmt.Errorf("failed to revert volume %d on %s to readonly after: %v: %v", vid, r.location.dataNode.Id, err, roErr)
|
||||||
}
|
}
|
||||||
vcd.write("volume %d on %s is now read-only\n", vid, r.location.dataNode.Id)
|
|
||||||
|
|
||||||
return err
|
return err
|
||||||
|
} else {
|
||||||
|
if modified {
|
||||||
|
vcd.write("volume %d on %s is now synced to %d and writable", vid, r.location.dataNode.Id, source.location.dataNode.Id)
|
||||||
|
} else {
|
||||||
|
// ...or restore back to read-only, if no changes were made.
|
||||||
|
if err := vcd.makeVolumeReadonly(vid, r); err != nil {
|
||||||
|
return fmt.Errorf("failed to revert volume %d on %s to readonly: %v", vid, r.location.dataNode.Id, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
@@ -411,35 +424,39 @@ func (vcd *volumeCheckDisk) shouldSkipVolume(a, b *VolumeReplica) (bool, error)
|
|||||||
|
|
||||||
// syncTwoReplicas attempts to sync all entries from a source volume replica into a target. If bi-directional mode
|
// syncTwoReplicas attempts to sync all entries from a source volume replica into a target. If bi-directional mode
|
||||||
// is enabled, changes from target are also synced back into the source.
|
// is enabled, changes from target are also synced back into the source.
|
||||||
func (vcd *volumeCheckDisk) syncTwoReplicas(source, target *VolumeReplica, bidi bool) (err error) {
|
// Returns true if source and/or target were modified, false otherwise.
|
||||||
|
func (vcd *volumeCheckDisk) syncTwoReplicas(source, target *VolumeReplica, bidi bool) (modified bool, err error) {
|
||||||
sourceHasChanges, targetHasChanges := true, true
|
sourceHasChanges, targetHasChanges := true, true
|
||||||
const maxIterations = 5
|
const maxIterations = 5
|
||||||
iteration := 0
|
iteration := 0
|
||||||
|
|
||||||
|
modified = false
|
||||||
|
|
||||||
for (sourceHasChanges || targetHasChanges) && iteration < maxIterations {
|
for (sourceHasChanges || targetHasChanges) && iteration < maxIterations {
|
||||||
iteration++
|
iteration++
|
||||||
vcd.writeVerbose("sync iteration %d/%d for volume %d", iteration, maxIterations, source.info.Id)
|
vcd.writeVerbose("sync iteration %d/%d for volume %d", iteration, maxIterations, source.info.Id)
|
||||||
|
|
||||||
prevSourceHasChanges, prevTargetHasChanges := sourceHasChanges, targetHasChanges
|
prevSourceHasChanges, prevTargetHasChanges := sourceHasChanges, targetHasChanges
|
||||||
if sourceHasChanges, targetHasChanges, err = vcd.checkBoth(source, target, bidi); err != nil {
|
if sourceHasChanges, targetHasChanges, err = vcd.checkBoth(source, target, bidi); err != nil {
|
||||||
return err
|
return modified, err
|
||||||
}
|
}
|
||||||
|
modified = modified || sourceHasChanges || targetHasChanges
|
||||||
|
|
||||||
// Detect if we're stuck in a loop with no progress
|
// Detect if we're stuck in a loop with no progress
|
||||||
if iteration > 1 && prevSourceHasChanges == sourceHasChanges && prevTargetHasChanges == targetHasChanges && (sourceHasChanges || targetHasChanges) {
|
if iteration > 1 && prevSourceHasChanges == sourceHasChanges && prevTargetHasChanges == targetHasChanges && (sourceHasChanges || targetHasChanges) {
|
||||||
vcd.write("volume %d sync is not making progress between %s and %s after iteration %d, stopping to prevent infinite loop",
|
vcd.write("volume %d sync is not making progress between %s and %s after iteration %d, stopping to prevent infinite loop",
|
||||||
source.info.Id, source.location.dataNode.Id, target.location.dataNode.Id, iteration)
|
source.info.Id, source.location.dataNode.Id, target.location.dataNode.Id, iteration)
|
||||||
return fmt.Errorf("sync not making progress after %d iterations", iteration)
|
return modified, fmt.Errorf("sync not making progress after %d iterations", iteration)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if iteration >= maxIterations && (sourceHasChanges || targetHasChanges) {
|
if iteration >= maxIterations && (sourceHasChanges || targetHasChanges) {
|
||||||
vcd.write("volume %d sync reached maximum iterations (%d) between %s and %s, may need manual intervention",
|
vcd.write("volume %d sync reached maximum iterations (%d) between %s and %s, may need manual intervention",
|
||||||
source.info.Id, maxIterations, source.location.dataNode.Id, target.location.dataNode.Id)
|
source.info.Id, maxIterations, source.location.dataNode.Id, target.location.dataNode.Id)
|
||||||
return fmt.Errorf("reached maximum sync iterations (%d)", maxIterations)
|
return modified, fmt.Errorf("reached maximum sync iterations (%d)", maxIterations)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return modified, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// checkBoth performs a sync between source and target volume replicas. If bi-directional mode is enabled, changes from target are also synced back into the source.
|
// checkBoth performs a sync between source and target volume replicas. If bi-directional mode is enabled, changes from target are also synced back into the source.
|
||||||
@@ -628,7 +645,7 @@ func (vcd *volumeCheckDisk) copyVolumeIndexFile(collection string, volumeId uint
|
|||||||
|
|
||||||
copyFileClient, err := volumeServerClient.CopyFile(context.Background(), &volume_server_pb.CopyFileRequest{
|
copyFileClient, err := volumeServerClient.CopyFile(context.Background(), &volume_server_pb.CopyFileRequest{
|
||||||
VolumeId: volumeId,
|
VolumeId: volumeId,
|
||||||
Ext: ".idx",
|
Ext: ext,
|
||||||
CompactionRevision: math.MaxUint32,
|
CompactionRevision: math.MaxUint32,
|
||||||
StopOffset: math.MaxInt64,
|
StopOffset: math.MaxInt64,
|
||||||
Collection: collection,
|
Collection: collection,
|
||||||
|
|||||||
Reference in New Issue
Block a user