Give the ScrubVolume() RPC an option to flag found broken volumes as read-only. (#8360)

* Give the `ScrubVolume()` RPC an option to flag found broken volumes as read-only.

Also exposes this option in the shell `volume.scrub` command.

* Remove redundant test in `TestVolumeMarkReadonlyWritableErrorPaths`.

417051bb slightly rearranges the logic for `VolumeMarkReadonly()` and `VolumeMarkWritable()`,
so calling them for invalid volume IDs will actually yield that error, instead of checking
maintnenance mode first.
This commit is contained in:
Lisandro Pin
2026-03-26 18:20:57 +01:00
committed by GitHub
parent 6cf34f2376
commit e5cf2d2a19
7 changed files with 152 additions and 116 deletions

View File

@@ -162,44 +162,56 @@ func (vs *VolumeServer) VolumeConfigure(ctx context.Context, req *volume_server_
}
func (vs *VolumeServer) VolumeMarkReadonly(ctx context.Context, req *volume_server_pb.VolumeMarkReadonlyRequest) (*volume_server_pb.VolumeMarkReadonlyResponse, error) {
resp := &volume_server_pb.VolumeMarkReadonlyResponse{}
func (vs *VolumeServer) makeVolumeReadonly(ctx context.Context, v *storage.Volume, persist bool) error {
if err := vs.CheckMaintenanceMode(); err != nil {
return resp, err
}
v := vs.store.GetVolume(needle.VolumeId(req.VolumeId))
if v == nil {
return nil, fmt.Errorf("volume %d not found", req.VolumeId)
return err
}
// step 1: stop master from redirecting traffic here
if err := vs.notifyMasterVolumeReadonly(v, true); err != nil {
return resp, err
if err := vs.notifyMasterVolumeReadonly(ctx, v, true); err != nil {
return err
}
// rare case 1.5: it will be unlucky if heartbeat happened between step 1 and 2.
// step 2: mark local volume as readonly
err := vs.store.MarkVolumeReadonly(needle.VolumeId(req.VolumeId), req.GetPersist())
if err != nil {
glog.Errorf("volume mark readonly %v: %v", req, err)
if err := vs.store.MarkVolumeReadonly(v.Id, persist); err != nil {
glog.Errorf("mark volume %d readonly: %v", v.Id, err)
return err
} else {
glog.V(2).Infof("volume mark readonly %v", req)
glog.V(2).Infof("volume %d marked readonly", v.Id)
}
// step 3: tell master from redirecting traffic here again, to prevent rare case 1.5
if err := vs.notifyMasterVolumeReadonly(v, true); err != nil {
return resp, err
if err := vs.notifyMasterVolumeReadonly(ctx, v, true); err != nil {
return err
}
return resp, err
return nil
}
func (vs *VolumeServer) notifyMasterVolumeReadonly(v *storage.Volume, isReadOnly bool) error {
if grpcErr := pb.WithMasterClient(false, vs.GetMaster(context.Background()), vs.grpcDialOption, false, func(client master_pb.SeaweedClient) error {
func (vs *VolumeServer) makeVolumeWritable(ctx context.Context, v *storage.Volume) error {
if err := vs.CheckMaintenanceMode(); err != nil {
return err
}
if err := vs.store.MarkVolumeWritable(v.Id); err != nil {
glog.Errorf("mark volume %d writable: %v", v.Id, err)
return err
} else {
glog.V(2).Infof("volume %d marked writable", v.Id)
}
// enable master to redirect traffic here
if err := vs.notifyMasterVolumeReadonly(ctx, v, false); err != nil {
return err
}
return nil
}
func (vs *VolumeServer) notifyMasterVolumeReadonly(ctx context.Context, v *storage.Volume, isReadOnly bool) error {
if grpcErr := pb.WithMasterClient(false, vs.GetMaster(ctx), vs.grpcDialOption, false, func(client master_pb.SeaweedClient) error {
_, err := client.VolumeMarkReadonly(context.Background(), &master_pb.VolumeMarkReadonlyRequest{
Ip: vs.store.Ip,
Port: uint32(vs.store.Port),
@@ -221,32 +233,34 @@ func (vs *VolumeServer) notifyMasterVolumeReadonly(v *storage.Volume, isReadOnly
return nil
}
func (vs *VolumeServer) VolumeMarkWritable(ctx context.Context, req *volume_server_pb.VolumeMarkWritableRequest) (*volume_server_pb.VolumeMarkWritableResponse, error) {
resp := &volume_server_pb.VolumeMarkWritableResponse{}
if err := vs.CheckMaintenanceMode(); err != nil {
return resp, err
}
func (vs *VolumeServer) VolumeMarkReadonly(ctx context.Context, req *volume_server_pb.VolumeMarkReadonlyRequest) (*volume_server_pb.VolumeMarkReadonlyResponse, error) {
resp := &volume_server_pb.VolumeMarkReadonlyResponse{}
v := vs.store.GetVolume(needle.VolumeId(req.VolumeId))
if v == nil {
return nil, fmt.Errorf("volume %d not found", req.VolumeId)
return resp, fmt.Errorf("volume %d not found", req.VolumeId)
}
err := vs.store.MarkVolumeWritable(needle.VolumeId(req.VolumeId))
if err != nil {
glog.Errorf("volume mark writable %v: %v", req, err)
} else {
glog.V(2).Infof("volume mark writable %v", req)
}
// enable master to redirect traffic here
if err := vs.notifyMasterVolumeReadonly(v, false); err != nil {
if err := vs.makeVolumeReadonly(ctx, v, req.GetPersist()); err != nil {
return resp, err
}
return resp, err
return resp, nil
}
func (vs *VolumeServer) VolumeMarkWritable(ctx context.Context, req *volume_server_pb.VolumeMarkWritableRequest) (*volume_server_pb.VolumeMarkWritableResponse, error) {
resp := &volume_server_pb.VolumeMarkWritableResponse{}
v := vs.store.GetVolume(needle.VolumeId(req.VolumeId))
if v == nil {
return resp, fmt.Errorf("volume %d not found", req.VolumeId)
}
if err := vs.makeVolumeWritable(ctx, v); err != nil {
return resp, err
}
return resp, nil
}
func (vs *VolumeServer) VolumeStatus(ctx context.Context, req *volume_server_pb.VolumeStatusRequest) (*volume_server_pb.VolumeStatusResponse, error) {

View File

@@ -2,9 +2,11 @@ package weed_server
import (
"context"
"errors"
"fmt"
"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
"github.com/seaweedfs/seaweedfs/weed/storage"
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
)
@@ -22,6 +24,7 @@ func (vs *VolumeServer) ScrubVolume(ctx context.Context, req *volume_server_pb.S
var details []string
var totalVolumes, totalFiles uint64
var brokenVolumes []*storage.Volume
var brokenVolumeIds []uint32
for _, vid := range vids {
v := vs.store.GetVolume(vid)
@@ -46,13 +49,29 @@ func (vs *VolumeServer) ScrubVolume(ctx context.Context, req *volume_server_pb.S
totalVolumes += 1
totalFiles += uint64(files)
if len(serrs) != 0 {
brokenVolumeIds = append(brokenVolumeIds, uint32(vid))
brokenVolumes = append(brokenVolumes, v)
brokenVolumeIds = append(brokenVolumeIds, uint32(v.Id))
for _, err := range serrs {
details = append(details, err.Error())
}
}
}
errs := []error{}
if req.GetMarkBrokenVolumesReadonly() {
for _, v := range brokenVolumes {
if err := vs.makeVolumeReadonly(ctx, v, true); err != nil {
errs = append(errs, err)
details = append(details, err.Error())
} else {
details = append(details, fmt.Sprintf("volume %d is now read-only", v.Id))
}
}
}
if len(errs) != 0 {
return nil, errors.Join(errs...)
}
res := &volume_server_pb.ScrubVolumeResponse{
TotalVolumes: totalVolumes,
TotalFiles: totalFiles,
@@ -102,7 +121,7 @@ func (vs *VolumeServer) ScrubEcVolume(ctx context.Context, req *volume_server_pb
totalVolumes += 1
totalFiles += uint64(files)
if len(serrs) != 0 || len(shardInfos) != 0 {
brokenVolumeIds = append(brokenVolumeIds, uint32(vid))
brokenVolumeIds = append(brokenVolumeIds, uint32(v.VolumeId))
brokenShardInfos = append(brokenShardInfos, shardInfos...)
for _, err := range serrs {
details = append(details, err.Error())