Add a version token on RPCs to read/update volume server states. (#8191)

* Add a version token on `GetState()`/`SetState()` RPCs for volume server states.

* Make state version a property ov `VolumeServerState` instead of an in-memory counter.

Also extend state atomicity to reads, instead of just writes.
This commit is contained in:
Lisandro Pin
2026-02-06 19:58:43 +01:00
committed by GitHub
parent c44716f9af
commit 2cda4289f4
8 changed files with 104 additions and 54 deletions

View File

@@ -9,8 +9,10 @@ import "remote.proto";
// Persistent state for volume servers. // Persistent state for volume servers.
message VolumeServerState { message VolumeServerState {
// Whether the server is in maintenance (i.e. read-only) mode. // whether the server is in maintenance (i.e. read-only) mode.
bool maintenance = 1; bool maintenance = 1;
// incremental version counter
uint32 version = 2;
} }
////////////////////////////////////////////////// //////////////////////////////////////////////////

View File

@@ -77,8 +77,10 @@ func (VolumeScrubMode) EnumDescriptor() ([]byte, []int) {
// Persistent state for volume servers. // Persistent state for volume servers.
type VolumeServerState struct { type VolumeServerState struct {
state protoimpl.MessageState `protogen:"open.v1"` state protoimpl.MessageState `protogen:"open.v1"`
// Whether the server is in maintenance (i.e. read-only) mode. // whether the server is in maintenance (i.e. read-only) mode.
Maintenance bool `protobuf:"varint,1,opt,name=maintenance,proto3" json:"maintenance,omitempty"` Maintenance bool `protobuf:"varint,1,opt,name=maintenance,proto3" json:"maintenance,omitempty"`
// incremental version counter
Version uint32 `protobuf:"varint,2,opt,name=version,proto3" json:"version,omitempty"`
unknownFields protoimpl.UnknownFields unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache sizeCache protoimpl.SizeCache
} }
@@ -120,6 +122,13 @@ func (x *VolumeServerState) GetMaintenance() bool {
return false return false
} }
func (x *VolumeServerState) GetVersion() uint32 {
if x != nil {
return x.Version
}
return 0
}
type BatchDeleteRequest struct { type BatchDeleteRequest struct {
state protoimpl.MessageState `protogen:"open.v1"` state protoimpl.MessageState `protogen:"open.v1"`
FileIds []string `protobuf:"bytes,1,rep,name=file_ids,json=fileIds,proto3" json:"file_ids,omitempty"` FileIds []string `protobuf:"bytes,1,rep,name=file_ids,json=fileIds,proto3" json:"file_ids,omitempty"`
@@ -1858,7 +1867,7 @@ func (x *GetStateResponse) GetState() *VolumeServerState {
type SetStateRequest struct { type SetStateRequest struct {
state protoimpl.MessageState `protogen:"open.v1"` state protoimpl.MessageState `protogen:"open.v1"`
// SetState updates *all* volume server flags at once. Retrieve state with GetState(), // SetState updates *all* volume server flags at once. Retrieve state/version with GetState(),
// modify individual flags as required, then call this RPC to update. // modify individual flags as required, then call this RPC to update.
State *VolumeServerState `protobuf:"bytes,1,opt,name=state,proto3" json:"state,omitempty"` State *VolumeServerState `protobuf:"bytes,1,opt,name=state,proto3" json:"state,omitempty"`
unknownFields protoimpl.UnknownFields unknownFields protoimpl.UnknownFields
@@ -6690,9 +6699,10 @@ var File_volume_server_proto protoreflect.FileDescriptor
const file_volume_server_proto_rawDesc = "" + const file_volume_server_proto_rawDesc = "" +
"\n" + "\n" +
"\x13volume_server.proto\x12\x10volume_server_pb\x1a\fremote.proto\"5\n" + "\x13volume_server.proto\x12\x10volume_server_pb\x1a\fremote.proto\"O\n" +
"\x11VolumeServerState\x12 \n" + "\x11VolumeServerState\x12 \n" +
"\vmaintenance\x18\x01 \x01(\bR\vmaintenance\"[\n" + "\vmaintenance\x18\x01 \x01(\bR\vmaintenance\x12\x18\n" +
"\aversion\x18\x02 \x01(\rR\aversion\"[\n" +
"\x12BatchDeleteRequest\x12\x19\n" + "\x12BatchDeleteRequest\x12\x19\n" +
"\bfile_ids\x18\x01 \x03(\tR\afileIds\x12*\n" + "\bfile_ids\x18\x01 \x03(\tR\afileIds\x12*\n" +
"\x11skip_cookie_check\x18\x02 \x01(\bR\x0fskipCookieCheck\"O\n" + "\x11skip_cookie_check\x18\x02 \x01(\bR\x0fskipCookieCheck\"O\n" +

View File

@@ -273,7 +273,7 @@ func (vs *VolumeServer) VolumeStatus(ctx context.Context, req *volume_server_pb.
func (vs *VolumeServer) VolumeServerStatus(ctx context.Context, req *volume_server_pb.VolumeServerStatusRequest) (*volume_server_pb.VolumeServerStatusResponse, error) { func (vs *VolumeServer) VolumeServerStatus(ctx context.Context, req *volume_server_pb.VolumeServerStatusRequest) (*volume_server_pb.VolumeServerStatusResponse, error) {
resp := &volume_server_pb.VolumeServerStatusResponse{ resp := &volume_server_pb.VolumeServerStatusResponse{
State: vs.store.State.Pb, State: vs.store.State.Proto(),
MemoryStatus: stats.MemStat(), MemoryStatus: stats.MemStat(),
Version: version.Version(), Version: version.Version(),
DataCenter: vs.dataCenter, DataCenter: vs.dataCenter,

View File

@@ -9,7 +9,7 @@ import (
// GetState returns a volume server's state flags. // GetState returns a volume server's state flags.
func (vs *VolumeServer) GetState(ctx context.Context, req *volume_server_pb.GetStateRequest) (*volume_server_pb.GetStateResponse, error) { func (vs *VolumeServer) GetState(ctx context.Context, req *volume_server_pb.GetStateRequest) (*volume_server_pb.GetStateResponse, error) {
resp := &volume_server_pb.GetStateResponse{ resp := &volume_server_pb.GetStateResponse{
State: vs.store.State.Pb, State: vs.store.State.Proto(),
} }
return resp, nil return resp, nil
@@ -17,9 +17,9 @@ func (vs *VolumeServer) GetState(ctx context.Context, req *volume_server_pb.GetS
// SetState updates state flags for volume servers. // SetState updates state flags for volume servers.
func (vs *VolumeServer) SetState(ctx context.Context, req *volume_server_pb.SetStateRequest) (*volume_server_pb.SetStateResponse, error) { func (vs *VolumeServer) SetState(ctx context.Context, req *volume_server_pb.SetStateRequest) (*volume_server_pb.SetStateResponse, error) {
err := vs.store.State.Update(req.State) err := vs.store.State.Update(req.GetState())
resp := &volume_server_pb.SetStateResponse{ resp := &volume_server_pb.SetStateResponse{
State: vs.store.State.Pb, State: vs.store.State.Proto(),
} }
return resp, err return resp, err

View File

@@ -178,7 +178,7 @@ func (vs *VolumeServer) MaintenanceMode() bool {
if vs.store == nil { if vs.store == nil {
return false return false
} }
return vs.store.State.Pb.GetMaintenance() return vs.store.State.Proto().GetMaintenance()
} }
// Checks if a volume server is in maintenance mode, and returns an error explaining why. // Checks if a volume server is in maintenance mode, and returns an error explaining why.

View File

@@ -43,10 +43,7 @@ func TestMaintenanceMode(t *testing.T) {
vs := VolumeServer{ vs := VolumeServer{
store: &storage.Store{ store: &storage.Store{
Id: "test_1234", Id: "test_1234",
State: &storage.State{ State: storage.NewStateFromProto("/some/path.pb", tc.pb),
FilePath: "/some/path.pb",
Pb: tc.pb,
},
}, },
} }

View File

@@ -160,9 +160,9 @@ func NewStore(
func (s *Store) LoadState() error { func (s *Store) LoadState() error {
err := s.State.Load() err := s.State.Load()
if s.State.Pb != nil && err == nil { if s.State.Proto() != nil && err == nil {
select { select {
case s.StateUpdateChan <- s.State.Pb: case s.StateUpdateChan <- s.State.Proto():
default: default:
glog.V(2).Infof("StateUpdateChan full during LoadState, state will be reported in heartbeat") glog.V(2).Infof("StateUpdateChan full during LoadState, state will be reported in heartbeat")
} }
@@ -171,15 +171,15 @@ func (s *Store) LoadState() error {
} }
func (s *Store) SaveState() error { func (s *Store) SaveState() error {
if s.State.Pb == nil { if s.State.Proto() == nil {
glog.Warningf("tried to save empty state for store %s", s.Id) glog.Warningf("tried to save empty state for store %s", s.Id)
return nil return nil
} }
err := s.State.Save() err := s.State.Save()
if s.State.Pb != nil && err == nil { if s.State.Proto() != nil && err == nil {
select { select {
case s.StateUpdateChan <- s.State.Pb: case s.StateUpdateChan <- s.State.Proto():
default: default:
glog.V(2).Infof("StateUpdateChan full during SaveState, state will be reported in heartbeat") glog.V(2).Infof("StateUpdateChan full during SaveState, state will be reported in heartbeat")
} }

View File

@@ -4,6 +4,7 @@ import (
"fmt" "fmt"
"os" "os"
"path/filepath" "path/filepath"
"sync"
"github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb" "github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
@@ -17,70 +18,110 @@ const (
) )
type State struct { type State struct {
FilePath string filePath string
Pb *volume_server_pb.VolumeServerState pb *volume_server_pb.VolumeServerState
mu sync.Mutex
} }
func NewState(dir string) (*State, error) { func NewState(dir string) (*State, error) {
state := &State{ state := &State{
FilePath: filepath.Join(dir, StateFileName), filePath: filepath.Join(dir, StateFileName),
Pb: nil, pb: nil,
} }
err := state.Load() err := state.Load()
return state, err return state, err
} }
func (st *State) Load() error { func NewStateFromProto(filePath string, state *volume_server_pb.VolumeServerState) *State {
st.Pb = &volume_server_pb.VolumeServerState{} pb := &volume_server_pb.VolumeServerState{}
proto.Merge(pb, state)
if !util.FileExists(st.FilePath) { return &State{
glog.V(1).Infof("No preexisting store state at %s", st.FilePath) filePath: filePath,
pb: pb,
}
}
func (st *State) Proto() *volume_server_pb.VolumeServerState {
st.mu.Lock()
defer st.mu.Unlock()
return st.pb
}
func (st *State) Load() error {
st.mu.Lock()
defer st.mu.Unlock()
st.pb = &volume_server_pb.VolumeServerState{}
if !util.FileExists(st.filePath) {
glog.V(1).Infof("No preexisting store state at %s", st.filePath)
return nil return nil
} }
binPb, err := os.ReadFile(st.FilePath) binPb, err := os.ReadFile(st.filePath)
if err != nil { if err != nil {
st.Pb = nil st.pb = nil
return fmt.Errorf("failed to read store state from %s : %v", st.FilePath, err) return fmt.Errorf("failed to read store state from %s : %v", st.filePath, err)
} }
if err := proto.Unmarshal(binPb, st.Pb); err != nil { if err := proto.Unmarshal(binPb, st.pb); err != nil {
st.Pb = nil st.pb = nil
return fmt.Errorf("failed to parse store state from %s : %v", st.FilePath, err) return fmt.Errorf("failed to parse store state from %s : %v", st.filePath, err)
} }
glog.V(1).Infof("Got store state from %s: %v", st.FilePath, st.Pb) glog.V(1).Infof("Got store state from %s: %v", st.filePath, st.pb)
return nil
}
func (st *State) save(locking bool) error {
if locking {
st.mu.Lock()
defer st.mu.Unlock()
}
if st.pb == nil {
st.pb = &volume_server_pb.VolumeServerState{}
}
binPb, err := proto.Marshal(st.pb)
if err != nil {
return fmt.Errorf("failed to serialize store state %v: %s", st.pb, err)
}
if err := util.WriteFile(st.filePath, binPb, StateFileMode); err != nil {
return fmt.Errorf("failed to write store state to %s : %v", st.filePath, err)
}
glog.V(1).Infof("Saved store state %v to %s", st.pb, st.filePath)
return nil return nil
} }
func (st *State) Save() error { func (st *State) Save() error {
if st.Pb == nil { return st.save(true)
st.Pb = &volume_server_pb.VolumeServerState{}
}
binPb, err := proto.Marshal(st.Pb)
if err != nil {
return fmt.Errorf("failed to serialize store state %v: %s", st.Pb, err)
}
if err := util.WriteFile(st.FilePath, binPb, StateFileMode); err != nil {
return fmt.Errorf("failed to write store state to %s : %v", st.FilePath, err)
}
glog.V(1).Infof("Saved store state %v to %s", st.Pb, st.FilePath)
return nil
} }
func (st *State) Update(state *volume_server_pb.VolumeServerState) error { func (st *State) Update(state *volume_server_pb.VolumeServerState) error {
st.mu.Lock()
defer st.mu.Unlock()
if state == nil { if state == nil {
return nil return nil
} }
if got, want := st.pb.GetVersion(), state.GetVersion(); got != want {
return fmt.Errorf("version mismatch for VolumeServerState (got %d, want %d)", got, want)
}
origState := st.Pb origState := st.pb
st.Pb = state st.pb = &volume_server_pb.VolumeServerState{}
err := st.Save() proto.Merge(st.pb, state)
st.pb.Version = st.pb.GetVersion() + 1
err := st.save(false)
if err != nil { if err != nil {
// restore the original state upon save failures, to avoid skew between in-memory and disk state protos. // restore the original state upon save failures, to avoid skew between in-memory and disk state protos.
st.Pb = origState st.pb = origState
} }
return err return err