Add cluster.raft.leader.transfer command for graceful leader change (#7819)
* proto: add RaftLeadershipTransfer RPC for forced leader change

  Add new gRPC RPC and messages for leadership transfer:
  - RaftLeadershipTransferRequest: optional target_id and target_address
  - RaftLeadershipTransferResponse: previous_leader and new_leader

  This enables graceful leadership transfer before master maintenance, reducing errors in filers during planned maintenance windows.

  Ref: https://github.com/seaweedfs/seaweedfs/issues/7527

* proto: regenerate Go files for RaftLeadershipTransfer

  Generated from master.proto changes.

* master: implement RaftLeadershipTransfer gRPC handler

  Add gRPC handler for leadership transfer with support for:
  - Transfer to any eligible follower (when target_id is empty)
  - Transfer to a specific server (when target_id and target_address are provided)

  Uses hashicorp/raft LeadershipTransfer() and LeadershipTransferToServer() APIs. Returns the previous and new leader in the response.

* shell: add cluster.raft.leader.transfer command

  Add weed shell command for graceful leadership transfer:
  - Displays current cluster status before transfer
  - Supports auto-selection of target (any eligible follower)
  - Supports targeted transfer with -id and -address flags
  - Provides clear feedback on success/failure with troubleshooting tips

  Usage:
    cluster.raft.leader.transfer
    cluster.raft.leader.transfer -id <server_id> -address <grpc_address>

* master: add unit tests for raft gRPC handlers

  Add tests covering:
  - RaftLeadershipTransfer with no raft initialized
  - RaftLeadershipTransfer with target_id but no address
  - RaftListClusterServers with no raft initialized
  - RaftAddServer with no raft initialized
  - RaftRemoveServer with no raft initialized

  These tests verify error handling when raft is not configured.
* shell: add tests for cluster.raft.leader.transfer command

  Add tests covering:
  - Command name and help text validation
  - HasTag returns false for ResourceHeavy
  - Validation of -id without -address
  - Argument parsing with unknown flags

* master: clarify that leadership transfer requires -raftHashicorp

  The default raft implementation (seaweedfs/raft, a goraft fork) does not support graceful leadership transfer. This feature is only available when using hashicorp raft (-raftHashicorp=true).

  Update error messages and help text to make this requirement clear:
  - gRPC handler returns specific error for goraft users
  - Shell command help text notes the requirement
  - Added test for goraft case

* test: use strings.Contains instead of custom helper

  Replace custom contains/containsHelper functions with the standard library strings.Contains for better maintainability.

* shell: return flag parsing errors instead of swallowing them

  - Return the error from flag.Parse() instead of returning nil
  - Update test to explicitly assert error for unknown flags

* test: document integration test scenarios for Raft leadership transfer

  Add comments explaining:
  - Why these unit tests only cover 'Raft not initialized' scenarios
  - What integration tests should cover (with multi-master cluster)
  - hashicorp/raft uses concrete types that cannot be easily mocked

* fix: address reviewer feedback on tests and leader routing

  - Remove misleading tests that couldn't properly validate their documented behavior without a real Raft cluster:
    - TestRaftLeadershipTransfer_GoraftNotSupported
    - TestRaftLeadershipTransfer_ValidationTargetIdWithoutAddress
  - Change WithClient(false) to WithClient(true) for RaftLeadershipTransfer RPC to ensure the request is routed to the current leader

* Improve cluster.raft.transferLeader command

  - Rename command from cluster.raft.leader.transfer to cluster.raft.transferLeader
  - Add symmetric validation: -id and -address must be specified together
  - Handle case where same leader is re-elected after transfer
  - Add test for -address without -id validation
  - Add docker compose file for 5-master raft cluster testing
This commit is contained in:
@@ -41,6 +41,7 @@ const (
|
||||
Seaweed_RaftListClusterServers_FullMethodName = "/master_pb.Seaweed/RaftListClusterServers"
|
||||
Seaweed_RaftAddServer_FullMethodName = "/master_pb.Seaweed/RaftAddServer"
|
||||
Seaweed_RaftRemoveServer_FullMethodName = "/master_pb.Seaweed/RaftRemoveServer"
|
||||
Seaweed_RaftLeadershipTransfer_FullMethodName = "/master_pb.Seaweed/RaftLeadershipTransfer"
|
||||
Seaweed_VolumeGrow_FullMethodName = "/master_pb.Seaweed/VolumeGrow"
|
||||
)
|
||||
|
||||
@@ -70,6 +71,7 @@ type SeaweedClient interface {
|
||||
RaftListClusterServers(ctx context.Context, in *RaftListClusterServersRequest, opts ...grpc.CallOption) (*RaftListClusterServersResponse, error)
|
||||
RaftAddServer(ctx context.Context, in *RaftAddServerRequest, opts ...grpc.CallOption) (*RaftAddServerResponse, error)
|
||||
RaftRemoveServer(ctx context.Context, in *RaftRemoveServerRequest, opts ...grpc.CallOption) (*RaftRemoveServerResponse, error)
|
||||
RaftLeadershipTransfer(ctx context.Context, in *RaftLeadershipTransferRequest, opts ...grpc.CallOption) (*RaftLeadershipTransferResponse, error)
|
||||
VolumeGrow(ctx context.Context, in *VolumeGrowRequest, opts ...grpc.CallOption) (*VolumeGrowResponse, error)
|
||||
}
|
||||
|
||||
@@ -310,6 +312,16 @@ func (c *seaweedClient) RaftRemoveServer(ctx context.Context, in *RaftRemoveServ
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (c *seaweedClient) RaftLeadershipTransfer(ctx context.Context, in *RaftLeadershipTransferRequest, opts ...grpc.CallOption) (*RaftLeadershipTransferResponse, error) {
|
||||
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
|
||||
out := new(RaftLeadershipTransferResponse)
|
||||
err := c.cc.Invoke(ctx, Seaweed_RaftLeadershipTransfer_FullMethodName, in, out, cOpts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (c *seaweedClient) VolumeGrow(ctx context.Context, in *VolumeGrowRequest, opts ...grpc.CallOption) (*VolumeGrowResponse, error) {
|
||||
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
|
||||
out := new(VolumeGrowResponse)
|
||||
@@ -346,6 +358,7 @@ type SeaweedServer interface {
|
||||
RaftListClusterServers(context.Context, *RaftListClusterServersRequest) (*RaftListClusterServersResponse, error)
|
||||
RaftAddServer(context.Context, *RaftAddServerRequest) (*RaftAddServerResponse, error)
|
||||
RaftRemoveServer(context.Context, *RaftRemoveServerRequest) (*RaftRemoveServerResponse, error)
|
||||
RaftLeadershipTransfer(context.Context, *RaftLeadershipTransferRequest) (*RaftLeadershipTransferResponse, error)
|
||||
VolumeGrow(context.Context, *VolumeGrowRequest) (*VolumeGrowResponse, error)
|
||||
mustEmbedUnimplementedSeaweedServer()
|
||||
}
|
||||
@@ -423,6 +436,9 @@ func (UnimplementedSeaweedServer) RaftAddServer(context.Context, *RaftAddServerR
|
||||
func (UnimplementedSeaweedServer) RaftRemoveServer(context.Context, *RaftRemoveServerRequest) (*RaftRemoveServerResponse, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method RaftRemoveServer not implemented")
|
||||
}
|
||||
func (UnimplementedSeaweedServer) RaftLeadershipTransfer(context.Context, *RaftLeadershipTransferRequest) (*RaftLeadershipTransferResponse, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method RaftLeadershipTransfer not implemented")
|
||||
}
|
||||
func (UnimplementedSeaweedServer) VolumeGrow(context.Context, *VolumeGrowRequest) (*VolumeGrowResponse, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method VolumeGrow not implemented")
|
||||
}
|
||||
@@ -810,6 +826,24 @@ func _Seaweed_RaftRemoveServer_Handler(srv interface{}, ctx context.Context, dec
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
func _Seaweed_RaftLeadershipTransfer_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(RaftLeadershipTransferRequest)
|
||||
if err := dec(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if interceptor == nil {
|
||||
return srv.(SeaweedServer).RaftLeadershipTransfer(ctx, in)
|
||||
}
|
||||
info := &grpc.UnaryServerInfo{
|
||||
Server: srv,
|
||||
FullMethod: Seaweed_RaftLeadershipTransfer_FullMethodName,
|
||||
}
|
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||
return srv.(SeaweedServer).RaftLeadershipTransfer(ctx, req.(*RaftLeadershipTransferRequest))
|
||||
}
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
func _Seaweed_VolumeGrow_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(VolumeGrowRequest)
|
||||
if err := dec(in); err != nil {
|
||||
@@ -911,6 +945,10 @@ var Seaweed_ServiceDesc = grpc.ServiceDesc{
|
||||
MethodName: "RaftRemoveServer",
|
||||
Handler: _Seaweed_RaftRemoveServer_Handler,
|
||||
},
|
||||
{
|
||||
MethodName: "RaftLeadershipTransfer",
|
||||
Handler: _Seaweed_RaftLeadershipTransfer_Handler,
|
||||
},
|
||||
{
|
||||
MethodName: "VolumeGrow",
|
||||
Handler: _Seaweed_VolumeGrow_Handler,
|
||||
|
||||
Reference in New Issue
Block a user