* feat: auto-disable master vacuum when plugin vacuum worker is active When a vacuum-capable plugin worker connects to the admin server, the admin server calls DisableVacuum on the master to prevent the automatic scheduled vacuum from conflicting with the plugin worker's vacuum. When the worker disconnects, EnableVacuum is called to restore the default behavior. A safety net in the topology refresh loop re-enables vacuum if the admin server disconnects without cleanup. * rename isAdminServerConnected to isAdminServerConnectedFunc * add 5s timeout to DisableVacuum/EnableVacuum gRPC calls Prevents the monitor goroutine from blocking indefinitely if the master is unresponsive. * track plugin ownership of vacuum disable to avoid overriding operator - Add vacuumDisabledByPlugin flag to Topology, set when DisableVacuum is called while admin server is connected (i.e., by plugin monitor) - Safety net only re-enables vacuum when it was disabled by plugin, not when an operator intentionally disabled it via shell command - EnableVacuum clears the plugin flag * extract syncVacuumState for testability, add fake toggler tests Extract the single sync step into syncVacuumState() with a vacuumToggler interface. Add TestSyncVacuumState with a fake toggler that verifies disable/enable calls on state transitions. * use atomic.Bool for isDisableVacuum and vacuumDisabledByPlugin Both fields are written by gRPC handlers and read by the vacuum goroutine, causing a data race. Use atomic.Bool with Store/Load for thread-safe access. * use explicit by_plugin field instead of connection heuristic Add by_plugin bool to DisableVacuumRequest proto so the caller declares intent explicitly. The admin server monitor sets it to true; shell commands leave it false. This prevents an operator's intentional disable from being auto-reversed by the safety net. 
* use setter for admin server callback instead of function parameter Move isAdminServerConnected from StartRefreshWritableVolumes parameter to Topology.SetAdminServerConnectedFunc() setter. Keeps the function signature stable and decouples the topology layer from the admin server concept. * suppress repeated log messages on persistent sync failures Add retrying parameter to syncVacuumState so the initial state transition is logged at V(0) but subsequent retries of the same transition are silent until the call succeeds. * clear plugin ownership flag on manual DisableVacuum Prevents stale plugin flag from causing incorrect auto-enable when an operator manually disables vacuum after a plugin had previously disabled it. * add by_plugin to EnableVacuumRequest for symmetric ownership tracking Plugin-driven EnableVacuum now only re-enables if the plugin was the one that disabled it. If an operator manually disabled vacuum after the plugin, the plugin's EnableVacuum is a no-op. This prevents the plugin monitor from overriding operator intent on worker disconnect. * use cancellable context for monitorVacuumWorker goroutine Replace context.Background() with a cancellable context stored as bgCancel on AdminServer. Shutdown() calls bgCancel() so monitorVacuumWorker exits cleanly via ctx.Done(). * track operator and plugin vacuum disables independently Replace single isDisableVacuum flag with two independent flags: vacuumDisabledByOperator and vacuumDisabledByPlugin. Each caller only flips its own flag. The effective disabled state is the OR of both. This prevents a plugin connect/disconnect cycle from overriding an operator's manual disable, and vice versa. * fix safety net to clear plugin flag, not operator flag The safety net should call EnableVacuumByPlugin() to clear only the plugin disable flag when the admin server disconnects. The previous call to EnableVacuum() incorrectly cleared the operator flag instead.
477 lines
12 KiB
Protocol Buffer
477 lines
12 KiB
Protocol Buffer
syntax = "proto3";
|
|
|
|
package master_pb;
|
|
|
|
option go_package = "github.com/seaweedfs/seaweedfs/weed/pb/master_pb";
|
|
|
|
import "volume_server.proto";
|
|
|
|
//////////////////////////////////////////////////
|
|
|
|
// Seaweed is the gRPC service declared by package master_pb.
// Every RPC has its own request and response message defined in this file.
service Seaweed {
  // Bidirectional stream: Heartbeat messages in, HeartbeatResponse messages out.
  rpc SendHeartbeat (stream Heartbeat) returns (stream HeartbeatResponse) {}

  // Bidirectional stream: KeepConnectedRequest in, KeepConnectedResponse out.
  rpc KeepConnected (stream KeepConnectedRequest) returns (stream KeepConnectedResponse) {}

  // Resolves volume or file ids to their locations.
  rpc LookupVolume (LookupVolumeRequest) returns (LookupVolumeResponse) {}

  // Unary assignment call; see AssignRequest and AssignResponse.
  rpc Assign (AssignRequest) returns (AssignResponse) {}

  // Streaming variant of Assign that reuses the same messages.
  rpc StreamAssign (stream AssignRequest) returns (stream AssignResponse) {}

  // Returns size and file-count statistics; see StatisticsResponse.
  rpc Statistics (StatisticsRequest) returns (StatisticsResponse) {}

  // Lists collections.
  rpc CollectionList (CollectionListRequest) returns (CollectionListResponse) {}

  // Deletes the collection named in the request.
  rpc CollectionDelete (CollectionDeleteRequest) returns (CollectionDeleteResponse) {}

  // Returns the topology tree together with the volume size limit.
  rpc VolumeList (VolumeListRequest) returns (VolumeListResponse) {}

  // Resolves an erasure-coded volume id to per-shard locations.
  rpc LookupEcVolume (LookupEcVolumeRequest) returns (LookupEcVolumeResponse) {}

  // Requests a vacuum; see VacuumVolumeRequest for the selectable scope.
  rpc VacuumVolume (VacuumVolumeRequest) returns (VacuumVolumeResponse) {}

  // Disables vacuum on the master. The request states whether the caller is
  // the plugin-worker monitor or an operator.
  rpc DisableVacuum (DisableVacuumRequest) returns (DisableVacuumResponse) {}

  // Re-enables vacuum on the master. The request states whether the caller is
  // the plugin-worker monitor or an operator.
  rpc EnableVacuum (EnableVacuumRequest) returns (EnableVacuumResponse) {}

  // Sets or clears the read-only mark of a volume (see is_readonly).
  rpc VolumeMarkReadonly (VolumeMarkReadonlyRequest) returns (VolumeMarkReadonlyResponse) {}

  // Returns the configuration values listed in GetMasterConfigurationResponse.
  rpc GetMasterConfiguration (GetMasterConfigurationRequest) returns (GetMasterConfigurationResponse) {}

  // Lists cluster nodes, filtered by the request fields.
  rpc ListClusterNodes (ListClusterNodesRequest) returns (ListClusterNodesResponse) {}

  // Leases a named admin token (lock).
  rpc LeaseAdminToken (LeaseAdminTokenRequest) returns (LeaseAdminTokenResponse) {}

  // Releases a previously leased admin token.
  rpc ReleaseAdminToken (ReleaseAdminTokenRequest) returns (ReleaseAdminTokenResponse) {}

  // Pings the target named in the request and returns the measured timestamps.
  rpc Ping (PingRequest) returns (PingResponse) {}

  // Lists the servers of the raft cluster.
  rpc RaftListClusterServers (RaftListClusterServersRequest) returns (RaftListClusterServersResponse) {}

  // Adds a server to the raft cluster.
  rpc RaftAddServer (RaftAddServerRequest) returns (RaftAddServerResponse) {}

  // Removes a server from the raft cluster.
  rpc RaftRemoveServer (RaftRemoveServerRequest) returns (RaftRemoveServerResponse) {}

  // Transfers raft leadership, optionally to a specific target.
  rpc RaftLeadershipTransfer (RaftLeadershipTransferRequest) returns (RaftLeadershipTransferResponse) {}

  // Requests volume growth; see VolumeGrowRequest.
  rpc VolumeGrow (VolumeGrowRequest) returns (VolumeGrowResponse) {}
}
|
|
|
|
//////////////////////////////////////////////////
|
|
|
|
// DiskTag associates a list of tags with one disk, identified by disk_id.
message DiskTag {
  uint32 disk_id = 1;
  repeated string tags = 2;
}
|
|
|
|
// Heartbeat is the message streamed to the SendHeartbeat RPC.
// It carries the sender's address and placement plus either full or delta
// listings of its volumes and erasure-coding shards.
//
// NOTE(review): field numbers 13-15 are unused here; if they belonged to
// removed fields they should be marked `reserved` - confirm against history.
message Heartbeat {
  string ip = 1;
  uint32 port = 2;
  string public_url = 3;
  uint64 max_file_key = 5;
  string data_center = 6;
  string rack = 7;
  uint32 admin_port = 8;
  repeated VolumeInformationMessage volumes = 9;

  // delta volumes
  repeated VolumeShortInformationMessage new_volumes = 10;
  repeated VolumeShortInformationMessage deleted_volumes = 11;
  bool has_no_volumes = 12;

  // erasure coding
  repeated VolumeEcShardInformationMessage ec_shards = 16;

  // delta erasure coding shards
  repeated VolumeEcShardInformationMessage new_ec_shards = 17;
  repeated VolumeEcShardInformationMessage deleted_ec_shards = 18;
  bool has_no_ec_shards = 19;

  // Declared out of numeric order; field number 4 must stay as is.
  map<string, uint32> max_volume_counts = 4;
  uint32 grpc_port = 20;
  repeated string location_uuids = 21;
  string id = 22; // volume server id, independent of ip:port for stable identification

  // state flags
  volume_server_pb.VolumeServerState state = 23;

  repeated DiskTag disk_tags = 24;
}
|
|
|
|
// HeartbeatResponse is the message streamed back by the SendHeartbeat RPC.
message HeartbeatResponse {
  // NOTE(review): unit is not stated here (VolumeListResponse uses an explicit
  // `_mb` suffix for a similar field) - confirm.
  uint64 volume_size_limit = 1;
  string leader = 2;
  string metrics_address = 3;
  // Metrics interval, in seconds.
  uint32 metrics_interval_seconds = 4;
  repeated StorageBackend storage_backends = 5;
  repeated string duplicated_uuids = 6;
  bool preallocate = 7;
}
|
|
|
|
// VolumeInformationMessage is the full description of one volume.
// It is used in Heartbeat.volumes and DiskInfo.volume_infos.
message VolumeInformationMessage {
  uint32 id = 1;
  uint64 size = 2;
  string collection = 3;
  uint64 file_count = 4;
  uint64 delete_count = 5;
  uint64 deleted_byte_count = 6;
  bool read_only = 7;
  uint32 replica_placement = 8;
  uint32 version = 9;
  uint32 ttl = 10;
  uint32 compact_revision = 11;
  // Modification time, in seconds.
  int64 modified_at_second = 12;
  string remote_storage_name = 13;
  string remote_storage_key = 14;
  string disk_type = 15;
  uint32 disk_id = 16;
}
|
|
|
|
// VolumeShortInformationMessage is a reduced description of one volume, used
// for the delta lists Heartbeat.new_volumes and Heartbeat.deleted_volumes.
//
// Each field keeps the same number as the same-named field of
// VolumeInformationMessage, which is why the numbering has gaps.
message VolumeShortInformationMessage {
  uint32 id = 1;
  string collection = 3;
  uint32 replica_placement = 8;
  uint32 version = 9;
  uint32 ttl = 10;
  string disk_type = 15;
  uint32 disk_id = 16;
}
|
|
|
|
// VolumeEcShardInformationMessage describes the erasure-coding shards of one
// volume. It is used in Heartbeat and in DiskInfo.ec_shard_infos.
message VolumeEcShardInformationMessage {
  uint32 id = 1;
  string collection = 2;
  // Bit set of shard indexes; shard_sizes below is ordered by its set bits.
  uint32 ec_index_bits = 3;
  string disk_type = 4;
  uint64 expire_at_sec = 5; // used to record the destruction time of ec volume
  uint32 disk_id = 6;
  repeated int64 shard_sizes = 7; // optimized: sizes for shards in order of set bits in ec_index_bits
}
|
|
|
|
// StorageBackend describes one storage backend by type and id, plus free-form
// string properties. It is used in HeartbeatResponse and
// GetMasterConfigurationResponse.
message StorageBackend {
  string type = 1;
  string id = 2;
  map<string, string> properties = 3;
}
|
|
|
|
// Empty is a message with no fields. No RPC or message in this file uses it.
message Empty {}
|
|
|
|
// SuperBlockExtra wraps optional extra data; currently only erasure-coding
// information.
message SuperBlockExtra {
  // ErasureCoding holds the data/parity values and the related volume ids.
  message ErasureCoding {
    uint32 data = 1;
    uint32 parity = 2;
    repeated uint32 volume_ids = 3;
  }

  ErasureCoding erasure_coding = 1;
}
|
|
|
|
// KeepConnectedRequest is the message streamed to the KeepConnected RPC.
// It identifies the connecting client.
//
// NOTE(review): field number 2 is unused; if it belonged to a removed field it
// should be marked `reserved` - confirm against history.
message KeepConnectedRequest {
  string client_type = 1;
  string client_address = 3;
  string version = 4;
  string filer_group = 5;
  string data_center = 6;
  string rack = 7;
}
|
|
|
|
// VolumeLocation reports volume ids (regular and EC) added to or removed from
// the server at `url`. It is delivered inside KeepConnectedResponse.
message VolumeLocation {
  string url = 1;
  string public_url = 2;
  repeated uint32 new_vids = 3;
  repeated uint32 deleted_vids = 4;
  string leader = 5; // optional when leader is not itself
  string data_center = 6; // optional when DataCenter is in use
  uint32 grpc_port = 7;
  repeated uint32 new_ec_vids = 8;
  repeated uint32 deleted_ec_vids = 9;
}
|
|
|
|
// ClusterNodeUpdate reports a cluster node being added or removed.
// It is delivered inside KeepConnectedResponse.
//
// NOTE(review): field number 3 is unused; if it belonged to a removed field it
// should be marked `reserved` - confirm against history.
message ClusterNodeUpdate {
  string node_type = 1;
  string address = 2;
  // True for an addition; presumably false means removal - confirm.
  bool is_add = 4;
  string filer_group = 5;
  // Creation time, in nanoseconds.
  int64 created_at_ns = 6;
}
|
|
|
|
// KeepConnectedResponse is the message streamed back by the KeepConnected RPC.
// Both fields are message-typed, so either may be absent.
message KeepConnectedResponse {
  VolumeLocation volume_location = 1;
  ClusterNodeUpdate cluster_node_update = 2;
}
|
|
|
|
// Request for the LookupVolume RPC.
message LookupVolumeRequest {
  // Volume ids or file ids to resolve.
  repeated string volume_or_file_ids = 1;
  string collection = 2; // optional, a bit faster if provided.
}
|
|
// Response of the LookupVolume RPC: a list of per-id results.
message LookupVolumeResponse {
  // Result for one volume or file id.
  message VolumeIdLocation {
    string volume_or_file_id = 1;
    repeated Location locations = 2;
    // Error text for this id.
    string error = 3;
    string auth = 4;
  }

  repeated VolumeIdLocation volume_id_locations = 1;
}
|
|
|
|
// Location is the address of one server: URL, public URL, gRPC port and data
// center. It is used by the lookup and assign responses.
message Location {
  string url = 1;
  string public_url = 2;
  uint32 grpc_port = 3;
  string data_center = 4;
}
|
|
|
|
// Request for the Assign and StreamAssign RPCs.
message AssignRequest {
  uint64 count = 1;
  string replication = 2;
  string collection = 3;
  string ttl = 4;
  string data_center = 5;
  string rack = 6;
  string data_node = 7;
  // Memory-map size limit, in MB.
  uint32 memory_map_max_size_mb = 8;
  uint32 writable_volume_count = 9;
  string disk_type = 10;
}
|
|
|
|
// Request for the VolumeGrow RPC.
message VolumeGrowRequest {
  uint32 writable_volume_count = 1;
  string replication = 2;
  string collection = 3;
  string ttl = 4;
  string data_center = 5;
  string rack = 6;
  string data_node = 7;
  // Memory-map size limit, in MB.
  uint32 memory_map_max_size_mb = 8;
  string disk_type = 9;
}
|
|
|
|
// Response of the Assign and StreamAssign RPCs.
//
// NOTE(review): field numbers 2 and 3 are unused; if they belonged to removed
// fields they should be marked `reserved` - confirm against history.
message AssignResponse {
  string fid = 1;
  uint64 count = 4;
  // Error text.
  string error = 5;
  string auth = 6;
  repeated Location replicas = 7;
  Location location = 8;
}
|
|
|
|
// Request for the Statistics RPC.
message StatisticsRequest {
  string replication = 1;
  string collection = 2;
  string ttl = 3;
  string disk_type = 4;
}
|
|
// Response of the Statistics RPC.
//
// NOTE(review): field numbers 1-3 are unused; if they belonged to removed
// fields they should be marked `reserved` - confirm against history.
message StatisticsResponse {
  uint64 total_size = 4;
  uint64 used_size = 5;
  uint64 file_count = 6;
}
|
|
|
|
//
|
|
// collection related
|
|
//
|
|
// Collection identifies a collection by name.
message Collection {
  string name = 1;
}
|
|
// Request for the CollectionList RPC.
// The two flags select which kinds of volumes are considered.
message CollectionListRequest {
  bool include_normal_volumes = 1;
  bool include_ec_volumes = 2;
}
|
|
// Response of the CollectionList RPC.
message CollectionListResponse {
  repeated Collection collections = 1;
}
|
|
|
|
// Request for the CollectionDelete RPC.
message CollectionDeleteRequest {
  // Name of the collection to delete.
  string name = 1;
}
|
|
// Response of the CollectionDelete RPC; it has no fields.
message CollectionDeleteResponse {}
|
|
|
|
//
|
|
// volume related
|
|
//
|
|
// DiskInfo holds volume counters and per-volume / per-EC-shard details for one
// disk type. It is the value type of the `diskInfos` maps on the topology
// messages.
message DiskInfo {
  string type = 1;
  int64 volume_count = 2;
  int64 max_volume_count = 3;
  int64 free_volume_count = 4;
  int64 active_volume_count = 5;
  repeated VolumeInformationMessage volume_infos = 6;
  repeated VolumeEcShardInformationMessage ec_shard_infos = 7;
  int64 remote_volume_count = 8;
  uint32 disk_id = 9;
  repeated string tags = 10;
}
|
|
// DataNodeInfo describes one data node in the topology tree.
message DataNodeInfo {
  string id = 1;
  // The camelCase name is left unchanged so generated accessors and
  // text-format names stay the same.
  map<string, DiskInfo> diskInfos = 2;
  uint32 grpc_port = 3;
  string address = 4; // ip:port for connecting to the volume server
}
|
|
// RackInfo describes one rack in the topology tree and the data nodes in it.
message RackInfo {
  string id = 1;
  repeated DataNodeInfo data_node_infos = 2;
  // The camelCase name is left unchanged so generated accessors and
  // text-format names stay the same.
  map<string, DiskInfo> diskInfos = 3;
}
|
|
// DataCenterInfo describes one data center in the topology tree and the racks
// in it.
message DataCenterInfo {
  string id = 1;
  repeated RackInfo rack_infos = 2;
  // The camelCase name is left unchanged so generated accessors and
  // text-format names stay the same.
  map<string, DiskInfo> diskInfos = 3;
}
|
|
// TopologyInfo is the root of the topology tree:
// data centers -> racks -> data nodes.
message TopologyInfo {
  string id = 1;
  repeated DataCenterInfo data_center_infos = 2;
  // The camelCase name is left unchanged so generated accessors and
  // text-format names stay the same.
  map<string, DiskInfo> diskInfos = 3;
}
|
|
// Request for the VolumeList RPC; it has no fields.
message VolumeListRequest {}
|
|
// Response of the VolumeList RPC.
message VolumeListResponse {
  TopologyInfo topology_info = 1;
  // Volume size limit, in MB.
  uint64 volume_size_limit_mb = 2;
}
|
|
|
|
// Request for the LookupEcVolume RPC.
message LookupEcVolumeRequest {
  uint32 volume_id = 1;
}
|
|
// Response of the LookupEcVolume RPC: the locations of each shard of the
// volume.
message LookupEcVolumeResponse {
  uint32 volume_id = 1;

  // Locations of one shard.
  message EcShardIdLocation {
    uint32 shard_id = 1;
    repeated Location locations = 2;
  }

  repeated EcShardIdLocation shard_id_locations = 2;
}
|
|
|
|
// Request for the VacuumVolume RPC.
message VacuumVolumeRequest {
  // NOTE(review): presumably a garbage ratio above which a volume is
  // vacuumed - confirm range and meaning against the handler.
  float garbage_threshold = 1;
  uint32 volume_id = 2;
  string collection = 3;
}
|
|
// Response of the VacuumVolume RPC; it has no fields.
message VacuumVolumeResponse {}
|
|
|
|
// Request for the DisableVacuum RPC.
message DisableVacuumRequest {
  // Declares who is disabling vacuum. The admin server's plugin-worker monitor
  // sets this to true; shell commands leave it false. Plugin and operator
  // disables are tracked independently, so an operator's intentional disable
  // is not auto-reversed when the plugin side goes away.
  bool by_plugin = 1;
}
|
|
// Response of the DisableVacuum RPC; it has no fields.
message DisableVacuumResponse {}
|
|
|
|
// Request for the EnableVacuum RPC.
message EnableVacuumRequest {
  // Declares who is re-enabling vacuum. When true (plugin-worker monitor),
  // only the plugin's own disable is cleared and an operator's manual disable
  // stays in effect. When false, the call comes from an operator.
  bool by_plugin = 1;
}
|
|
// Response of the EnableVacuum RPC; it has no fields.
message EnableVacuumResponse {}
|
|
|
|
// Request for the VolumeMarkReadonly RPC.
//
// NOTE(review): field number 3 is unused; if it belonged to a removed field it
// should be marked `reserved` - confirm against history.
message VolumeMarkReadonlyRequest {
  string ip = 1;
  uint32 port = 2;
  uint32 volume_id = 4;
  string collection = 5;
  uint32 replica_placement = 6;
  uint32 version = 7;
  uint32 ttl = 8;
  string disk_type = 9;
  // Desired read-only state of the volume.
  bool is_readonly = 10;
}
|
|
// Response of the VolumeMarkReadonly RPC; it has no fields.
message VolumeMarkReadonlyResponse {}
|
|
|
|
// Request for the GetMasterConfiguration RPC; it has no fields.
message GetMasterConfigurationRequest {}
|
|
// Response of the GetMasterConfiguration RPC.
message GetMasterConfigurationResponse {
  string metrics_address = 1;
  // Metrics interval, in seconds.
  uint32 metrics_interval_seconds = 2;
  repeated StorageBackend storage_backends = 3;
  string default_replication = 4;
  string leader = 5;
  // NOTE(review): presumably the volume size limit in MB; the unusual
  // `_m_b` spelling is kept because renaming changes generated code.
  uint32 volume_size_limit_m_b = 6;
  bool volume_preallocate = 7;

  // MIGRATION: fields 8-9 help migrate master.toml [master.maintenance] to admin script plugin. Remove after March 2027.
  string maintenance_scripts = 8;
  uint32 maintenance_sleep_minutes = 9;
}
|
|
|
|
// Request for the ListClusterNodes RPC.
//
// NOTE(review): field number 3 is unused; if it belonged to a removed field it
// should be marked `reserved` - confirm against history.
message ListClusterNodesRequest {
  string client_type = 1;
  string filer_group = 2;
  int32 limit = 4;
}
|
|
// Response of the ListClusterNodes RPC.
message ListClusterNodesResponse {
  // One cluster node.
  //
  // NOTE(review): field number 3 is unused; if it belonged to a removed field
  // it should be marked `reserved` - confirm against history.
  message ClusterNode {
    string address = 1;
    string version = 2;
    // Creation time, in nanoseconds.
    int64 created_at_ns = 4;
    string data_center = 5;
    string rack = 6;
  }

  repeated ClusterNode cluster_nodes = 1;
}
|
|
|
|
// Request for the LeaseAdminToken RPC.
// It names the lock and the client, and carries the previously held token and
// lock time.
message LeaseAdminTokenRequest {
  int64 previous_token = 1;
  int64 previous_lock_time = 2;
  string lock_name = 3;
  string client_name = 4;
  string message = 5;
}
|
|
// Response of the LeaseAdminToken RPC.
message LeaseAdminTokenResponse {
  int64 token = 1;
  // Lock timestamp, in nanoseconds.
  int64 lock_ts_ns = 2;
}
|
|
|
|
// Request for the ReleaseAdminToken RPC.
// It names the lock and carries the token and lock time being released.
message ReleaseAdminTokenRequest {
  int64 previous_token = 1;
  int64 previous_lock_time = 2;
  string lock_name = 3;
}
|
|
// Response of the ReleaseAdminToken RPC; it has no fields.
message ReleaseAdminTokenResponse {}
|
|
|
|
// Request for the Ping RPC.
message PingRequest {
  string target = 1; // default to ping itself
  string target_type = 2;
}
|
|
// Response of the Ping RPC: three timestamps, all in nanoseconds.
message PingResponse {
  int64 start_time_ns = 1;
  int64 remote_time_ns = 2;
  int64 stop_time_ns = 3;
}
|
|
|
|
// Request for the RaftAddServer RPC.
message RaftAddServerRequest {
  string id = 1;
  string address = 2;
  // Whether the server is added as a voter.
  bool voter = 3;
}
|
|
// Response of the RaftAddServer RPC; it has no fields.
message RaftAddServerResponse {}
|
|
|
|
// Request for the RaftRemoveServer RPC.
message RaftRemoveServerRequest {
  // Id of the server to remove.
  string id = 1;
  // NOTE(review): exact effect of `force` is defined by the handler - confirm.
  bool force = 2;
}
|
|
// Response of the RaftRemoveServer RPC; it has no fields.
message RaftRemoveServerResponse {}
|
|
|
|
// Request for the RaftListClusterServers RPC; it has no fields.
message RaftListClusterServersRequest {}
|
|
// Response of the RaftListClusterServers RPC.
message RaftListClusterServersResponse {
  // One server of the raft cluster.
  message ClusterServers {
    string id = 1;
    string address = 2;
    string suffrage = 3;
    // The camelCase name is left unchanged so generated accessors and
    // text-format names stay the same.
    bool isLeader = 4;
  }

  repeated ClusterServers cluster_servers = 1;
}
|
|
|
|
// Request for the RaftLeadershipTransfer RPC.
message RaftLeadershipTransferRequest {
  string target_id = 1; // Optional: target server ID. If empty, transfers to any eligible follower
  string target_address = 2; // Optional: target server address. Required if target_id is specified
}
|
|
// Response of the RaftLeadershipTransfer RPC: the leader before and after the
// transfer.
message RaftLeadershipTransferResponse {
  string previous_leader = 1;
  string new_leader = 2;
}
|
|
|
|
// Response of the VolumeGrow RPC; it has no fields.
message VolumeGrowResponse {}
|