feat(plugin): DC/rack/node filtering for volume balance (#8621)
* feat(plugin): add DC/rack/node filtering for volume balance detection

  Add scoping filters so balance detection can be limited to specific data centers, racks, or nodes. Filters are applied both at the metrics level (in the handler) and at the topology seeding level (in detection) to ensure only the targeted infrastructure participates in balancing.

* address PR review: use set lookups, deduplicate test helpers, add target checks

* address review: assert non-empty tasks in filter tests

  Prevent vacuous test passes by requiring len(tasks) > 0 before checking source/target exclusions.

* address review: enforce filter scope in fallback, clarify DC filter

  - Thread allowedServers into createBalanceTask so the fallback planner cannot produce out-of-scope targets when DC/rack/node filters are active
  - Update data_center_filter description to clarify single-DC usage

* address review: centralize parseCSVSet, fix filter scope leak, iterate all targets

  - Extract ParseCSVSet to shared weed/worker/tasks/util package, remove duplicates from detection.go and volume_balance_handler.go
  - Fix metric accumulation re-introducing filtered-out servers by only counting metrics for servers that passed DC/rack/node filters
  - Trim DataCenterFilter before matching to handle trailing spaces
  - Iterate all task.TypedParams.Targets in filter tests, not just [0]

* remove useless descriptor string test
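The ParseCSVSet helper that the last review round extracts into weed/worker/tasks/util is called by the handler code below, but its body is not part of this excerpt. A minimal sketch of the behavior the call sites appear to rely on (comma-separated values trimmed into a lookup set, empty entries skipped) might look like the following; the actual implementation may differ in detail:

// Illustrative sketch only: the real ParseCSVSet in weed/worker/tasks/util
// is not shown in this diff.
package util

import "strings"

// ParseCSVSet turns a comma-separated filter string such as "rack1, rack2"
// into a set suitable for O(1) membership checks.
func ParseCSVSet(csv string) map[string]bool {
	set := make(map[string]bool)
	for _, part := range strings.Split(csv, ",") {
		part = strings.TrimSpace(part)
		if part == "" {
			continue // skip empty entries, e.g. trailing commas
		}
		set[part] = true
	}
	return set
}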
@@ -15,6 +15,7 @@ import (
	"github.com/seaweedfs/seaweedfs/weed/pb/plugin_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/worker_pb"
	balancetask "github.com/seaweedfs/seaweedfs/weed/worker/tasks/balance"
	taskutil "github.com/seaweedfs/seaweedfs/weed/worker/tasks/util"
	workertypes "github.com/seaweedfs/seaweedfs/weed/worker/types"
	"google.golang.org/grpc"
	"google.golang.org/protobuf/proto"
@@ -106,6 +107,30 @@ func (h *VolumeBalanceHandler) Descriptor() *plugin_pb.JobTypeDescriptor {
					{Value: "FULL", Label: "Full (read-only)"},
				},
			},
			{
				Name: "data_center_filter",
				Label: "Data Center Filter",
				Description: "Only balance volumes in a single data center. Leave empty for all data centers.",
				Placeholder: "all data centers",
				FieldType: plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_STRING,
				Widget: plugin_pb.ConfigWidget_CONFIG_WIDGET_TEXT,
			},
			{
				Name: "rack_filter",
				Label: "Rack Filter",
				Description: "Only balance volumes on these racks (comma-separated). Leave empty for all racks.",
				Placeholder: "all racks",
				FieldType: plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_STRING,
				Widget: plugin_pb.ConfigWidget_CONFIG_WIDGET_TEXT,
			},
			{
				Name: "node_filter",
				Label: "Node Filter",
				Description: "Only balance volumes on these nodes (comma-separated server IDs). Leave empty for all nodes.",
				Placeholder: "all nodes",
				FieldType: plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_STRING,
				Widget: plugin_pb.ConfigWidget_CONFIG_WIDGET_TEXT,
			},
		},
	},
},
@@ -116,6 +141,15 @@ func (h *VolumeBalanceHandler) Descriptor() *plugin_pb.JobTypeDescriptor {
			"volume_state": {
				Kind: &plugin_pb.ConfigValue_StringValue{StringValue: "ALL"},
			},
			"data_center_filter": {
				Kind: &plugin_pb.ConfigValue_StringValue{StringValue: ""},
			},
			"rack_filter": {
				Kind: &plugin_pb.ConfigValue_StringValue{StringValue: ""},
			},
			"node_filter": {
				Kind: &plugin_pb.ConfigValue_StringValue{StringValue: ""},
			},
		},
	},
	WorkerConfigForm: &plugin_pb.ConfigForm{
@@ -288,6 +322,18 @@ func (h *VolumeBalanceHandler) Detect(
	volumeState := strings.ToUpper(strings.TrimSpace(readStringConfig(request.GetAdminConfigValues(), "volume_state", "ALL")))
	metrics = filterMetricsByVolumeState(metrics, volumeState)

	dataCenterFilter := strings.TrimSpace(readStringConfig(request.GetAdminConfigValues(), "data_center_filter", ""))
	rackFilter := strings.TrimSpace(readStringConfig(request.GetAdminConfigValues(), "rack_filter", ""))
	nodeFilter := strings.TrimSpace(readStringConfig(request.GetAdminConfigValues(), "node_filter", ""))

	if dataCenterFilter != "" || rackFilter != "" || nodeFilter != "" {
		metrics = filterMetricsByLocation(metrics, dataCenterFilter, rackFilter, nodeFilter)
	}

	workerConfig.TaskConfig.DataCenterFilter = dataCenterFilter
	workerConfig.TaskConfig.RackFilter = rackFilter
	workerConfig.TaskConfig.NodeFilter = nodeFilter

	clusterInfo := &workertypes.ClusterInfo{ActiveTopology: activeTopology}
	maxResults := int(request.MaxResults)
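The "thread allowedServers into createBalanceTask" change described in the commit message lands in detection code that is not part of this excerpt. A rough sketch of the idea, assuming the fallback planner picks a destination from a candidate list, is to reject any candidate outside the allowed set; pickFallbackTarget and its signature are illustrative, not the real createBalanceTask:

// Hypothetical sketch: createBalanceTask's real signature is not shown here.
// The point is that fallback target selection consults the same allowed-server
// set produced by the DC/rack/node filters, so it can never pick an
// out-of-scope destination.
func pickFallbackTarget(candidates []string, allowedServers map[string]bool) (string, bool) {
	for _, target := range candidates {
		// A nil set means no filters are active; otherwise enforce the scope.
		if allowedServers == nil || allowedServers[target] {
			return target, true
		}
	}
	return "", false // no in-scope target available
}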
@@ -1081,6 +1127,31 @@ func deriveBalanceWorkerConfig(values map[string]*plugin_pb.ConfigValue) *volume
	}
}

func filterMetricsByLocation(metrics []*workertypes.VolumeHealthMetrics, dcFilter, rackFilter, nodeFilter string) []*workertypes.VolumeHealthMetrics {
	var rackSet, nodeSet map[string]bool
	if rackFilter != "" {
		rackSet = taskutil.ParseCSVSet(rackFilter)
	}
	if nodeFilter != "" {
		nodeSet = taskutil.ParseCSVSet(nodeFilter)
	}

	filtered := make([]*workertypes.VolumeHealthMetrics, 0, len(metrics))
	for _, m := range metrics {
		if dcFilter != "" && m.DataCenter != dcFilter {
			continue
		}
		if rackSet != nil && !rackSet[m.Rack] {
			continue
		}
		if nodeSet != nil && !nodeSet[m.Server] {
			continue
		}
		filtered = append(filtered, m)
	}
	return filtered
}

func buildVolumeBalanceProposal(
	result *workertypes.TaskDetectionResult,
) (*plugin_pb.JobProposal, error) {
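The "fix filter scope leak" item in the commit message refers to metric accumulation in detection.go, which is not shown in this diff. A sketch of the described fix, assuming a per-server accumulation step and reusing the workertypes import from the handler file above, is to gate the accumulation on the set of servers whose metrics survived filtering; accumulateServerVolumeCounts and both parameters are hypothetical names:

// Hypothetical sketch: the real accumulation code lives in detection.go and
// is not part of this diff. Only servers whose metrics passed the
// DC/rack/node filters contribute to per-server totals, so a filtered-out
// server cannot be re-introduced into the balancing plan.
func accumulateServerVolumeCounts(all, filtered []*workertypes.VolumeHealthMetrics) map[string]int {
	allowed := make(map[string]bool, len(filtered))
	for _, m := range filtered {
		allowed[m.Server] = true
	}
	counts := make(map[string]int)
	for _, m := range all {
		if !allowed[m.Server] {
			continue // server was filtered out; do not count it
		}
		counts[m.Server]++
	}
	return counts
}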
@@ -629,6 +629,45 @@ func TestExecuteSingleMovePathUnchanged(t *testing.T) {
	}
}

func TestFilterMetricsByLocation(t *testing.T) {
	metrics := []*workertypes.VolumeHealthMetrics{
		{VolumeID: 1, Server: "node-a", DataCenter: "dc1", Rack: "rack1"},
		{VolumeID: 2, Server: "node-b", DataCenter: "dc1", Rack: "rack2"},
		{VolumeID: 3, Server: "node-c", DataCenter: "dc2", Rack: "rack1"},
		{VolumeID: 4, Server: "node-d", DataCenter: "dc2", Rack: "rack3"},
	}

	// Filter by DC
	filtered := filterMetricsByLocation(metrics, "dc1", "", "")
	if len(filtered) != 2 {
		t.Fatalf("DC filter: expected 2, got %d", len(filtered))
	}

	// Filter by rack
	filtered = filterMetricsByLocation(metrics, "", "rack1,rack2", "")
	if len(filtered) != 3 {
		t.Fatalf("rack filter: expected 3, got %d", len(filtered))
	}

	// Filter by node
	filtered = filterMetricsByLocation(metrics, "", "", "node-a,node-c")
	if len(filtered) != 2 {
		t.Fatalf("node filter: expected 2, got %d", len(filtered))
	}

	// Combined DC + rack
	filtered = filterMetricsByLocation(metrics, "dc2", "rack3", "")
	if len(filtered) != 1 {
		t.Fatalf("DC+rack filter: expected 1, got %d", len(filtered))
	}

	// Empty filters pass all
	filtered = filterMetricsByLocation(metrics, "", "", "")
	if len(filtered) != 4 {
		t.Fatalf("no filter: expected 4, got %d", len(filtered))
	}
}

func TestFilterMetricsByVolumeState(t *testing.T) {
	metrics := []*workertypes.VolumeHealthMetrics{
		{VolumeID: 1, FullnessRatio: 0.5}, // active
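The review items about asserting non-empty tasks and iterating all task.TypedParams.Targets apply to detection-level filter tests that are not included in this excerpt. A sketch of that assertion pattern, written for a _test.go file with the standard testing import and with source and target server IDs already extracted into plain string slices (the helper name and signature are illustrative), might look like:

// Hypothetical sketch: in the real tests, sources and targets would be
// collected by walking every entry of task.TypedParams.Targets rather than
// only Targets[0], as the commit message describes.
func assertTasksStayInScope(t *testing.T, sources, targets []string, allowed map[string]bool) {
	t.Helper()
	// Require at least one task so the exclusion checks below cannot pass vacuously.
	if len(sources) == 0 {
		t.Fatal("expected at least one balance task")
	}
	for _, s := range sources {
		if !allowed[s] {
			t.Errorf("source %q is outside the configured DC/rack/node scope", s)
		}
	}
	for _, tgt := range targets {
		if !allowed[tgt] {
			t.Errorf("target %q is outside the configured DC/rack/node scope", tgt)
		}
	}
}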