feat(plugin): DC/rack/node filtering for volume balance (#8621)

* feat(plugin): add DC/rack/node filtering for volume balance detection

Add scoping filters so balance detection can be limited to specific data
centers, racks, or nodes. Filters are applied both at the metrics level
(in the handler) and at the topology seeding level (in detection) to
ensure only the targeted infrastructure participates in balancing.

* address PR review: use set lookups, deduplicate test helpers, add target checks

* address review: assert non-empty tasks in filter tests

Prevent vacuous test passes by requiring len(tasks) > 0
before checking source/target exclusions.

* address review: enforce filter scope in fallback, clarify DC filter

- Thread allowedServers into createBalanceTask so the fallback
  planner cannot produce out-of-scope targets when DC/rack/node
  filters are active
- Update data_center_filter description to clarify single-DC usage

* address review: centralize parseCSVSet, fix filter scope leak, iterate all targets

- Extract ParseCSVSet to shared weed/worker/tasks/util package,
  remove duplicates from detection.go and volume_balance_handler.go
- Fix metric accumulation re-introducing filtered-out servers by
  only counting metrics for servers that passed DC/rack/node filters
- Trim DataCenterFilter before matching to handle trailing spaces
- Iterate all task.TypedParams.Targets in filter tests, not just [0]

* remove useless descriptor string test
This commit is contained in:
Chris Lu
2026-03-13 17:03:37 -07:00
committed by GitHub
parent 00ce1c6eba
commit 47ddf05d95
6 changed files with 283 additions and 2 deletions

View File

@@ -15,6 +15,7 @@ import (
"github.com/seaweedfs/seaweedfs/weed/pb/plugin_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/worker_pb"
balancetask "github.com/seaweedfs/seaweedfs/weed/worker/tasks/balance"
taskutil "github.com/seaweedfs/seaweedfs/weed/worker/tasks/util"
workertypes "github.com/seaweedfs/seaweedfs/weed/worker/types"
"google.golang.org/grpc"
"google.golang.org/protobuf/proto"
@@ -106,6 +107,30 @@ func (h *VolumeBalanceHandler) Descriptor() *plugin_pb.JobTypeDescriptor {
{Value: "FULL", Label: "Full (read-only)"},
},
},
{
Name: "data_center_filter",
Label: "Data Center Filter",
Description: "Only balance volumes in a single data center. Leave empty for all data centers.",
Placeholder: "all data centers",
FieldType: plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_STRING,
Widget: plugin_pb.ConfigWidget_CONFIG_WIDGET_TEXT,
},
{
Name: "rack_filter",
Label: "Rack Filter",
Description: "Only balance volumes on these racks (comma-separated). Leave empty for all racks.",
Placeholder: "all racks",
FieldType: plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_STRING,
Widget: plugin_pb.ConfigWidget_CONFIG_WIDGET_TEXT,
},
{
Name: "node_filter",
Label: "Node Filter",
Description: "Only balance volumes on these nodes (comma-separated server IDs). Leave empty for all nodes.",
Placeholder: "all nodes",
FieldType: plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_STRING,
Widget: plugin_pb.ConfigWidget_CONFIG_WIDGET_TEXT,
},
},
},
},
@@ -116,6 +141,15 @@ func (h *VolumeBalanceHandler) Descriptor() *plugin_pb.JobTypeDescriptor {
"volume_state": {
Kind: &plugin_pb.ConfigValue_StringValue{StringValue: "ALL"},
},
"data_center_filter": {
Kind: &plugin_pb.ConfigValue_StringValue{StringValue: ""},
},
"rack_filter": {
Kind: &plugin_pb.ConfigValue_StringValue{StringValue: ""},
},
"node_filter": {
Kind: &plugin_pb.ConfigValue_StringValue{StringValue: ""},
},
},
},
WorkerConfigForm: &plugin_pb.ConfigForm{
@@ -288,6 +322,18 @@ func (h *VolumeBalanceHandler) Detect(
volumeState := strings.ToUpper(strings.TrimSpace(readStringConfig(request.GetAdminConfigValues(), "volume_state", "ALL")))
metrics = filterMetricsByVolumeState(metrics, volumeState)
dataCenterFilter := strings.TrimSpace(readStringConfig(request.GetAdminConfigValues(), "data_center_filter", ""))
rackFilter := strings.TrimSpace(readStringConfig(request.GetAdminConfigValues(), "rack_filter", ""))
nodeFilter := strings.TrimSpace(readStringConfig(request.GetAdminConfigValues(), "node_filter", ""))
if dataCenterFilter != "" || rackFilter != "" || nodeFilter != "" {
metrics = filterMetricsByLocation(metrics, dataCenterFilter, rackFilter, nodeFilter)
}
workerConfig.TaskConfig.DataCenterFilter = dataCenterFilter
workerConfig.TaskConfig.RackFilter = rackFilter
workerConfig.TaskConfig.NodeFilter = nodeFilter
clusterInfo := &workertypes.ClusterInfo{ActiveTopology: activeTopology}
maxResults := int(request.MaxResults)
@@ -1081,6 +1127,31 @@ func deriveBalanceWorkerConfig(values map[string]*plugin_pb.ConfigValue) *volume
}
}
// filterMetricsByLocation returns the subset of metrics whose volume location
// matches all of the given scope filters. An empty filter string means "no
// restriction" for that dimension: dcFilter matches a single data center
// exactly, while rackFilter and nodeFilter are comma-separated allow-lists.
// Input order is preserved in the returned slice.
func filterMetricsByLocation(metrics []*workertypes.VolumeHealthMetrics, dcFilter, rackFilter, nodeFilter string) []*workertypes.VolumeHealthMetrics {
	// Parse the comma-separated allow-lists into sets once, up front.
	// A nil set means that dimension is unrestricted.
	var allowedRacks, allowedNodes map[string]bool
	if rackFilter != "" {
		allowedRacks = taskutil.ParseCSVSet(rackFilter)
	}
	if nodeFilter != "" {
		allowedNodes = taskutil.ParseCSVSet(nodeFilter)
	}

	// inScope reports whether a single metric passes every active filter.
	inScope := func(m *workertypes.VolumeHealthMetrics) bool {
		switch {
		case dcFilter != "" && m.DataCenter != dcFilter:
			return false
		case allowedRacks != nil && !allowedRacks[m.Rack]:
			return false
		case allowedNodes != nil && !allowedNodes[m.Server]:
			return false
		}
		return true
	}

	kept := make([]*workertypes.VolumeHealthMetrics, 0, len(metrics))
	for _, m := range metrics {
		if inScope(m) {
			kept = append(kept, m)
		}
	}
	return kept
}
func buildVolumeBalanceProposal(
result *workertypes.TaskDetectionResult,
) (*plugin_pb.JobProposal, error) {

View File

@@ -629,6 +629,45 @@ func TestExecuteSingleMovePathUnchanged(t *testing.T) {
}
}
// TestFilterMetricsByLocation verifies DC, rack, and node scoping for
// filterMetricsByLocation. It asserts the exact servers returned (in input
// order), not just result counts, so a filter that selects the wrong subset
// of the right size cannot pass vacuously.
func TestFilterMetricsByLocation(t *testing.T) {
	metrics := []*workertypes.VolumeHealthMetrics{
		{VolumeID: 1, Server: "node-a", DataCenter: "dc1", Rack: "rack1"},
		{VolumeID: 2, Server: "node-b", DataCenter: "dc1", Rack: "rack2"},
		{VolumeID: 3, Server: "node-c", DataCenter: "dc2", Rack: "rack1"},
		{VolumeID: 4, Server: "node-d", DataCenter: "dc2", Rack: "rack3"},
	}
	cases := []struct {
		name        string
		dc          string
		racks       string
		nodes       string
		wantServers []string
	}{
		{"DC filter", "dc1", "", "", []string{"node-a", "node-b"}},
		{"rack filter", "", "rack1,rack2", "", []string{"node-a", "node-b", "node-c"}},
		{"node filter", "", "", "node-a,node-c", []string{"node-a", "node-c"}},
		{"combined DC + rack", "dc2", "rack3", "", []string{"node-d"}},
		{"empty filters pass all", "", "", "", []string{"node-a", "node-b", "node-c", "node-d"}},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			filtered := filterMetricsByLocation(metrics, tc.dc, tc.racks, tc.nodes)
			got := make([]string, 0, len(filtered))
			for _, m := range filtered {
				got = append(got, m.Server)
			}
			if len(got) != len(tc.wantServers) {
				t.Fatalf("expected servers %v, got %v", tc.wantServers, got)
			}
			// The filter preserves input order, so positional compare is valid.
			for i := range got {
				if got[i] != tc.wantServers[i] {
					t.Fatalf("expected servers %v, got %v", tc.wantServers, got)
				}
			}
		})
	}
}
func TestFilterMetricsByVolumeState(t *testing.T) {
metrics := []*workertypes.VolumeHealthMetrics{
{VolumeID: 1, FullnessRatio: 0.5}, // active