feat(plugin): DC/rack/node filtering for volume balance (#8621)

* feat(plugin): add DC/rack/node filtering for volume balance detection

Add scoping filters so balance detection can be limited to specific data
centers, racks, or nodes. Filters are applied both at the metrics level
(in the handler) and at the topology seeding level (in detection) to
ensure only the targeted infrastructure participates in balancing.

* address PR review: use set lookups, deduplicate test helpers, add target checks

* address review: assert non-empty tasks in filter tests

Prevent vacuous test passes by requiring len(tasks) > 0
before checking source/target exclusions.

* address review: enforce filter scope in fallback, clarify DC filter

- Thread allowedServers into createBalanceTask so the fallback
  planner cannot produce out-of-scope targets when DC/rack/node
  filters are active
- Update data_center_filter description to clarify single-DC usage

* address review: centralize parseCSVSet, fix filter scope leak, iterate all targets

- Extract ParseCSVSet to shared weed/worker/tasks/util package,
  remove duplicates from detection.go and volume_balance_handler.go
- Fix metric accumulation re-introducing filtered-out servers by
  only counting metrics for servers that passed DC/rack/node filters
- Trim DataCenterFilter before matching to handle trailing spaces
- Iterate all task.TypedParams.Targets in filter tests, not just [0]

* remove useless descriptor string test
This commit is contained in:
Chris Lu
2026-03-13 17:03:37 -07:00
committed by GitHub
parent 00ce1c6eba
commit 47ddf05d95
6 changed files with 283 additions and 2 deletions

View File

@@ -2,6 +2,7 @@ package balance
import (
"fmt"
"strings"
"testing"
"github.com/seaweedfs/seaweedfs/weed/admin/topology"
@@ -1000,3 +1001,112 @@ func TestDetection_ZeroVolumeServerIncludedInBalance(t *testing.T) {
t.Logf("Distribution 8/2/1/0 → %v after %d moves (imbalance=%.1f%%)",
effective, len(tasks), imbalance*100)
}
// TestDetection_DataCenterFilter verifies that when DataCenterFilter is set,
// balance detection is scoped to the named data center: node-c (in dc2) must
// never appear as a task source or as a replication target.
func TestDetection_DataCenterFilter(t *testing.T) {
	servers := []serverSpec{
		{id: "node-a", diskType: "hdd", diskID: 1, dc: "dc1", rack: "rack1"},
		{id: "node-b", diskType: "hdd", diskID: 2, dc: "dc1", rack: "rack1"},
		{id: "node-c", diskType: "hdd", diskID: 3, dc: "dc2", rack: "rack1"},
	}

	var metrics []*types.VolumeHealthMetrics
	metrics = append(metrics, makeVolumes("node-a", "hdd", "dc1", "rack1", "c1", 1, 50)...)
	metrics = append(metrics, makeVolumes("node-b", "hdd", "dc1", "rack1", "c1", 100, 10)...)
	// node-c is in dc2, should be excluded by filter
	metrics = append(metrics, makeVolumes("node-c", "hdd", "dc2", "rack1", "c1", 200, 30)...)

	// Simulate handler-level filtering: only dc1 metrics reach Detection.
	dc1Metrics := make([]*types.VolumeHealthMetrics, 0, len(metrics))
	for _, m := range metrics {
		if m.DataCenter == "dc1" {
			dc1Metrics = append(dc1Metrics, m)
		}
	}

	// The topology still contains all three servers; the DC filter must keep
	// node-c out of topology seeding even though it exists in the cluster.
	at := buildTopology(servers, metrics)
	clusterInfo := &types.ClusterInfo{ActiveTopology: at}

	conf := defaultConf()
	conf.DataCenterFilter = "dc1"

	tasks, _, err := Detection(dc1Metrics, clusterInfo, conf, 100)
	if err != nil {
		t.Fatalf("Detection failed: %v", err)
	}
	// Ensure detection produced tasks so the following checks are not vacuous.
	if len(tasks) == 0 {
		t.Fatal("Expected balance tasks for 50/10 imbalance within dc1, got 0")
	}

	// With DC filter, only node-a and node-b are considered in topology seeding.
	// node-c should never appear as source or destination.
	for _, task := range tasks {
		if task.Server == "node-c" {
			t.Errorf("node-c (dc2) should not be a source with dc1 filter")
		}
		if task.TypedParams != nil {
			for _, tgt := range task.TypedParams.Targets {
				if strings.Contains(tgt.Node, "node-c") {
					t.Errorf("node-c (dc2) should not be a target with dc1 filter")
				}
			}
		}
	}

	// len(tasks) > 0 is guaranteed by the Fatal above, so log unconditionally
	// (the previous `if len(tasks) > 0` guard was dead code).
	t.Logf("Created %d tasks within dc1 scope", len(tasks))
}
// TestDetection_NodeFilter verifies that when NodeFilter names specific
// servers, balance detection stays within that set: node-c, which is outside
// the filter, must never show up as a task source or target.
func TestDetection_NodeFilter(t *testing.T) {
	servers := []serverSpec{
		{id: "node-a", diskType: "hdd", diskID: 1, dc: "dc1", rack: "rack1"},
		{id: "node-b", diskType: "hdd", diskID: 2, dc: "dc1", rack: "rack1"},
		{id: "node-c", diskType: "hdd", diskID: 3, dc: "dc1", rack: "rack1"},
	}

	var metrics []*types.VolumeHealthMetrics
	metrics = append(metrics, makeVolumes("node-a", "hdd", "dc1", "rack1", "c1", 1, 50)...)
	metrics = append(metrics, makeVolumes("node-b", "hdd", "dc1", "rack1", "c1", 100, 10)...)
	metrics = append(metrics, makeVolumes("node-c", "hdd", "dc1", "rack1", "c1", 200, 5)...)

	// Keep only the metrics for servers named in the filter.
	allowed := map[string]bool{"node-a": true, "node-b": true}
	var inScope []*types.VolumeHealthMetrics
	for _, m := range metrics {
		if allowed[m.Server] {
			inScope = append(inScope, m)
		}
	}

	clusterInfo := &types.ClusterInfo{ActiveTopology: buildTopology(servers, metrics)}

	conf := defaultConf()
	conf.NodeFilter = "node-a,node-b"

	tasks, _, err := Detection(inScope, clusterInfo, conf, 100)
	if err != nil {
		t.Fatalf("Detection failed: %v", err)
	}
	// A zero-task result would make the exclusion checks below vacuous.
	if len(tasks) == 0 {
		t.Fatal("Expected balance tasks for 50/10 imbalance within node-a,node-b scope, got 0")
	}

	for _, task := range tasks {
		if task.Server == "node-c" {
			t.Errorf("node-c should not be a source with node filter")
		}
		if task.TypedParams == nil {
			continue
		}
		for _, tgt := range task.TypedParams.Targets {
			if strings.Contains(tgt.Node, "node-c") {
				t.Errorf("node-c should not be a target with node filter")
			}
		}
	}
	t.Logf("Created %d tasks within node-a,node-b scope", len(tasks))
}