Add volume.scrub and ec.scrub shell commands to scrub regular & EC volumes on demand. (#8188)

* Implement RPC skeleton for regular/EC volumes scrubbing.

See https://github.com/seaweedfs/seaweedfs/issues/8018 for details.

* Add `volume.scrub` and `ec.scrub` shell commands to scrub regular & EC volumes on demand.

For example:

```
> ec.scrub --full
Scrubbing 10.200.17.13:9005 (1/10)...
Scrubbing 10.200.17.13:9001 (2/10)...
Scrubbing 10.200.17.13:9008 (3/10)...
Scrubbing 10.200.17.13:9009 (4/10)...
Scrubbing 10.200.17.13:9004 (5/10)...
Scrubbing 10.200.17.13:9010 (6/10)...
Scrubbing 10.200.17.13:9007 (7/10)...
Scrubbing 10.200.17.13:9002 (8/10)...
Scrubbing 10.200.17.13:9003 (9/10)...
Scrubbing 10.200.17.13:9006 (10/10)...
Scrubbed 20 EC files and 20 volumes on 10 nodes

Got scrub failures on 1 EC volumes and 2 EC shards :(
Affected volumes: 10.200.17.13:9005:1
Details:
	[10.200.17.13:9005] expected 551041 bytes for needle 6, got 551072
	[10.200.17.13:9005] needles in volume file (1) don't match index entries (173) for volume 1
```
This commit is contained in:
Lisandro Pin
2026-02-05 02:08:31 +01:00
committed by GitHub
parent 7831257ed5
commit 2ecbae3611
3 changed files with 324 additions and 1 deletions

View File

@@ -164,7 +164,6 @@ func parseReplicaPlacementArg(commandEnv *CommandEnv, replicaStr string) (*super
}
func collectTopologyInfo(commandEnv *CommandEnv, delayBeforeCollecting time.Duration) (topoInfo *master_pb.TopologyInfo, volumeSizeLimitMb uint64, err error) {
if delayBeforeCollecting > 0 {
time.Sleep(delayBeforeCollecting)
}
@@ -179,7 +178,25 @@ func collectTopologyInfo(commandEnv *CommandEnv, delayBeforeCollecting time.Dura
}
return resp.TopologyInfo, resp.VolumeSizeLimitMb, nil
}
// collectDataNodes returns every data node in the cluster, flattening the
// data center / rack hierarchy reported by the master into a single list.
// The delay, if positive, is applied before querying the topology (see
// collectTopologyInfo).
func collectDataNodes(commandEnv *CommandEnv, delayBeforeCollecting time.Duration) ([]*master_pb.DataNodeInfo, error) {
	topo, _, err := collectTopologyInfo(commandEnv, delayBeforeCollecting)
	if err != nil {
		return nil, err
	}

	// Start from a non-nil slice so callers always get an empty (not nil)
	// result when the topology has no data nodes.
	nodes := []*master_pb.DataNodeInfo{}
	for _, dc := range topo.GetDataCenterInfos() {
		for _, rack := range dc.GetRackInfos() {
			nodes = append(nodes, rack.GetDataNodeInfos()...)
		}
	}
	return nodes, nil
}
func collectEcNodesForDC(commandEnv *CommandEnv, selectedDataCenter string, diskType types.DiskType) (ecNodes []*EcNode, totalFreeEcSlots int, err error) {