shell: volume balance follows replica placement
This commit is contained in:
@@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"github.com/chrislusf/seaweedfs/weed/storage/super_block"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"sort"
|
"sort"
|
||||||
@@ -83,6 +84,7 @@ func (c *commandVolumeBalance) Do(args []string, commandEnv *CommandEnv, writer
|
|||||||
}
|
}
|
||||||
|
|
||||||
typeToNodes := collectVolumeServersByType(resp.TopologyInfo, *dc)
|
typeToNodes := collectVolumeServersByType(resp.TopologyInfo, *dc)
|
||||||
|
volumeReplicas, _ := collectVolumeReplicaLocations(resp)
|
||||||
|
|
||||||
for maxVolumeCount, volumeServers := range typeToNodes {
|
for maxVolumeCount, volumeServers := range typeToNodes {
|
||||||
if len(volumeServers) < 2 {
|
if len(volumeServers) < 2 {
|
||||||
@@ -95,16 +97,16 @@ func (c *commandVolumeBalance) Do(args []string, commandEnv *CommandEnv, writer
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
for _, c := range collections {
|
for _, c := range collections {
|
||||||
if err = balanceVolumeServers(commandEnv, volumeServers, resp.VolumeSizeLimitMb*1024*1024, c, *applyBalancing); err != nil {
|
if err = balanceVolumeServers(commandEnv, volumeReplicas, volumeServers, resp.VolumeSizeLimitMb*1024*1024, c, *applyBalancing); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if *collection == "ALL_COLLECTIONS" {
|
} else if *collection == "ALL_COLLECTIONS" {
|
||||||
if err = balanceVolumeServers(commandEnv, volumeServers, resp.VolumeSizeLimitMb*1024*1024, "ALL_COLLECTIONS", *applyBalancing); err != nil {
|
if err = balanceVolumeServers(commandEnv, volumeReplicas, volumeServers, resp.VolumeSizeLimitMb*1024*1024, "ALL_COLLECTIONS", *applyBalancing); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if err = balanceVolumeServers(commandEnv, volumeServers, resp.VolumeSizeLimitMb*1024*1024, *collection, *applyBalancing); err != nil {
|
if err = balanceVolumeServers(commandEnv, volumeReplicas, volumeServers, resp.VolumeSizeLimitMb*1024*1024, *collection, *applyBalancing); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -113,7 +115,7 @@ func (c *commandVolumeBalance) Do(args []string, commandEnv *CommandEnv, writer
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func balanceVolumeServers(commandEnv *CommandEnv, nodes []*Node, volumeSizeLimit uint64, collection string, applyBalancing bool) error {
|
func balanceVolumeServers(commandEnv *CommandEnv, volumeReplicas map[uint32][]*VolumeReplica, nodes []*Node, volumeSizeLimit uint64, collection string, applyBalancing bool) error {
|
||||||
|
|
||||||
// balance writable volumes
|
// balance writable volumes
|
||||||
for _, n := range nodes {
|
for _, n := range nodes {
|
||||||
@@ -126,7 +128,7 @@ func balanceVolumeServers(commandEnv *CommandEnv, nodes []*Node, volumeSizeLimit
|
|||||||
return !v.ReadOnly && v.Size < volumeSizeLimit
|
return !v.ReadOnly && v.Size < volumeSizeLimit
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
if err := balanceSelectedVolume(commandEnv, nodes, sortWritableVolumes, applyBalancing); err != nil {
|
if err := balanceSelectedVolume(commandEnv, volumeReplicas, nodes, sortWritableVolumes, applyBalancing); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -141,7 +143,7 @@ func balanceVolumeServers(commandEnv *CommandEnv, nodes []*Node, volumeSizeLimit
|
|||||||
return v.ReadOnly || v.Size >= volumeSizeLimit
|
return v.ReadOnly || v.Size >= volumeSizeLimit
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
if err := balanceSelectedVolume(commandEnv, nodes, sortReadOnlyVolumes, applyBalancing); err != nil {
|
if err := balanceSelectedVolume(commandEnv, volumeReplicas, nodes, sortReadOnlyVolumes, applyBalancing); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -186,7 +188,7 @@ func sortReadOnlyVolumes(volumes []*master_pb.VolumeInformationMessage) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func balanceSelectedVolume(commandEnv *CommandEnv, nodes []*Node, sortCandidatesFn func(volumes []*master_pb.VolumeInformationMessage), applyBalancing bool) (err error) {
|
func balanceSelectedVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][]*VolumeReplica, nodes []*Node, sortCandidatesFn func(volumes []*master_pb.VolumeInformationMessage), applyBalancing bool) (err error) {
|
||||||
selectedVolumeCount := 0
|
selectedVolumeCount := 0
|
||||||
for _, dn := range nodes {
|
for _, dn := range nodes {
|
||||||
selectedVolumeCount += len(dn.selectedVolumes)
|
selectedVolumeCount += len(dn.selectedVolumes)
|
||||||
@@ -216,7 +218,7 @@ func balanceSelectedVolume(commandEnv *CommandEnv, nodes []*Node, sortCandidates
|
|||||||
// no more volume servers with empty slots
|
// no more volume servers with empty slots
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
hasMoved, err = attemptToMoveOneVolume(commandEnv, fullNode, candidateVolumes, emptyNode, applyBalancing)
|
hasMoved, err = attemptToMoveOneVolume(commandEnv, volumeReplicas, fullNode, candidateVolumes, emptyNode, applyBalancing)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -229,13 +231,12 @@ func balanceSelectedVolume(commandEnv *CommandEnv, nodes []*Node, sortCandidates
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func attemptToMoveOneVolume(commandEnv *CommandEnv, fullNode *Node, candidateVolumes []*master_pb.VolumeInformationMessage, emptyNode *Node, applyBalancing bool) (hasMoved bool, err error) {
|
func attemptToMoveOneVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][]*VolumeReplica, fullNode *Node, candidateVolumes []*master_pb.VolumeInformationMessage, emptyNode *Node, applyBalancing bool) (hasMoved bool, err error) {
|
||||||
|
|
||||||
for _, v := range candidateVolumes {
|
for _, v := range candidateVolumes {
|
||||||
if v.ReplicaPlacement > 0 {
|
if v.ReplicaPlacement > 0 {
|
||||||
if fullNode.dc != emptyNode.dc && fullNode.rack != emptyNode.rack {
|
replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(v.ReplicaPlacement))
|
||||||
// TODO this logic is too simple, but should work most of the time
|
if !isGoodMove(replicaPlacement, volumeReplicas[v.Id], fullNode, emptyNode) {
|
||||||
// Need a correct algorithm to handle all different cases
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -273,3 +274,41 @@ func (node *Node) selectVolumes(fn func(v *master_pb.VolumeInformationMessage) b
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func isGoodMove(placement *super_block.ReplicaPlacement, existingReplicas []*VolumeReplica, sourceNode, targetNode *Node) bool {
|
||||||
|
for _, replica := range existingReplicas {
|
||||||
|
if replica.location.dataNode.Id == targetNode.info.Id &&
|
||||||
|
replica.location.rack == targetNode.rack &&
|
||||||
|
replica.location.dc == targetNode.dc {
|
||||||
|
// never move to existing nodes
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dcs, racks := make(map[string]bool), make(map[string]int)
|
||||||
|
for _, replica := range existingReplicas {
|
||||||
|
if replica.location.dataNode.Id != sourceNode.info.Id {
|
||||||
|
dcs[replica.location.DataCenter()] = true
|
||||||
|
racks[replica.location.Rack()]++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dcs[targetNode.dc] = true
|
||||||
|
racks[fmt.Sprintf("%s %s", targetNode.dc, targetNode.rack)]++
|
||||||
|
|
||||||
|
if len(dcs) > placement.DiffDataCenterCount+1 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(racks) > placement.DiffRackCount+1 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, sameRackCount := range racks {
|
||||||
|
if sameRackCount > placement.SameRackCount+1 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
|
|
||||||
|
}
|
||||||
|
|||||||
139
weed/shell/command_volume_balance_test.go
Normal file
139
weed/shell/command_volume_balance_test.go
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
package shell
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
|
||||||
|
"github.com/chrislusf/seaweedfs/weed/storage/super_block"
|
||||||
|
)
|
||||||
|
|
||||||
|
type testMoveCase struct {
|
||||||
|
name string
|
||||||
|
replication string
|
||||||
|
replicas []*VolumeReplica
|
||||||
|
sourceLocation location
|
||||||
|
targetLocation location
|
||||||
|
expected bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIsGoodMove(t *testing.T) {
|
||||||
|
|
||||||
|
var tests = []testMoveCase{
|
||||||
|
{
|
||||||
|
name: "test move to the same node",
|
||||||
|
replication: "001",
|
||||||
|
replicas: []*VolumeReplica{
|
||||||
|
{
|
||||||
|
location: &location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn1"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
location: &location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn2"}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
sourceLocation: location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn2"}},
|
||||||
|
targetLocation: location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn2"}},
|
||||||
|
expected: false,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
name: "test move to the same rack, but existing node",
|
||||||
|
replication: "001",
|
||||||
|
replicas: []*VolumeReplica{
|
||||||
|
{
|
||||||
|
location: &location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn1"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
location: &location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn2"}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
sourceLocation: location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn2"}},
|
||||||
|
targetLocation: location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn1"}},
|
||||||
|
expected: false,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
name: "test move to the same rack, a new node",
|
||||||
|
replication: "001",
|
||||||
|
replicas: []*VolumeReplica{
|
||||||
|
{
|
||||||
|
location: &location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn1"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
location: &location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn2"}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
sourceLocation: location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn2"}},
|
||||||
|
targetLocation: location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn3"}},
|
||||||
|
expected: true,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
name: "test 010 move all to the same rack",
|
||||||
|
replication: "010",
|
||||||
|
replicas: []*VolumeReplica{
|
||||||
|
{
|
||||||
|
location: &location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn1"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
location: &location{"dc1", "r2", &master_pb.DataNodeInfo{Id: "dn2"}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
sourceLocation: location{"dc1", "r2", &master_pb.DataNodeInfo{Id: "dn2"}},
|
||||||
|
targetLocation: location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn3"}},
|
||||||
|
expected: false,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
name: "test 010 move to spread racks",
|
||||||
|
replication: "010",
|
||||||
|
replicas: []*VolumeReplica{
|
||||||
|
{
|
||||||
|
location: &location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn1"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
location: &location{"dc1", "r2", &master_pb.DataNodeInfo{Id: "dn2"}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
sourceLocation: location{"dc1", "r2", &master_pb.DataNodeInfo{Id: "dn2"}},
|
||||||
|
targetLocation: location{"dc1", "r3", &master_pb.DataNodeInfo{Id: "dn3"}},
|
||||||
|
expected: true,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
name: "test 010 move to spread racks",
|
||||||
|
replication: "010",
|
||||||
|
replicas: []*VolumeReplica{
|
||||||
|
{
|
||||||
|
location: &location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn1"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
location: &location{"dc1", "r2", &master_pb.DataNodeInfo{Id: "dn2"}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
sourceLocation: location{"dc1", "r2", &master_pb.DataNodeInfo{Id: "dn2"}},
|
||||||
|
targetLocation: location{"dc1", "r2", &master_pb.DataNodeInfo{Id: "dn3"}},
|
||||||
|
expected: true,
|
||||||
|
},
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
replicaPlacement, _ := super_block.NewReplicaPlacementFromString(tt.replication)
|
||||||
|
println("replication:", tt.replication, "expected", tt.expected, "name:", tt.name)
|
||||||
|
sourceNode := &Node{
|
||||||
|
info: tt.sourceLocation.dataNode,
|
||||||
|
dc: tt.sourceLocation.dc,
|
||||||
|
rack: tt.sourceLocation.rack,
|
||||||
|
}
|
||||||
|
targetNode := &Node{
|
||||||
|
info: tt.targetLocation.dataNode,
|
||||||
|
dc: tt.targetLocation.dc,
|
||||||
|
rack: tt.targetLocation.rack,
|
||||||
|
}
|
||||||
|
if isGoodMove(replicaPlacement, tt.replicas, sourceNode, targetNode) != tt.expected {
|
||||||
|
t.Errorf("%s: expect %v move from %v to %s, replication:%v",
|
||||||
|
tt.name, tt.expected, tt.sourceLocation, tt.targetLocation, tt.replication)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -66,18 +66,7 @@ func (c *commandVolumeFixReplication) Do(args []string, commandEnv *CommandEnv,
|
|||||||
|
|
||||||
// find all volumes that needs replication
|
// find all volumes that needs replication
|
||||||
// collect all data nodes
|
// collect all data nodes
|
||||||
volumeReplicas := make(map[uint32][]*VolumeReplica)
|
volumeReplicas, allLocations := collectVolumeReplicaLocations(resp)
|
||||||
var allLocations []location
|
|
||||||
eachDataNode(resp.TopologyInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
|
|
||||||
loc := newLocation(dc, string(rack), dn)
|
|
||||||
for _, v := range dn.VolumeInfos {
|
|
||||||
volumeReplicas[v.Id] = append(volumeReplicas[v.Id], &VolumeReplica{
|
|
||||||
location: &loc,
|
|
||||||
info: v,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
allLocations = append(allLocations, loc)
|
|
||||||
})
|
|
||||||
|
|
||||||
if len(allLocations) == 0 {
|
if len(allLocations) == 0 {
|
||||||
return fmt.Errorf("no data nodes at all")
|
return fmt.Errorf("no data nodes at all")
|
||||||
@@ -111,6 +100,22 @@ func (c *commandVolumeFixReplication) Do(args []string, commandEnv *CommandEnv,
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func collectVolumeReplicaLocations(resp *master_pb.VolumeListResponse) (map[uint32][]*VolumeReplica, []location) {
|
||||||
|
volumeReplicas := make(map[uint32][]*VolumeReplica)
|
||||||
|
var allLocations []location
|
||||||
|
eachDataNode(resp.TopologyInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
|
||||||
|
loc := newLocation(dc, string(rack), dn)
|
||||||
|
for _, v := range dn.VolumeInfos {
|
||||||
|
volumeReplicas[v.Id] = append(volumeReplicas[v.Id], &VolumeReplica{
|
||||||
|
location: &loc,
|
||||||
|
info: v,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
allLocations = append(allLocations, loc)
|
||||||
|
})
|
||||||
|
return volumeReplicas, allLocations
|
||||||
|
}
|
||||||
|
|
||||||
func (c *commandVolumeFixReplication) fixOverReplicatedVolumes(commandEnv *CommandEnv, writer io.Writer, takeAction bool, overReplicatedVolumeIds []uint32, volumeReplicas map[uint32][]*VolumeReplica, allLocations []location) error {
|
func (c *commandVolumeFixReplication) fixOverReplicatedVolumes(commandEnv *CommandEnv, writer io.Writer, takeAction bool, overReplicatedVolumeIds []uint32, volumeReplicas map[uint32][]*VolumeReplica, allLocations []location) error {
|
||||||
for _, vid := range overReplicatedVolumeIds {
|
for _, vid := range overReplicatedVolumeIds {
|
||||||
replicas := volumeReplicas[vid]
|
replicas := volumeReplicas[vid]
|
||||||
|
|||||||
Reference in New Issue
Block a user