mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2026-05-30 05:30:23 +00:00
* fix(shell): volume.balance no longer drains all volumes onto one server The density-based capacity function reads per-disk VolumeInfos sizes, but adjustAfterMove only updated VolumeCount and the selectedVolumes map. The planner re-read a stale topology after every move, so the source node's density never dropped and it kept moving volumes until that node was empty. Move the volume's size accounting between disks after each planned move so the density recomputes and the loop converges to an even distribution. * refactor(shell): O(1) volume removal and direct disk lookup in adjustAfterMove removeVolumeInfo swaps with the last element instead of shifting, and the disk is fetched by key rather than ranging the DiskInfos map.
608 lines
20 KiB
Go
608 lines
20 KiB
Go
package shell
|
|
|
|
import (
|
|
"cmp"
|
|
"flag"
|
|
"fmt"
|
|
"github.com/seaweedfs/seaweedfs/weed/util"
|
|
"io"
|
|
"os"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
|
|
"slices"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/super_block"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/types"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
|
|
)
|
|
|
|
// thresholdVolumeSize is the factor a volume's size is scaled by before it is
// compared against the volume size limit in selectVolumesByActive.
const thresholdVolumeSize = 1.01

// countZeroSelectedVolumes is the numerator localVolumeDensityRatio uses in
// place of zero when a node reports no used volumes.
const countZeroSelectedVolumes = 0.5
|
|
|
|
func init() {
|
|
Commands = append(Commands, &commandVolumeBalance{})
|
|
}
|
|
|
|
type commandVolumeBalance struct {
|
|
volumeSizeLimitMb uint64
|
|
commandEnv *CommandEnv
|
|
volumeByActive *bool
|
|
applyBalancing bool
|
|
}
|
|
|
|
func (c *commandVolumeBalance) Name() string {
|
|
return "volume.balance"
|
|
}
|
|
|
|
func (c *commandVolumeBalance) Help() string {
|
|
return `balance all volumes among volume servers
|
|
|
|
volume.balance [-collection ALL_COLLECTIONS|EACH_COLLECTION|<collection_name>] [-apply] [-dataCenter=<data_center_name>] [-racks=rack_name_one,rack_name_two] [-nodes=192.168.0.1:8080,192.168.0.2:8080]
|
|
|
|
The -collection parameter supports:
|
|
- ALL_COLLECTIONS: balance across all collections
|
|
- EACH_COLLECTION: balance each collection separately
|
|
- Regular expressions for pattern matching:
|
|
* Use exact match: volume.balance -collection="^mybucket$"
|
|
* Match multiple buckets: volume.balance -collection="bucket.*"
|
|
* Match all user collections: volume.balance -collection="user-.*"
|
|
|
|
Algorithm:
|
|
|
|
For each type of volume server (different max volume count limit){
|
|
for each collection {
|
|
balanceWritableVolumes()
|
|
balanceReadOnlyVolumes()
|
|
}
|
|
}
|
|
|
|
func balanceWritableVolumes(){
|
|
idealWritableVolumeRatio = totalWritableVolumes / totalNumberOfMaxVolumes
|
|
for hasMovedOneVolume {
|
|
sort all volume servers ordered by the localWritableVolumeRatio = localWritableVolumes to localVolumeMax
|
|
pick the volume server B with the highest localWritableVolumeRatio y
|
|
for any the volume server A with the number of writable volumes x + 1 <= idealWritableVolumeRatio * localVolumeMax {
|
|
if y > localWritableVolumeRatio {
|
|
if B has a writable volume id v that A does not have, and satisfy v replication requirements {
|
|
move writable volume v from A to B
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
func balanceReadOnlyVolumes(){
|
|
//similar to balanceWritableVolumes
|
|
}
|
|
|
|
`
|
|
}
|
|
|
|
func (c *commandVolumeBalance) HasTag(CommandTag) bool {
|
|
return false
|
|
}
|
|
|
|
func (c *commandVolumeBalance) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
|
|
allowedVolumeBy := map[string]*bool{
|
|
"ALL": nil,
|
|
"ACTIVE": new(bool),
|
|
"FULL": new(bool),
|
|
}
|
|
*allowedVolumeBy["ACTIVE"] = true
|
|
balanceCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
|
|
verbose := balanceCommand.Bool("v", false, "verbose mode")
|
|
collection := balanceCommand.String("collection", "ALL_COLLECTIONS", "collection name, or use \"ALL_COLLECTIONS\" across collections, \"EACH_COLLECTION\" for each collection")
|
|
dc := balanceCommand.String("dataCenter", "", "only apply the balancing for this dataCenter")
|
|
racks := balanceCommand.String("racks", "", "only apply the balancing for this racks")
|
|
nodes := balanceCommand.String("nodes", "", "only apply the balancing for this nodes")
|
|
noLock := balanceCommand.Bool("noLock", false, "do not lock the admin shell at one's own risk")
|
|
applyBalancing := balanceCommand.Bool("apply", false, "apply the balancing plan.")
|
|
// TODO: remove this alias
|
|
applyBalancingAlias := balanceCommand.Bool("force", false, "apply the balancing plan (alias for -apply)")
|
|
balanceCommand.Func("volumeBy", "only apply the balancing for ALL volumes and ACTIVE or FULL", func(flagValue string) error {
|
|
if flagValue == "" {
|
|
return nil
|
|
}
|
|
for allowed, volumeBy := range allowedVolumeBy {
|
|
if flagValue == allowed {
|
|
c.volumeByActive = volumeBy
|
|
return nil
|
|
}
|
|
}
|
|
return fmt.Errorf("use \"ALL\", \"ACTIVE\" or \"FULL\"")
|
|
})
|
|
if err = balanceCommand.Parse(args); err != nil {
|
|
return nil
|
|
}
|
|
handleDeprecatedForceFlag(writer, balanceCommand, applyBalancingAlias, applyBalancing)
|
|
c.applyBalancing = *applyBalancing
|
|
|
|
infoAboutSimulationMode(writer, c.applyBalancing, "-apply")
|
|
|
|
if *noLock {
|
|
commandEnv.noLock = true
|
|
} else {
|
|
if err = commandEnv.confirmIsLocked(args); err != nil {
|
|
return
|
|
}
|
|
}
|
|
commandEnv.verbose = *verbose
|
|
c.commandEnv = commandEnv
|
|
|
|
// collect topology information
|
|
var topologyInfo *master_pb.TopologyInfo
|
|
topologyInfo, c.volumeSizeLimitMb, err = collectTopologyInfo(commandEnv, 5*time.Second)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
volumeServers := collectVolumeServersByDcRackNode(topologyInfo, *dc, *racks, *nodes)
|
|
volumeReplicas, _ := collectVolumeReplicaLocations(topologyInfo)
|
|
diskTypes := collectVolumeDiskTypes(topologyInfo)
|
|
|
|
if *collection == "EACH_COLLECTION" {
|
|
collections, err := ListCollectionNames(commandEnv, true, false)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for _, col := range collections {
|
|
// Use direct string comparison for exact match (more efficient than regex)
|
|
if err = c.balanceVolumeServers(diskTypes, volumeReplicas, volumeServers, nil, col); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
} else if *collection == "ALL_COLLECTIONS" {
|
|
// Pass nil pattern for all collections
|
|
if err = c.balanceVolumeServers(diskTypes, volumeReplicas, volumeServers, nil, *collection); err != nil {
|
|
return err
|
|
}
|
|
} else {
|
|
// Compile user-provided pattern
|
|
collectionPattern, err := compileCollectionPattern(*collection)
|
|
if err != nil {
|
|
return fmt.Errorf("invalid collection pattern '%s': %v", *collection, err)
|
|
}
|
|
if err = c.balanceVolumeServers(diskTypes, volumeReplicas, volumeServers, collectionPattern, *collection); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (c *commandVolumeBalance) balanceVolumeServers(diskTypes []types.DiskType, volumeReplicas map[uint32][]*VolumeReplica, nodes []*Node, collectionPattern *regexp.Regexp, collectionName string) error {
|
|
for _, diskType := range diskTypes {
|
|
if err := c.balanceVolumeServersByDiskType(diskType, volumeReplicas, nodes, collectionPattern, collectionName); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (c *commandVolumeBalance) balanceVolumeServersByDiskType(diskType types.DiskType, volumeReplicas map[uint32][]*VolumeReplica, nodes []*Node, collectionPattern *regexp.Regexp, collectionName string) error {
|
|
for _, n := range nodes {
|
|
n.selectVolumes(func(v *master_pb.VolumeInformationMessage) bool {
|
|
if collectionName != "ALL_COLLECTIONS" {
|
|
if collectionPattern != nil {
|
|
// Use regex pattern matching
|
|
if !collectionPattern.MatchString(v.Collection) {
|
|
return false
|
|
}
|
|
} else {
|
|
// Use exact string matching (for EACH_COLLECTION)
|
|
if v.Collection != collectionName {
|
|
return false
|
|
}
|
|
}
|
|
}
|
|
if v.DiskType != string(diskType) {
|
|
return false
|
|
}
|
|
return selectVolumesByActive(v.Size, c.volumeByActive, c.volumeSizeLimitMb)
|
|
})
|
|
}
|
|
if err := balanceSelectedVolume(c.commandEnv, diskType, volumeReplicas, nodes, sortWritableVolumes, c.volumeSizeLimitMb, c.applyBalancing); err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// splitCSVSet turns a comma-separated list into a set used for exact-match
// filtering. Items are trimmed of surrounding whitespace and blank items are
// dropped, so an empty result (len(set) == 0) means "no filter given".
func splitCSVSet(csv string) map[string]bool {
	isComma := func(r rune) bool { return r == ',' }

	members := make(map[string]bool)
	for _, field := range strings.FieldsFunc(csv, isComma) {
		name := strings.TrimSpace(field)
		if name == "" {
			continue
		}
		members[name] = true
	}
	return members
}
|
|
|
|
func collectVolumeServersByDcRackNode(t *master_pb.TopologyInfo, selectedDataCenter string, selectedRacks string, selectedNodes string) (nodes []*Node) {
|
|
rackSet := splitCSVSet(selectedRacks)
|
|
nodeSet := splitCSVSet(selectedNodes)
|
|
for _, dc := range t.DataCenterInfos {
|
|
if selectedDataCenter != "" && dc.Id != selectedDataCenter {
|
|
continue
|
|
}
|
|
for _, r := range dc.RackInfos {
|
|
if len(rackSet) > 0 && !rackSet[r.Id] {
|
|
continue
|
|
}
|
|
for _, dn := range r.DataNodeInfos {
|
|
if len(nodeSet) > 0 && !nodeSet[dn.Id] {
|
|
continue
|
|
}
|
|
nodes = append(nodes, &Node{
|
|
info: dn,
|
|
dc: dc.Id,
|
|
rack: r.Id,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
func collectVolumeDiskTypes(t *master_pb.TopologyInfo) (diskTypes []types.DiskType) {
|
|
knownTypes := make(map[string]bool)
|
|
for _, dc := range t.DataCenterInfos {
|
|
for _, r := range dc.RackInfos {
|
|
for _, dn := range r.DataNodeInfos {
|
|
for diskType := range dn.DiskInfos {
|
|
if _, found := knownTypes[diskType]; !found {
|
|
knownTypes[diskType] = true
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
for diskType := range knownTypes {
|
|
diskTypes = append(diskTypes, types.ToDiskType(diskType))
|
|
}
|
|
return
|
|
}
|
|
|
|
type Node struct {
|
|
info *master_pb.DataNodeInfo
|
|
selectedVolumes map[uint32]*master_pb.VolumeInformationMessage
|
|
dc string
|
|
rack string
|
|
}
|
|
|
|
type CapacityFunc func(*master_pb.DataNodeInfo) float64
|
|
type DensityFunc func(*master_pb.DataNodeInfo) (float64, uint64)
|
|
|
|
func capacityByMinVolumeDensity(diskType types.DiskType, volumeSizeLimitMb uint64) DensityFunc {
|
|
return func(info *master_pb.DataNodeInfo) (float64, uint64) {
|
|
diskInfo, found := info.DiskInfos[string(diskType)]
|
|
if !found {
|
|
return 0, 0
|
|
}
|
|
var volumeSizes uint64
|
|
for _, volumeInfo := range diskInfo.VolumeInfos {
|
|
volumeSizes += volumeInfo.Size
|
|
}
|
|
if volumeSizeLimitMb == 0 {
|
|
volumeSizeLimitMb = util.VolumeSizeLimitGB * util.KiByte
|
|
}
|
|
usedVolumeCount := volumeSizes / (volumeSizeLimitMb * util.MiByte)
|
|
return float64(diskInfo.MaxVolumeCount - int64(usedVolumeCount)), usedVolumeCount
|
|
}
|
|
}
|
|
|
|
func capacityByMaxVolumeCount(diskType types.DiskType) CapacityFunc {
|
|
return func(info *master_pb.DataNodeInfo) float64 {
|
|
diskInfo, found := info.DiskInfos[string(diskType)]
|
|
if !found {
|
|
return 0
|
|
}
|
|
var ecShardCount int
|
|
for _, ecShardInfo := range diskInfo.EcShardInfos {
|
|
ecShardCount += erasure_coding.GetShardCount(ecShardInfo)
|
|
}
|
|
return float64(diskInfo.MaxVolumeCount) - float64(ecShardCount)/erasure_coding.DataShardsCount
|
|
}
|
|
}
|
|
|
|
func capacityByFreeVolumeCount(diskType types.DiskType) CapacityFunc {
|
|
return func(info *master_pb.DataNodeInfo) float64 {
|
|
diskInfo, found := info.DiskInfos[string(diskType)]
|
|
if !found {
|
|
return 0
|
|
}
|
|
var ecShardCount int
|
|
for _, ecShardInfo := range diskInfo.EcShardInfos {
|
|
ecShardCount += erasure_coding.GetShardCount(ecShardInfo)
|
|
}
|
|
return float64(diskInfo.MaxVolumeCount-diskInfo.VolumeCount) - float64(ecShardCount)/erasure_coding.DataShardsCount
|
|
}
|
|
}
|
|
|
|
func (n *Node) localVolumeDensityRatio(capacityFunc DensityFunc) float64 {
|
|
capacity, selectedVolumes := capacityFunc(n.info)
|
|
if capacity == 0 {
|
|
return 0
|
|
}
|
|
if selectedVolumes == 0 {
|
|
return countZeroSelectedVolumes / capacity
|
|
}
|
|
return float64(selectedVolumes) / capacity
|
|
}
|
|
|
|
func (n *Node) localVolumeDensityNextRatio(capacityFunc DensityFunc) float64 {
|
|
capacity, selectedVolumes := capacityFunc(n.info)
|
|
if capacity == 0 {
|
|
return 0
|
|
}
|
|
return float64(selectedVolumes+1) / capacity
|
|
}
|
|
|
|
func (n *Node) localVolumeRatio(capacityFunc CapacityFunc) float64 {
|
|
return float64(len(n.selectedVolumes)) / capacityFunc(n.info)
|
|
}
|
|
|
|
func (n *Node) isOneVolumeOnly() bool {
|
|
if len(n.selectedVolumes) != 1 {
|
|
return false
|
|
}
|
|
for _, disk := range n.info.DiskInfos {
|
|
if disk.VolumeCount == 1 && disk.MaxVolumeCount == 1 {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (n *Node) selectVolumes(fn func(v *master_pb.VolumeInformationMessage) bool) {
|
|
n.selectedVolumes = make(map[uint32]*master_pb.VolumeInformationMessage)
|
|
for _, diskInfo := range n.info.DiskInfos {
|
|
for _, v := range diskInfo.VolumeInfos {
|
|
if fn(v) {
|
|
n.selectedVolumes[v.Id] = v
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func sortWritableVolumes(volumes []*master_pb.VolumeInformationMessage) {
|
|
slices.SortFunc(volumes, func(a, b *master_pb.VolumeInformationMessage) int {
|
|
return cmp.Compare(a.Size, b.Size)
|
|
})
|
|
}
|
|
|
|
func selectVolumesByActive(volumeSize uint64, volumeByActive *bool, volumeSizeLimitMb uint64) bool {
|
|
if volumeByActive == nil {
|
|
return true
|
|
}
|
|
if uint64(float64(volumeSize)*thresholdVolumeSize) < volumeSizeLimitMb*util.MiByte {
|
|
return *volumeByActive
|
|
} else {
|
|
return !(*volumeByActive)
|
|
}
|
|
}
|
|
|
|
func balanceSelectedVolume(commandEnv *CommandEnv, diskType types.DiskType, volumeReplicas map[uint32][]*VolumeReplica, nodes []*Node, sortCandidatesFn func(volumes []*master_pb.VolumeInformationMessage), volumeSizeLimitMb uint64, applyBalancing bool) (err error) {
|
|
selectedVolumeCount, volumeCapacities := uint64(0), float64(0)
|
|
var nodesWithCapacity []*Node
|
|
if volumeSizeLimitMb == 0 {
|
|
volumeSizeLimitMb = util.VolumeSizeLimitGB * util.KiByte
|
|
}
|
|
capacityFunc := capacityByMinVolumeDensity(diskType, volumeSizeLimitMb)
|
|
for _, dn := range nodes {
|
|
capacity, volumeCount := capacityFunc(dn.info)
|
|
if capacity > 0 {
|
|
nodesWithCapacity = append(nodesWithCapacity, dn)
|
|
}
|
|
volumeCapacities += capacity
|
|
selectedVolumeCount += volumeCount
|
|
}
|
|
if volumeCapacities == 0 {
|
|
return nil
|
|
}
|
|
idealVolumeRatio := float64(selectedVolumeCount) / volumeCapacities
|
|
|
|
hasMoved := true
|
|
|
|
if commandEnv != nil && commandEnv.verbose {
|
|
fmt.Fprintf(os.Stdout, "selected nodes %d, volumes:%d, cap:%d, idealVolumeRatio %f\n", len(nodesWithCapacity), selectedVolumeCount, int64(volumeCapacities), idealVolumeRatio*100)
|
|
}
|
|
for hasMoved {
|
|
hasMoved = false
|
|
slices.SortFunc(nodesWithCapacity, func(a, b *Node) int {
|
|
return cmp.Compare(a.localVolumeDensityRatio(capacityFunc), b.localVolumeDensityRatio(capacityFunc))
|
|
})
|
|
if len(nodesWithCapacity) == 0 {
|
|
if commandEnv != nil && commandEnv.verbose {
|
|
fmt.Fprintf(os.Stdout, "no volume server found with capacity for %s", diskType.ReadableString())
|
|
}
|
|
return nil
|
|
}
|
|
|
|
var fullNode *Node
|
|
var fullNodeIndex int
|
|
for fullNodeIndex = len(nodesWithCapacity) - 1; fullNodeIndex >= 0; fullNodeIndex-- {
|
|
fullNode = nodesWithCapacity[fullNodeIndex]
|
|
if len(fullNode.selectedVolumes) == 0 {
|
|
continue
|
|
}
|
|
if !fullNode.isOneVolumeOnly() {
|
|
break
|
|
}
|
|
}
|
|
var candidateVolumes []*master_pb.VolumeInformationMessage
|
|
for _, v := range fullNode.selectedVolumes {
|
|
candidateVolumes = append(candidateVolumes, v)
|
|
}
|
|
if fullNodeIndex == -1 {
|
|
if commandEnv != nil && commandEnv.verbose {
|
|
fmt.Fprintf(os.Stdout, "no nodes with capacity found for %s, nodes %d", diskType.ReadableString(), len(nodesWithCapacity))
|
|
}
|
|
return nil
|
|
}
|
|
sortCandidatesFn(candidateVolumes)
|
|
for _, emptyNode := range nodesWithCapacity[:fullNodeIndex] {
|
|
if !(fullNode.localVolumeDensityNextRatio(capacityFunc) > idealVolumeRatio && emptyNode.localVolumeDensityNextRatio(capacityFunc) <= idealVolumeRatio) {
|
|
if commandEnv != nil && commandEnv.verbose {
|
|
fmt.Printf("no more volume servers with empty slots %s, idealVolumeRatio %f\n", emptyNode.info.Id, idealVolumeRatio)
|
|
}
|
|
break
|
|
}
|
|
fmt.Fprintf(os.Stdout, "%s %.2f %.2f:%.2f\t", diskType.ReadableString(), idealVolumeRatio,
|
|
fullNode.localVolumeDensityRatio(capacityFunc), emptyNode.localVolumeDensityNextRatio(capacityFunc))
|
|
if commandEnv != nil && commandEnv.verbose {
|
|
fmt.Fprintf(os.Stdout, "%s %.1f %.1f:%.1f\t", diskType.ReadableString(), idealVolumeRatio*100,
|
|
fullNode.localVolumeDensityRatio(capacityFunc)*100, emptyNode.localVolumeDensityNextRatio(capacityFunc)*100)
|
|
}
|
|
hasMoved, err = attemptToMoveOneVolume(commandEnv, volumeReplicas, fullNode, candidateVolumes, emptyNode, applyBalancing)
|
|
if err != nil {
|
|
if commandEnv != nil && commandEnv.verbose {
|
|
fmt.Fprintf(os.Stdout, "attempt to move one volume error %+v\n", err)
|
|
}
|
|
if strings.Contains(err.Error(), util.ErrVolumeNoSpaceLeft) {
|
|
continue
|
|
}
|
|
return
|
|
}
|
|
if hasMoved {
|
|
// moved one volume
|
|
break
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func attemptToMoveOneVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][]*VolumeReplica, fullNode *Node, candidateVolumes []*master_pb.VolumeInformationMessage, emptyNode *Node, applyBalancing bool) (hasMoved bool, err error) {
|
|
|
|
for _, v := range candidateVolumes {
|
|
hasMoved, err = maybeMoveOneVolume(commandEnv, volumeReplicas, fullNode, v, emptyNode, applyBalancing)
|
|
if err != nil {
|
|
return
|
|
}
|
|
if hasMoved {
|
|
break
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
func maybeMoveOneVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][]*VolumeReplica, fullNode *Node, candidateVolume *master_pb.VolumeInformationMessage, emptyNode *Node, applyChange bool) (hasMoved bool, err error) {
|
|
if !commandEnv.isLocked() {
|
|
return false, fmt.Errorf("lock is lost")
|
|
}
|
|
|
|
if candidateVolume.RemoteStorageName != "" {
|
|
return false, fmt.Errorf("does not move volume in remote storage")
|
|
}
|
|
|
|
if candidateVolume.ReplicaPlacement > 0 {
|
|
replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(candidateVolume.ReplicaPlacement))
|
|
if !isGoodMove(replicaPlacement, volumeReplicas[candidateVolume.Id], fullNode, emptyNode) {
|
|
return false, nil
|
|
}
|
|
}
|
|
if _, found := emptyNode.selectedVolumes[candidateVolume.Id]; !found {
|
|
if err = moveVolume(commandEnv, candidateVolume, fullNode, emptyNode, applyChange); err == nil {
|
|
adjustAfterMove(candidateVolume, volumeReplicas, fullNode, emptyNode)
|
|
return true, nil
|
|
} else {
|
|
return
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
func moveVolume(commandEnv *CommandEnv, v *master_pb.VolumeInformationMessage, fullNode *Node, emptyNode *Node, applyChange bool) error {
|
|
collectionPrefix := v.Collection + "_"
|
|
if v.Collection == "" {
|
|
collectionPrefix = ""
|
|
}
|
|
fmt.Fprintf(os.Stdout, " moving %s volume %s%d %s => %s\n", v.DiskType, collectionPrefix, v.Id, fullNode.info.Id, emptyNode.info.Id)
|
|
if applyChange {
|
|
return LiveMoveVolume(commandEnv.option.GrpcDialOption, os.Stderr, needle.VolumeId(v.Id), pb.NewServerAddressFromDataNode(fullNode.info), pb.NewServerAddressFromDataNode(emptyNode.info), 5*time.Second, v.DiskType, 0, v.ReadOnly)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func isGoodMove(placement *super_block.ReplicaPlacement, existingReplicas []*VolumeReplica, sourceNode, targetNode *Node) bool {
|
|
for _, replica := range existingReplicas {
|
|
if replica.location.dataNode.Id == targetNode.info.Id &&
|
|
replica.location.rack == targetNode.rack &&
|
|
replica.location.dc == targetNode.dc {
|
|
// never move to existing nodes
|
|
return false
|
|
}
|
|
}
|
|
|
|
// existing replicas except the one on sourceNode
|
|
existingReplicasExceptSourceNode := make([]*VolumeReplica, 0)
|
|
for _, replica := range existingReplicas {
|
|
if replica.location.dataNode.Id != sourceNode.info.Id {
|
|
existingReplicasExceptSourceNode = append(existingReplicasExceptSourceNode, replica)
|
|
}
|
|
}
|
|
|
|
// target location
|
|
targetLocation := location{
|
|
dc: targetNode.dc,
|
|
rack: targetNode.rack,
|
|
dataNode: targetNode.info,
|
|
}
|
|
|
|
// check if this satisfies replication requirements
|
|
return satisfyReplicaPlacement(placement, existingReplicasExceptSourceNode, targetLocation)
|
|
}
|
|
|
|
func removeVolumeInfo(diskInfo *master_pb.DiskInfo, volumeId uint32) {
|
|
for i, volumeInfo := range diskInfo.VolumeInfos {
|
|
if volumeInfo.Id == volumeId {
|
|
// order does not matter here, so swap with the last and truncate
|
|
last := len(diskInfo.VolumeInfos) - 1
|
|
diskInfo.VolumeInfos[i] = diskInfo.VolumeInfos[last]
|
|
diskInfo.VolumeInfos[last] = nil
|
|
diskInfo.VolumeInfos = diskInfo.VolumeInfos[:last]
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func adjustAfterMove(v *master_pb.VolumeInformationMessage, volumeReplicas map[uint32][]*VolumeReplica, fullNode *Node, emptyNode *Node) {
|
|
delete(fullNode.selectedVolumes, v.Id)
|
|
if emptyNode.selectedVolumes != nil {
|
|
emptyNode.selectedVolumes[v.Id] = v
|
|
}
|
|
existingReplicas := volumeReplicas[v.Id]
|
|
for _, replica := range existingReplicas {
|
|
if replica.location.dataNode.Id == fullNode.info.Id &&
|
|
replica.location.rack == fullNode.rack &&
|
|
replica.location.dc == fullNode.dc {
|
|
loc := newLocation(emptyNode.dc, emptyNode.rack, emptyNode.info)
|
|
replica.location = &loc
|
|
// Move the volume's size accounting between disks so that
|
|
// capacityByMinVolumeDensity recomputes ratios correctly on the next
|
|
// iteration. Without this the density view stays stale and the planner
|
|
// keeps draining the same node, moving every volume onto one server.
|
|
if fullDisk, found := fullNode.info.DiskInfos[v.DiskType]; found {
|
|
removeVolumeInfo(fullDisk, v.Id)
|
|
addVolumeCount(fullDisk, -1)
|
|
}
|
|
if emptyDisk, found := emptyNode.info.DiskInfos[v.DiskType]; found {
|
|
emptyDisk.VolumeInfos = append(emptyDisk.VolumeInfos, v)
|
|
addVolumeCount(emptyDisk, 1)
|
|
}
|
|
return
|
|
}
|
|
}
|
|
}
|