seaweedfs/weed/shell/command_ec_test.go

package shell

import (
	"testing"

	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
	"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
	"github.com/seaweedfs/seaweedfs/weed/storage/needle"
	"github.com/seaweedfs/seaweedfs/weed/storage/types"
)

func TestCommandEcBalanceSmall(t *testing.T) {
	ecb := &ecBalancer{
		ecNodes: []*EcNode{
			newEcNode("dc1", "rack1", "dn1", 100).addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}),
			newEcNode("dc1", "rack2", "dn2", 100).addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}),
		},
		applyBalancing: false,
		diskType:       types.HardDriveType,
	}

	ecb.balance([]string{"c1"})
}

func TestCommandEcBalanceNothingToMove(t *testing.T) {
	ecb := &ecBalancer{
		ecNodes: []*EcNode{
			newEcNode("dc1", "rack1", "dn1", 100).
				addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{0, 1, 2, 3, 4, 5, 6}).
				addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{7, 8, 9, 10, 11, 12, 13}),
			newEcNode("dc1", "rack1", "dn2", 100).
				addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{7, 8, 9, 10, 11, 12, 13}).
				addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{0, 1, 2, 3, 4, 5, 6}),
		},
		applyBalancing: false,
		diskType:       types.HardDriveType,
	}

	ecb.balance([]string{"c1"})
}

func TestCommandEcBalanceAddNewServers(t *testing.T) {
	ecb := &ecBalancer{
		ecNodes: []*EcNode{
			newEcNode("dc1", "rack1", "dn1", 100).
				addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{0, 1, 2, 3, 4, 5, 6}).
				addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{7, 8, 9, 10, 11, 12, 13}),
			newEcNode("dc1", "rack1", "dn2", 100).
				addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{7, 8, 9, 10, 11, 12, 13}).
				addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{0, 1, 2, 3, 4, 5, 6}),
			newEcNode("dc1", "rack1", "dn3", 100),
			newEcNode("dc1", "rack1", "dn4", 100),
		},
		applyBalancing: false,
		diskType:       types.HardDriveType,
	}

	ecb.balance([]string{"c1"})
}

func TestCommandEcBalanceAddNewRacks(t *testing.T) {
	ecb := &ecBalancer{
		ecNodes: []*EcNode{
			newEcNode("dc1", "rack1", "dn1", 100).
				addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{0, 1, 2, 3, 4, 5, 6}).
				addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{7, 8, 9, 10, 11, 12, 13}),
			newEcNode("dc1", "rack1", "dn2", 100).
				addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{7, 8, 9, 10, 11, 12, 13}).
				addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{0, 1, 2, 3, 4, 5, 6}),
			newEcNode("dc1", "rack2", "dn3", 100),
			newEcNode("dc1", "rack2", "dn4", 100),
		},
		applyBalancing: false,
		diskType:       types.HardDriveType,
	}

	ecb.balance([]string{"c1"})
}

func TestCommandEcBalanceVolumeEvenButRackUneven(t *testing.T) {
	ecb := ecBalancer{
		ecNodes: []*EcNode{
			newEcNode("dc1", "rack1", "dn_shared", 100).
				addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{0}).
				addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{0}),

			newEcNode("dc1", "rack1", "dn_a1", 100).addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{1}),
			newEcNode("dc1", "rack1", "dn_a2", 100).addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{2}),
			newEcNode("dc1", "rack1", "dn_a3", 100).addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{3}),
			newEcNode("dc1", "rack1", "dn_a4", 100).addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{4}),
			newEcNode("dc1", "rack1", "dn_a5", 100).addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{5}),
			newEcNode("dc1", "rack1", "dn_a6", 100).addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{6}),
			newEcNode("dc1", "rack1", "dn_a7", 100).addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{7}),
			newEcNode("dc1", "rack1", "dn_a8", 100).addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{8}),
			newEcNode("dc1", "rack1", "dn_a9", 100).addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{9}),
			newEcNode("dc1", "rack1", "dn_a10", 100).addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{10}),
			newEcNode("dc1", "rack1", "dn_a11", 100).addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{11}),
			newEcNode("dc1", "rack1", "dn_a12", 100).addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{12}),
			newEcNode("dc1", "rack1", "dn_a13", 100).addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{13}),

			newEcNode("dc1", "rack1", "dn_b1", 100).addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{1}),
			newEcNode("dc1", "rack1", "dn_b2", 100).addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{2}),
			newEcNode("dc1", "rack1", "dn_b3", 100).addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{3}),
			newEcNode("dc1", "rack1", "dn_b4", 100).addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{4}),
			newEcNode("dc1", "rack1", "dn_b5", 100).addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{5}),
			newEcNode("dc1", "rack1", "dn_b6", 100).addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{6}),
			newEcNode("dc1", "rack1", "dn_b7", 100).addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{7}),
			newEcNode("dc1", "rack1", "dn_b8", 100).addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{8}),
			newEcNode("dc1", "rack1", "dn_b9", 100).addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{9}),
			newEcNode("dc1", "rack1", "dn_b10", 100).addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{10}),
			newEcNode("dc1", "rack1", "dn_b11", 100).addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{11}),
			newEcNode("dc1", "rack1", "dn_b12", 100).addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{12}),
			newEcNode("dc1", "rack1", "dn_b13", 100).addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{13}),

			newEcNode("dc1", "rack1", "dn3", 100),
		},
		applyBalancing: false,
		diskType:       types.HardDriveType,
	}

	ecb.balance([]string{"c1"})
}

func newEcNode(dc string, rack string, dataNodeId string, freeEcSlot int) *EcNode {
	return &EcNode{
		info: &master_pb.DataNodeInfo{
			Id:        dataNodeId,
			DiskInfos: make(map[string]*master_pb.DiskInfo),
		},
		dc:         DataCenterId(dc),
		rack:       RackId(rack),
		freeEcSlot: freeEcSlot,
	}
}

func (ecNode *EcNode) addEcVolumeAndShardsForTest(vid uint32, collection string, shardIds []erasure_coding.ShardId) *EcNode {
	return ecNode.addEcVolumeShards(needle.VolumeId(vid), collection, shardIds, types.HardDriveType)
}

// TestCommandEcBalanceEvenDataAndParityDistribution verifies that after balancing:
// 1. Data shards (0-9) are evenly distributed across racks (max 2 per rack for 6 racks)
// 2. Parity shards (10-13) are evenly distributed across racks (max 1 per rack for 6 racks)
func TestCommandEcBalanceEvenDataAndParityDistribution(t *testing.T) {
	// Setup: All 14 shards start on rack1 (simulating fresh EC encode)
	ecb := &ecBalancer{
		ecNodes: []*EcNode{
			// All shards initially on rack1/dn1
			newEcNode("dc1", "rack1", "dn1", 100).addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}),
			// Empty nodes on other racks
			newEcNode("dc1", "rack2", "dn2", 100),
			newEcNode("dc1", "rack3", "dn3", 100),
			newEcNode("dc1", "rack4", "dn4", 100),
			newEcNode("dc1", "rack5", "dn5", 100),
			newEcNode("dc1", "rack6", "dn6", 100),
		},
		applyBalancing: false, // Dry-run mode (simulates moves by updating internal state)
		diskType:       types.HardDriveType,
	}

	ecb.balance([]string{"c1"})

	// After balancing (dry-run), verify the PLANNED distribution by checking what moves were proposed
	// The ecb.ecNodes state is updated during dry-run to track planned moves
	vid := needle.VolumeId(1)
	dataShardCount := erasure_coding.DataShardsCount     // 10
	parityShardCount := erasure_coding.ParityShardsCount // 4

	// Count data and parity shards per rack based on current (updated) state
	dataPerRack, parityPerRack := countDataAndParityShardsPerRack(ecb.ecNodes, vid, dataShardCount)

	// With 6 racks:
	// - Data shards (10): max 2 per rack (ceil(10/6) = 2)
	// - Parity shards (4): max 1 per rack (ceil(4/6) = 1)
	maxDataPerRack := ceilDivide(dataShardCount, 6)     // 2
	maxParityPerRack := ceilDivide(parityShardCount, 6) // 1

	// Verify no rack has more than max data shards
	for rackId, count := range dataPerRack {
		if count > maxDataPerRack {
			t.Errorf("rack %s has %d data shards, expected max %d", rackId, count, maxDataPerRack)
		}
	}

	// Verify no rack has more than max parity shards
	for rackId, count := range parityPerRack {
		if count > maxParityPerRack {
			t.Errorf("rack %s has %d parity shards, expected max %d", rackId, count, maxParityPerRack)
		}
	}

	// Verify all shards are distributed (total counts)
	totalData := 0
	totalParity := 0
	for _, count := range dataPerRack {
		totalData += count
	}
	for _, count := range parityPerRack {
		totalParity += count
	}
	if totalData != dataShardCount {
		t.Errorf("total data shards = %d, expected %d", totalData, dataShardCount)
	}
	if totalParity != parityShardCount {
		t.Errorf("total parity shards = %d, expected %d", totalParity, parityShardCount)
	}

	// Verify data shards are spread across at least 5 racks (10 shards / 2 max per rack)
	racksWithData := len(dataPerRack)
	minRacksForData := dataShardCount / maxDataPerRack // At least 5 racks needed for 10 data shards
	if racksWithData < minRacksForData {
		t.Errorf("data shards spread across only %d racks, expected at least %d", racksWithData, minRacksForData)
	}

	// Verify parity shards are spread across at least 4 racks (4 shards / 1 max per rack)
	racksWithParity := len(parityPerRack)
	if racksWithParity < parityShardCount {
		t.Errorf("parity shards spread across only %d racks, expected at least %d", racksWithParity, parityShardCount)
	}

	t.Logf("Distribution after balancing:")
	t.Logf("  Data shards per rack: %v (max allowed: %d)", dataPerRack, maxDataPerRack)
	t.Logf("  Parity shards per rack: %v (max allowed: %d)", parityPerRack, maxParityPerRack)
}

// countDataAndParityShardsPerRack counts data and parity shards per rack
func countDataAndParityShardsPerRack(ecNodes []*EcNode, vid needle.VolumeId, dataShardCount int) (dataPerRack, parityPerRack map[string]int) {
	dataPerRack = make(map[string]int)
	parityPerRack = make(map[string]int)

	for _, ecNode := range ecNodes {
		si := findEcVolumeShardsInfo(ecNode, vid, types.HardDriveType)
		for _, shardId := range si.Ids() {
			rackId := string(ecNode.rack)
			if int(shardId) < dataShardCount {
				dataPerRack[rackId]++
			} else {
				parityPerRack[rackId]++
			}
		}
	}
	return
}

// TestCommandEcBalanceMultipleVolumesEvenDistribution tests that multiple volumes
// each get their data and parity shards evenly distributed
func TestCommandEcBalanceMultipleVolumesEvenDistribution(t *testing.T) {
	// Setup: Two volumes, each with all 14 shards on different starting racks
	ecb := &ecBalancer{
		ecNodes: []*EcNode{
			// Volume 1: all shards on rack1
			newEcNode("dc1", "rack1", "dn1", 100).addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}),
			// Volume 2: all shards on rack2
			newEcNode("dc1", "rack2", "dn2", 100).addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}),
			// Empty nodes on other racks
			newEcNode("dc1", "rack3", "dn3", 100),
			newEcNode("dc1", "rack4", "dn4", 100),
			newEcNode("dc1", "rack5", "dn5", 100),
			newEcNode("dc1", "rack6", "dn6", 100),
		},
		applyBalancing: false, // Dry-run mode
		diskType:       types.HardDriveType,
	}

	ecb.balance([]string{"c1"})

	// Check both volumes
	for _, vid := range []needle.VolumeId{1, 2} {
		dataPerRack, parityPerRack := countDataAndParityShardsPerRack(ecb.ecNodes, vid, erasure_coding.DataShardsCount)

		maxDataPerRack := ceilDivide(erasure_coding.DataShardsCount, 6)
		maxParityPerRack := ceilDivide(erasure_coding.ParityShardsCount, 6)

		for rackId, count := range dataPerRack {
			if count > maxDataPerRack {
				t.Errorf("volume %d: rack %s has %d data shards, expected max %d", vid, rackId, count, maxDataPerRack)
			}
		}
		for rackId, count := range parityPerRack {
			if count > maxParityPerRack {
				t.Errorf("volume %d: rack %s has %d parity shards, expected max %d", vid, rackId, count, maxParityPerRack)
			}
		}

		t.Logf("Volume %d - Data: %v, Parity: %v", vid, dataPerRack, parityPerRack)
	}
}

// TestCommandEcBalanceAllNodesShareAllVolumes reproduces the scenario from issue #8793:
// When every node has a shard of every volume, ec.balance was unable to move any shards
// because it skipped volumes that already existed on the target node at the volume level.
func TestCommandEcBalanceAllNodesShareAllVolumes(t *testing.T) {
	// 4 nodes, all in same rack, 2 volumes with 14 shards each.
	// Distribute shards so every node has shards of both volumes, but unevenly:
	// dn1: vol1 shards 0-4, vol2 shards 0-4 => 10 shards
	// dn2: vol1 shards 5-9, vol2 shards 5-9 => 10 shards
	// dn3: vol1 shards 10-12, vol2 shards 10-12 => 6 shards
	// dn4: vol1 shard 13, vol2 shard 13 => 2 shards
	// Total: 28 shards, average = 7 per node
	ecb := &ecBalancer{
		ecNodes: []*EcNode{
			newEcNode("dc1", "rack1", "dn1", 100).
				addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{0, 1, 2, 3, 4}).
				addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{0, 1, 2, 3, 4}),
			newEcNode("dc1", "rack1", "dn2", 100).
				addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{5, 6, 7, 8, 9}).
				addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{5, 6, 7, 8, 9}),
			newEcNode("dc1", "rack1", "dn3", 100).
				addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{10, 11, 12}).
				addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{10, 11, 12}),
			newEcNode("dc1", "rack1", "dn4", 100).
				addEcVolumeAndShardsForTest(1, "c1", []erasure_coding.ShardId{13}).
				addEcVolumeAndShardsForTest(2, "c1", []erasure_coding.ShardId{13}),
		},
		applyBalancing: false,
		diskType:       types.HardDriveType,
	}

	ecb.balance([]string{"c1"})

	// Count total shards per node after balancing
	for _, node := range ecb.ecNodes {
		count := 0
		if diskInfo, found := node.info.DiskInfos[string(types.HardDriveType)]; found {
			for _, ecsi := range diskInfo.EcShardInfos {
				count += erasure_coding.GetShardCount(ecsi)
			}
		}
		// Average is 7, so all nodes should be at 7 (ceil(28/4) = 7)
		if count > 7 {
			t.Errorf("node %s has %d shards after balancing, expected at most 7", node.info.Id, count)
		}
		t.Logf("node %s: %d shards", node.info.Id, count)
	}
}

// TestCommandEcBalanceIssue8793Topology simulates the real cluster from issue #8793:
// 14 nodes (9 with max=80, 5 with max=33), all in one rack, with mixed capacities.
// Each EC volume has 1 shard per node. Nodes have uneven totals (some have extra volumes).
func TestCommandEcBalanceIssue8793Topology(t *testing.T) {
	// Simulate 22 EC volumes across 14 nodes (each volume has 14 shards, 1 per node).
	// Give nodes 0-3 an extra volume each (vol 23-26, all 14 shards) to create imbalance.
	// Before balancing: nodes 0-3 have 22+14=36 shards each, nodes 4-13 have 22 shards each.
	// Total = 4*36 + 10*22 = 144+220 = 364. Capacities are heterogeneous (max 80 vs
	// 33), so the utilization-based global phase balances by fullness, not count:
	// 364 shards over 9*80+5*33=885 slots is ~41% full, so large nodes settle near
	// 33 shards and small nodes near 14 (all ~41% full) rather than ~26 each, which
	// would drive the small nodes to ~79%.

	type nodeSpec struct {
		id      string
		maxSlot int
	}
	nodes := []nodeSpec{
		{"192.168.0.12:8332", 80}, {"192.168.0.12:8333", 80}, {"192.168.0.12:8334", 80},
		{"192.168.0.12:8335", 80}, {"192.168.0.12:8336", 80}, {"192.168.0.12:8337", 80},
		{"192.168.0.12:8338", 80}, {"192.168.0.12:8339", 80}, {"192.168.0.12:8340", 80},
		{"192.168.0.12:8341", 33}, {"192.168.0.12:8342", 33}, {"192.168.0.12:8343", 33},
		{"192.168.0.25:8350", 33}, {"192.168.0.25:8351", 33},
	}

	ecNodes := make([]*EcNode, len(nodes))
	for i, ns := range nodes {
		ecNodes[i] = newEcNode("home", "center", ns.id, ns.maxSlot)
	}

	// 22 shared volumes: each node gets exactly 1 shard (shard i for node i)
	for vid := uint32(1); vid <= 22; vid++ {
		for i := range ecNodes {
			ecNodes[i].addEcVolumeAndShardsForTest(vid, "cldata", []erasure_coding.ShardId{erasure_coding.ShardId(i)})
		}
	}

	// 4 extra volumes only on first 4 nodes (all 14 shards each) to create imbalance
	for extra := uint32(0); extra < 4; extra++ {
		vid := 23 + extra
		nodeIdx := int(extra)
		allShards := make([]erasure_coding.ShardId, 14)
		for s := 0; s < 14; s++ {
			allShards[s] = erasure_coding.ShardId(s)
		}
		ecNodes[nodeIdx].addEcVolumeAndShardsForTest(vid, "cldata", allShards)
	}

	ecb := &ecBalancer{
		ecNodes:        ecNodes,
		applyBalancing: false,
		diskType:       types.HardDriveType,
	}

	// Log initial state
	for _, node := range ecb.ecNodes {
		count := 0
		if diskInfo, found := node.info.DiskInfos[string(types.HardDriveType)]; found {
			for _, ecsi := range diskInfo.EcShardInfos {
				count += erasure_coding.GetShardCount(ecsi)
			}
		}
		t.Logf("BEFORE node %s (max %d): %d shards", node.info.Id, node.freeEcSlot+count, count)
	}

	ecb.balance([]string{"cldata"})

	// Verify even FULLNESS (shards/capacity), not even count: with heterogeneous
	// capacities the utilization-based global phase fills nodes proportionally, so
	// large nodes hold more shards than small ones while every node ends near the
	// overall fullness. (An even-count result would over-fill the small nodes.)
	capacityByID := make(map[string]int, len(nodes))
	for _, ns := range nodes {
		capacityByID[ns.id] = ns.maxSlot
	}

	totalShards := 0
	totalCapacity := 0
	shardCounts := make(map[string]int)
	for _, node := range ecb.ecNodes {
		count := 0
		if diskInfo, found := node.info.DiskInfos[string(types.HardDriveType)]; found {
			for _, ecsi := range diskInfo.EcShardInfos {
				count += erasure_coding.GetShardCount(ecsi)
			}
		}
		shardCounts[node.info.Id] = count
		totalShards += count
		totalCapacity += capacityByID[node.info.Id]
	}
	overallFullness := float64(totalShards) / float64(totalCapacity)

	// Tolerance well below the gap a count-even result would show (small nodes
	// would sit ~38 points above overall), but above integer-rounding skew.
	const tolerance = 0.05
	for _, node := range ecb.ecNodes {
		count := shardCounts[node.info.Id]
		capacity := capacityByID[node.info.Id]
		fullness := float64(count) / float64(capacity)
		t.Logf("AFTER  node %s: %d/%d shards (%.0f%% full, overall %.0f%%)",
			node.info.Id, count, capacity, fullness*100, overallFullness*100)
		if diff := fullness - overallFullness; diff > tolerance || diff < -tolerance {
			t.Errorf("node %s fullness %.1f%% deviates from overall %.1f%% by more than %.0f points",
				node.info.Id, fullness*100, overallFullness*100, tolerance*100)
		}
	}
}