Files
seaweedfs/sw-block/engine/replication/phase14_command_test.go
pingqiu 43dbebfa04 refactor: close bounded recovery drain and invalidation seams
Move removed-replica drain and replica-scoped invalidation onto explicit core-command paths so the widened multi-replica runtime no longer depends on coarse host-side recovery handling.

Made-with: Cursor
2026-04-04 11:01:12 -07:00

396 lines
12 KiB
Go

package replication
import (
"reflect"
"testing"
)
// TestPhase14_CommandSequence_PrimaryAssignmentIsBounded applies a primary
// assignment carrying one catch-up replica, checks the names of the emitted
// commands, and then replays the identical event expecting no commands.
func TestPhase14_CommandSequence_PrimaryAssignmentIsBounded(t *testing.T) {
	const volumeID = "vol-cmd-primary"

	engine := NewCoreEngine()
	endpoint := Endpoint{DataAddr: "10.0.0.10:9333", CtrlAddr: "10.0.0.10:9334", Version: 1}
	assignment := AssignmentDelivered{
		ID:             volumeID,
		Epoch:          1,
		Role:           RolePrimary,
		RecoveryTarget: SessionCatchUp,
		Replicas:       []ReplicaAssignment{{ReplicaID: "replica-1", Endpoint: endpoint}},
	}

	wantFirst := []string{
		"apply_role",
		"configure_shipper",
		"start_recovery_task",
		"publish_projection",
	}
	first := engine.ApplyEvent(assignment)
	assertCommandNames(t, first.Commands, wantFirst)

	// Re-delivering the same assignment must not produce any command.
	replay := engine.ApplyEvent(assignment)
	assertCommandNames(t, replay.Commands, nil)
}
// TestPhase14_CommandSequence_PrimaryMultiReplicaAssignmentStartsRecoveryPerReplica
// applies a primary assignment with two catch-up replicas and expects one
// start_recovery_task command per replica, reported in the same order as the
// replicas appear in the assignment. Replaying the identical event must emit
// no commands.
func TestPhase14_CommandSequence_PrimaryMultiReplicaAssignmentStartsRecoveryPerReplica(t *testing.T) {
	core := NewCoreEngine()
	replicas := []ReplicaAssignment{
		{ReplicaID: "replica-1", Endpoint: Endpoint{DataAddr: "10.0.0.21:9333", CtrlAddr: "10.0.0.21:9334", Version: 1}},
		{ReplicaID: "replica-2", Endpoint: Endpoint{DataAddr: "10.0.0.22:9333", CtrlAddr: "10.0.0.22:9334", Version: 1}},
	}
	ev := AssignmentDelivered{
		ID:             "vol-cmd-primary-multi",
		Epoch:          1,
		Role:           RolePrimary,
		RecoveryTarget: SessionCatchUp,
		Replicas:       replicas,
	}

	result := core.ApplyEvent(ev)
	assertCommandNames(t, result.Commands, []string{
		"apply_role",
		"configure_shipper",
		"start_recovery_task",
		"start_recovery_task",
		"publish_projection",
	})

	// Report both sides of the comparison so a failure is diagnosable
	// without re-reading the test.
	wantReplicas := []string{"replica-1", "replica-2"}
	if got := recoveryTaskReplicaIDs(result.Commands); !reflect.DeepEqual(got, wantReplicas) {
		t.Fatalf("recovery task replicas=%v, want %v", got, wantReplicas)
	}

	// Second delivery of the same assignment is expected to be a no-op.
	result = core.ApplyEvent(ev)
	assertCommandNames(t, result.Commands, nil)
}
// TestPhase14_CommandSequence_ReplicaAssignmentIsBounded applies a
// replica-role assignment, checks the emitted command names, and verifies
// that replaying the identical event emits nothing.
func TestPhase14_CommandSequence_ReplicaAssignmentIsBounded(t *testing.T) {
	engine := NewCoreEngine()
	endpoint := Endpoint{DataAddr: "10.0.0.11:9333", CtrlAddr: "10.0.0.11:9334", Version: 1}
	assignment := AssignmentDelivered{
		ID:       "vol-cmd-replica",
		Epoch:    3,
		Role:     RoleReplica,
		Replicas: []ReplicaAssignment{{ReplicaID: "replica-1", Endpoint: endpoint}},
	}

	wantFirst := []string{"apply_role", "start_receiver", "publish_projection"}
	first := engine.ApplyEvent(assignment)
	assertCommandNames(t, first.Commands, wantFirst)

	// Same assignment a second time: no commands expected.
	replay := engine.ApplyEvent(assignment)
	assertCommandNames(t, replay.Commands, nil)
}
// TestPhase14_CommandSequence_AssignmentChangeReissuesNeededCommand brings a
// primary through role-applied / shipper-configured / shipper-connected and
// then delivers an assignment with the same volume, epoch and role but a
// different replica endpoint. Only configure_shipper and publish_projection
// are expected in response.
func TestPhase14_CommandSequence_AssignmentChangeReissuesNeededCommand(t *testing.T) {
	const volumeID = "vol-cmd-change"

	engine := NewCoreEngine()

	before := Endpoint{DataAddr: "10.0.0.12:9333", CtrlAddr: "10.0.0.12:9334", Version: 1}
	engine.ApplyEvent(AssignmentDelivered{
		ID:       volumeID,
		Epoch:    5,
		Role:     RolePrimary,
		Replicas: []ReplicaAssignment{{ReplicaID: "replica-1", Endpoint: before}},
	})
	engine.ApplyEvent(RoleApplied{ID: volumeID})
	engine.ApplyEvent(ShipperConfiguredObserved{ID: volumeID})
	engine.ApplyEvent(ShipperConnectedObserved{ID: volumeID})

	// Only the endpoint (addresses and version) differs from the first delivery.
	after := Endpoint{DataAddr: "10.0.0.13:9333", CtrlAddr: "10.0.0.13:9334", Version: 2}
	outcome := engine.ApplyEvent(AssignmentDelivered{
		ID:       volumeID,
		Epoch:    5,
		Role:     RolePrimary,
		Replicas: []ReplicaAssignment{{ReplicaID: "replica-1", Endpoint: after}},
	})
	assertCommandNames(t, outcome.Commands, []string{"configure_shipper", "publish_projection"})
}
// TestPhase14_CommandSequence_InvalidateOnlyOnNewFailureTransition drives a
// primary up to an accepted barrier, then rejects a barrier twice with the
// same reason. The first rejection must yield invalidate_session followed by
// publish_projection; the repeated rejection must yield no commands.
func TestPhase14_CommandSequence_InvalidateOnlyOnNewFailureTransition(t *testing.T) {
	const volumeID = "vol-cmd-failure"

	engine := NewCoreEngine()
	endpoint := Endpoint{DataAddr: "10.0.0.14:9333", CtrlAddr: "10.0.0.14:9334", Version: 1}
	engine.ApplyEvent(AssignmentDelivered{
		ID:       volumeID,
		Epoch:    1,
		Role:     RolePrimary,
		Replicas: []ReplicaAssignment{{ReplicaID: "replica-1", Endpoint: endpoint}},
	})
	engine.ApplyEvent(RoleApplied{ID: volumeID})
	engine.ApplyEvent(ShipperConfiguredObserved{ID: volumeID})
	engine.ApplyEvent(ShipperConnectedObserved{ID: volumeID})
	engine.ApplyEvent(BarrierAccepted{ID: volumeID, FlushedLSN: 9})

	rejection := BarrierRejected{ID: volumeID, Reason: "timeout"}

	firstReject := engine.ApplyEvent(rejection)
	assertCommandNames(t, firstReject.Commands, []string{"invalidate_session", "publish_projection"})

	// An identical rejection right after the first one must be silent.
	repeatReject := engine.ApplyEvent(rejection)
	assertCommandNames(t, repeatReject.Commands, nil)
}
// TestPhase14_CommandSequence_PublishOnlyWhenProjectionChanges applies
// RoleApplied twice after a primary assignment. The first application must
// emit exactly publish_projection; the second must emit no commands.
func TestPhase14_CommandSequence_PublishOnlyWhenProjectionChanges(t *testing.T) {
	const volumeID = "vol-cmd-publish"

	engine := NewCoreEngine()
	endpoint := Endpoint{DataAddr: "10.0.0.15:9333", CtrlAddr: "10.0.0.15:9334", Version: 1}
	engine.ApplyEvent(AssignmentDelivered{
		ID:       volumeID,
		Epoch:    2,
		Role:     RolePrimary,
		Replicas: []ReplicaAssignment{{ReplicaID: "replica-1", Endpoint: endpoint}},
	})

	applied := RoleApplied{ID: volumeID}

	first := engine.ApplyEvent(applied)
	assertCommandNames(t, first.Commands, []string{"publish_projection"})

	// The same observation again must not trigger another publish.
	second := engine.ApplyEvent(applied)
	assertCommandNames(t, second.Commands, nil)
}
// TestPhase14_CommandSequence_CatchUpStartIsBounded brings a primary with one
// catch-up replica to the shipper-connected point, then plans a catch-up for
// replica-1. It expects start_catchup followed by publish_projection, checks
// that the start command carries the replica ID, and finally expects a second
// CatchUpPlanned with the same target LSN to emit no commands.
func TestPhase14_CommandSequence_CatchUpStartIsBounded(t *testing.T) {
	const (
		volumeID    = "vol-cmd-catchup"
		wantReplica = "replica-1"
	)

	core := NewCoreEngine()
	endpoint := Endpoint{DataAddr: "10.0.0.16:9333", CtrlAddr: "10.0.0.16:9334", Version: 1}
	core.ApplyEvent(AssignmentDelivered{
		ID:             volumeID,
		Epoch:          6,
		Role:           RolePrimary,
		RecoveryTarget: SessionCatchUp,
		Replicas:       []ReplicaAssignment{{ReplicaID: "replica-1", Endpoint: endpoint}},
	})
	core.ApplyEvent(RoleApplied{ID: volumeID})
	core.ApplyEvent(ShipperConfiguredObserved{ID: volumeID})
	core.ApplyEvent(ShipperConnectedObserved{ID: volumeID})

	result := core.ApplyEvent(CatchUpPlanned{ID: volumeID, ReplicaID: "replica-1", TargetLSN: 55})
	assertCommandNames(t, result.Commands, []string{
		"start_catchup",
		"publish_projection",
	})

	// Indexing is safe here: assertCommandNames has already aborted the test
	// unless exactly two commands were returned.
	start, ok := result.Commands[0].(StartCatchUpCommand)
	if !ok {
		t.Fatalf("cmd0=%T, want StartCatchUpCommand", result.Commands[0])
	}
	if start.ReplicaID != wantReplica {
		t.Fatalf("replica_id=%q, want %q", start.ReplicaID, wantReplica)
	}

	// NOTE(review): the replayed event omits ReplicaID, unlike the first one —
	// confirm whether that is intentional or whether the replay should be identical.
	result = core.ApplyEvent(CatchUpPlanned{ID: volumeID, TargetLSN: 55})
	assertCommandNames(t, result.Commands, nil)
}
// TestPhase14_CommandSequence_RebuildStartIsBounded assigns a replica with a
// rebuild recovery target, reports that a rebuild is needed, and then applies
// RebuildStarted for replica-1. It expects start_rebuild followed by
// publish_projection, checks that the start command carries the replica ID,
// and expects a second RebuildStarted with the same target LSN to emit no
// commands.
func TestPhase14_CommandSequence_RebuildStartIsBounded(t *testing.T) {
	const (
		volumeID    = "vol-cmd-rebuild"
		wantReplica = "replica-1"
	)

	core := NewCoreEngine()
	endpoint := Endpoint{DataAddr: "10.0.0.17:9333", CtrlAddr: "10.0.0.17:9334", Version: 1}
	core.ApplyEvent(AssignmentDelivered{
		ID:             volumeID,
		Epoch:          7,
		Role:           RoleReplica,
		RecoveryTarget: SessionRebuild,
		Replicas:       []ReplicaAssignment{{ReplicaID: "replica-1", Endpoint: endpoint}},
	})
	core.ApplyEvent(NeedsRebuildObserved{ID: volumeID, Reason: "gap_too_large"})

	result := core.ApplyEvent(RebuildStarted{ID: volumeID, ReplicaID: "replica-1", TargetLSN: 80})
	assertCommandNames(t, result.Commands, []string{
		"start_rebuild",
		"publish_projection",
	})

	// Indexing is safe here: assertCommandNames has already aborted the test
	// unless exactly two commands were returned.
	start, ok := result.Commands[0].(StartRebuildCommand)
	if !ok {
		t.Fatalf("cmd0=%T, want StartRebuildCommand", result.Commands[0])
	}
	if start.ReplicaID != wantReplica {
		t.Fatalf("replica_id=%q, want %q", start.ReplicaID, wantReplica)
	}

	// NOTE(review): the replayed event omits ReplicaID, unlike the first one —
	// confirm whether that is intentional or whether the replay should be identical.
	result = core.ApplyEvent(RebuildStarted{ID: volumeID, TargetLSN: 80})
	assertCommandNames(t, result.Commands, nil)
}
// TestPhase14_CommandSequence_RebuildingAssignmentStartsRecoveryTaskWithoutReceiver
// delivers a replica-role assignment whose recovery target is a rebuild and
// expects apply_role, start_recovery_task and publish_projection — with no
// start_receiver in the sequence.
func TestPhase14_CommandSequence_RebuildingAssignmentStartsRecoveryTaskWithoutReceiver(t *testing.T) {
	engine := NewCoreEngine()
	endpoint := Endpoint{DataAddr: "10.0.0.20:9333", CtrlAddr: "10.0.0.20:9334", Version: 1}
	assignment := AssignmentDelivered{
		ID:             "vol-cmd-rebuild-assign",
		Epoch:          8,
		Role:           RoleReplica,
		RecoveryTarget: SessionRebuild,
		Replicas:       []ReplicaAssignment{{ReplicaID: "replica-1", Endpoint: endpoint}},
	}

	wantNames := []string{"apply_role", "start_recovery_task", "publish_projection"}
	outcome := engine.ApplyEvent(assignment)
	assertCommandNames(t, outcome.Commands, wantNames)
}
// TestPhase14_CommandSequence_AssignmentChangeDrainsRemovedRecoveryReplica
// first assigns a primary with two catch-up replicas, then delivers a
// next-epoch assignment that keeps only replica-1. The second delivery must
// emit a drain_recovery_task for the removed replica-2 and a
// start_recovery_task for the remaining replica-1.
func TestPhase14_CommandSequence_AssignmentChangeDrainsRemovedRecoveryReplica(t *testing.T) {
	const volumeID = "vol-cmd-remove-replica"

	core := NewCoreEngine()
	core.ApplyEvent(AssignmentDelivered{
		ID:             volumeID,
		Epoch:          1,
		Role:           RolePrimary,
		RecoveryTarget: SessionCatchUp,
		Replicas: []ReplicaAssignment{
			{ReplicaID: "replica-1", Endpoint: Endpoint{DataAddr: "10.0.0.30:9333", CtrlAddr: "10.0.0.30:9334", Version: 1}},
			{ReplicaID: "replica-2", Endpoint: Endpoint{DataAddr: "10.0.0.31:9333", CtrlAddr: "10.0.0.31:9334", Version: 1}},
		},
	})

	// Next epoch: replica-2 is gone, replica-1 is unchanged.
	result := core.ApplyEvent(AssignmentDelivered{
		ID:             volumeID,
		Epoch:          2,
		Role:           RolePrimary,
		RecoveryTarget: SessionCatchUp,
		Replicas: []ReplicaAssignment{
			{ReplicaID: "replica-1", Endpoint: Endpoint{DataAddr: "10.0.0.30:9333", CtrlAddr: "10.0.0.30:9334", Version: 1}},
		},
	})
	assertCommandNames(t, result.Commands, []string{
		"apply_role",
		"configure_shipper",
		"drain_recovery_task",
		"start_recovery_task",
		"publish_projection",
	})

	// Both checks report got and want so a mismatch is self-explanatory.
	wantDrained := []string{"replica-2"}
	if got := drainRecoveryTaskReplicaIDs(result.Commands); !reflect.DeepEqual(got, wantDrained) {
		t.Fatalf("drain recovery task replicas=%v, want %v", got, wantDrained)
	}
	wantStarted := []string{"replica-1"}
	if got := recoveryTaskReplicaIDs(result.Commands); !reflect.DeepEqual(got, wantStarted) {
		t.Fatalf("start recovery task replicas=%v, want %v", got, wantStarted)
	}
}
// TestPhase14_CommandSequence_NeedsRebuildInvalidatesOnlyAffectedReplica
// assigns a primary with two catch-up replicas and then reports that
// replica-2 needs a rebuild. It expects a single invalidate_session command
// followed by publish_projection, and checks that the invalidation names
// replica-2.
func TestPhase14_CommandSequence_NeedsRebuildInvalidatesOnlyAffectedReplica(t *testing.T) {
	const (
		volumeID    = "vol-cmd-needs-rebuild-targeted"
		wantReplica = "replica-2"
	)

	core := NewCoreEngine()
	core.ApplyEvent(AssignmentDelivered{
		ID:             volumeID,
		Epoch:          1,
		Role:           RolePrimary,
		RecoveryTarget: SessionCatchUp,
		Replicas: []ReplicaAssignment{
			{ReplicaID: "replica-1", Endpoint: Endpoint{DataAddr: "10.0.0.32:9333", CtrlAddr: "10.0.0.32:9334", Version: 1}},
			{ReplicaID: "replica-2", Endpoint: Endpoint{DataAddr: "10.0.0.33:9333", CtrlAddr: "10.0.0.33:9334", Version: 1}},
		},
	})

	result := core.ApplyEvent(NeedsRebuildObserved{ID: volumeID, ReplicaID: "replica-2", Reason: "gap_too_large"})
	assertCommandNames(t, result.Commands, []string{
		"invalidate_session",
		"publish_projection",
	})

	// Indexing is safe here: assertCommandNames has already aborted the test
	// unless exactly two commands were returned.
	invalidate, ok := result.Commands[0].(InvalidateSessionCommand)
	if !ok {
		t.Fatalf("cmd0=%T, want InvalidateSessionCommand", result.Commands[0])
	}
	if invalidate.ReplicaID != wantReplica {
		t.Fatalf("invalidate replica_id=%q, want %q", invalidate.ReplicaID, wantReplica)
	}
}
// TestPhase14_CommandSequence_AssignmentChangeAllowsFreshRecoveryStart starts
// a catch-up on a connected primary, then delivers a next-epoch assignment
// with a changed replica endpoint. The new assignment must emit the full
// assignment command sequence, and the same CatchUpPlanned event applied
// afterwards must emit start_catchup again.
func TestPhase14_CommandSequence_AssignmentChangeAllowsFreshRecoveryStart(t *testing.T) {
	const volumeID = "vol-cmd-reassign"

	engine := NewCoreEngine()
	plan := CatchUpPlanned{ID: volumeID, TargetLSN: 90}
	wantCatchUp := []string{"start_catchup", "publish_projection"}

	oldEndpoint := Endpoint{DataAddr: "10.0.0.18:9333", CtrlAddr: "10.0.0.18:9334", Version: 1}
	engine.ApplyEvent(AssignmentDelivered{
		ID:             volumeID,
		Epoch:          1,
		Role:           RolePrimary,
		RecoveryTarget: SessionCatchUp,
		Replicas:       []ReplicaAssignment{{ReplicaID: "replica-1", Endpoint: oldEndpoint}},
	})
	engine.ApplyEvent(RoleApplied{ID: volumeID})
	engine.ApplyEvent(ShipperConfiguredObserved{ID: volumeID})
	engine.ApplyEvent(ShipperConnectedObserved{ID: volumeID})

	firstPlan := engine.ApplyEvent(plan)
	assertCommandNames(t, firstPlan.Commands, wantCatchUp)

	// Epoch 2 moves replica-1 to a new endpoint.
	newEndpoint := Endpoint{DataAddr: "10.0.0.19:9333", CtrlAddr: "10.0.0.19:9334", Version: 2}
	reassigned := engine.ApplyEvent(AssignmentDelivered{
		ID:             volumeID,
		Epoch:          2,
		Role:           RolePrimary,
		RecoveryTarget: SessionCatchUp,
		Replicas:       []ReplicaAssignment{{ReplicaID: "replica-1", Endpoint: newEndpoint}},
	})
	assertCommandNames(t, reassigned.Commands, []string{
		"apply_role",
		"configure_shipper",
		"start_recovery_task",
		"publish_projection",
	})

	// The same plan that was already applied must start a catch-up again.
	secondPlan := engine.ApplyEvent(plan)
	assertCommandNames(t, secondPlan.Commands, wantCatchUp)
}
// assertCommandNames aborts the test unless the names reported by cmds (via
// commandName, in slice order) equal want. A nil want means "no commands
// expected".
func assertCommandNames(t *testing.T, cmds []Command, want []string) {
	t.Helper()

	names := make([]string, len(cmds))
	for i := range cmds {
		names[i] = cmds[i].commandName()
	}

	expected := want
	if expected == nil {
		// names is never nil and reflect.DeepEqual tells nil and empty
		// slices apart, so normalise the expectation to an empty slice.
		expected = []string{}
	}

	if reflect.DeepEqual(names, expected) {
		return
	}
	t.Fatalf("commands=%v, want %v", names, expected)
}
// recoveryTaskReplicaIDs collects, in order, the ReplicaID of every
// StartRecoveryTaskCommand found in cmds. Commands of any other type are
// skipped; the result is nil when there is no match.
func recoveryTaskReplicaIDs(cmds []Command) []string {
	var ids []string
	for i := range cmds {
		if task, isStart := cmds[i].(StartRecoveryTaskCommand); isStart {
			ids = append(ids, task.ReplicaID)
		}
	}
	return ids
}
// drainRecoveryTaskReplicaIDs collects, in order, the ReplicaID of every
// DrainRecoveryTaskCommand found in cmds. Commands of any other type are
// skipped; the result is nil when there is no match.
func drainRecoveryTaskReplicaIDs(cmds []Command) []string {
	var ids []string
	for i := range cmds {
		if task, isDrain := cmds[i].(DrainRecoveryTaskCommand); isDrain {
			ids = append(ids, task.ReplicaID)
		}
	}
	return ids
}