mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2026-05-29 21:20:21 +00:00
fix: CP13-6 rev3 — hard hold-release assertion + stale comment cleanup
1. TestWalRetention_TimeoutTriggersNeedsRebuild: add hard assertion that checkpoint advances past replicaFlushedLSN after NeedsRebuild (proves hold is actually released, not just state transition)
2. TestWalRetention_RequiredReplicaBlocksReclaim: remove stale "EXPECTED TO FAIL" / duplicate comment block

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -57,7 +57,7 @@ All 3 retention tests rewritten from placeholder/PASS* to hard-assertion proofs:
|
||||
| Test | Was | Now | Hard assertion |
|
||||
|------|-----|-----|----------------|
|
||||
| `TestWalRetention_RequiredReplicaBlocksReclaim` | PASS (log-only, no assertion) | PASS (hard assert) | `checkpointLSN <= replicaFlushedLSN` — flusher did not advance past retention floor |
|
||||
| `TestWalRetention_TimeoutTriggersNeedsRebuild` | PASS (log-only, no assertion) | PASS (hard assert) | `s.State() == NeedsRebuild` after 1ns timeout evaluation |
|
||||
| `TestWalRetention_TimeoutTriggersNeedsRebuild` | PASS (log-only, no assertion) | PASS (hard assert) | `s.State() == NeedsRebuild` + `checkpointAfter > replicaFlushedLSN` (hold released) |
|
||||
| `TestWalRetention_MaxBytesTriggersNeedsRebuild` | PASS* (logged "not implemented") | PASS (hard assert) | `s.State() == NeedsRebuild` after lag exceeds 8KB budget |
|
||||
|
||||
## Proof Promotion
|
||||
@@ -67,7 +67,7 @@ All 3 retention tests rewritten from placeholder/PASS* to hard-assertion proofs:
|
||||
| Test | What it proves |
|
||||
|------|---------------|
|
||||
| `TestWalRetention_RequiredReplicaBlocksReclaim` | Flusher checkpoint does not advance past `replicaFlushedLSN` while recoverable replica is behind |
|
||||
| `TestWalRetention_TimeoutTriggersNeedsRebuild` | Timeout budget evaluation transitions shipper to `NeedsRebuild` (verified via `State()` assertion) |
|
||||
| `TestWalRetention_TimeoutTriggersNeedsRebuild` | Timeout budget → `NeedsRebuild` (State assertion) + checkpoint advances past replicaFlushedLSN after flush (hold-release assertion) |
|
||||
| `TestWalRetention_MaxBytesTriggersNeedsRebuild` | Max-bytes budget evaluation transitions shipper to `NeedsRebuild` (verified via `State()` assertion, uses actual `BlockSize` from volume config) |
|
||||
|
||||
## What CP13-6 Does NOT Close
|
||||
|
||||
@@ -391,12 +391,8 @@ func TestReconnect_GapBeyondRetainedWal_NeedsRebuild(t *testing.T) {
|
||||
// ---------- WAL retention ----------
|
||||
|
||||
// TestWalRetention_RequiredReplicaBlocksReclaim verifies that the flusher
|
||||
// does not reclaim WAL entries that a required replica still needs for catch-up.
|
||||
//
|
||||
// Currently EXPECTED TO FAIL: WAL reclaim is driven only by checkpointLSN,
|
||||
// not replica progress.
|
||||
// TestWalRetention_RequiredReplicaBlocksReclaim verifies that the flusher
|
||||
// does not advance the WAL tail past entries a recoverable replica still needs.
|
||||
// does not advance the WAL checkpoint past entries a recoverable replica
|
||||
// still needs for catch-up.
|
||||
//
|
||||
// CP13-6 proof: retention floor from MinRecoverableFlushedLSN blocks reclaim.
|
||||
func TestWalRetention_RequiredReplicaBlocksReclaim(t *testing.T) {
|
||||
@@ -850,12 +846,18 @@ func TestWalRetention_TimeoutTriggersNeedsRebuild(t *testing.T) {
|
||||
t.Fatalf("CP13-6: expected NeedsRebuild after timeout, got %s", st)
|
||||
}
|
||||
|
||||
// After NeedsRebuild: WAL hold should be released (MinRecoverableFlushedLSN
|
||||
// skips NeedsRebuild shippers). Verify by flushing — checkpoint should advance.
|
||||
// Hard assertion: WAL hold released after NeedsRebuild.
|
||||
// Record checkpoint before flush, flush, assert it advances past the old floor.
|
||||
replicaFlushed := s.ReplicaFlushedLSN()
|
||||
checkpointBefore := primary.flusher.CheckpointLSN()
|
||||
primary.flusher.FlushOnce()
|
||||
checkpointAfter := primary.flusher.CheckpointLSN()
|
||||
// Checkpoint should advance past the old replica flushedLSN since the hold is released.
|
||||
t.Logf("CP13-6: timeout triggered NeedsRebuild, checkpoint=%d (hold released)", checkpointAfter)
|
||||
if checkpointAfter <= replicaFlushed {
|
||||
t.Fatalf("CP13-6: checkpoint should advance past replicaFlushedLSN %d after hold released, got %d",
|
||||
replicaFlushed, checkpointAfter)
|
||||
}
|
||||
t.Logf("CP13-6: hold released — checkpoint %d→%d (past replicaFlushed=%d)",
|
||||
checkpointBefore, checkpointAfter, replicaFlushed)
|
||||
}
|
||||
|
||||
// TestWalRetention_MaxBytesTriggersNeedsRebuild verifies that when the
|
||||
|
||||
Reference in New Issue
Block a user