fix: CP13-6 rev3 — hard hold-release assertion + stale comment cleanup

1. TestWalRetention_TimeoutTriggersNeedsRebuild: add hard assertion that
   checkpoint advances past replicaFlushedLSN after NeedsRebuild (proves
   hold is actually released, not just state transition)
2. TestWalRetention_RequiredReplicaBlocksReclaim: remove stale "EXPECTED
   TO FAIL" / duplicate comment block

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
pingqiu
2026-04-02 23:59:44 -07:00
parent 4e55b53bef
commit 0ce5aa32e9
2 changed files with 14 additions and 12 deletions

View File

@@ -57,7 +57,7 @@ All 3 retention tests rewritten from placeholder/PASS* to hard-assertion proofs:
| Test | Was | Now | Hard assertion |
|------|-----|-----|----------------|
| `TestWalRetention_RequiredReplicaBlocksReclaim` | PASS (log-only, no assertion) | PASS (hard assert) | `checkpointLSN <= replicaFlushedLSN` — flusher did not advance past retention floor |
| `TestWalRetention_TimeoutTriggersNeedsRebuild` | PASS (log-only, no assertion) | PASS (hard assert) | `s.State() == NeedsRebuild` after 1ns timeout evaluation |
| `TestWalRetention_TimeoutTriggersNeedsRebuild` | PASS (log-only, no assertion) | PASS (hard assert) | `s.State() == NeedsRebuild` + `checkpointAfter > replicaFlushedLSN` (hold released) |
| `TestWalRetention_MaxBytesTriggersNeedsRebuild` | PASS* (logged "not implemented") | PASS (hard assert) | `s.State() == NeedsRebuild` after lag exceeds 8KB budget |
## Proof Promotion
@@ -67,7 +67,7 @@ All 3 retention tests rewritten from placeholder/PASS* to hard-assertion proofs:
| Test | What it proves |
|------|---------------|
| `TestWalRetention_RequiredReplicaBlocksReclaim` | Flusher checkpoint does not advance past `replicaFlushedLSN` while recoverable replica is behind |
| `TestWalRetention_TimeoutTriggersNeedsRebuild` | Timeout budget evaluation transitions shipper to `NeedsRebuild` (verified via `State()` assertion) |
| `TestWalRetention_TimeoutTriggersNeedsRebuild` | Timeout budget `NeedsRebuild` (State assertion) + checkpoint advances past replicaFlushedLSN after flush (hold-release assertion) |
| `TestWalRetention_MaxBytesTriggersNeedsRebuild` | Max-bytes budget evaluation transitions shipper to `NeedsRebuild` (verified via `State()` assertion, uses actual `BlockSize` from volume config) |
## What CP13-6 Does NOT Close

View File

@@ -391,12 +391,8 @@ func TestReconnect_GapBeyondRetainedWal_NeedsRebuild(t *testing.T) {
// ---------- WAL retention ----------
// TestWalRetention_RequiredReplicaBlocksReclaim verifies that the flusher
// does not reclaim WAL entries that a required replica still needs for catch-up.
//
// Currently EXPECTED TO FAIL: WAL reclaim is driven only by checkpointLSN,
// not replica progress.
// TestWalRetention_RequiredReplicaBlocksReclaim verifies that the flusher
// does not advance the WAL tail past entries a recoverable replica still needs.
// does not advance the WAL checkpoint past entries a recoverable replica
// still needs for catch-up.
//
// CP13-6 proof: retention floor from MinRecoverableFlushedLSN blocks reclaim.
func TestWalRetention_RequiredReplicaBlocksReclaim(t *testing.T) {
@@ -850,12 +846,18 @@ func TestWalRetention_TimeoutTriggersNeedsRebuild(t *testing.T) {
t.Fatalf("CP13-6: expected NeedsRebuild after timeout, got %s", st)
}
// After NeedsRebuild: WAL hold should be released (MinRecoverableFlushedLSN
// skips NeedsRebuild shippers). Verify by flushing — checkpoint should advance.
// Hard assertion: WAL hold released after NeedsRebuild.
// Record checkpoint before flush, flush, assert it advances past the old floor.
replicaFlushed := s.ReplicaFlushedLSN()
checkpointBefore := primary.flusher.CheckpointLSN()
primary.flusher.FlushOnce()
checkpointAfter := primary.flusher.CheckpointLSN()
// Checkpoint should advance past the old replica flushedLSN since the hold is released.
t.Logf("CP13-6: timeout triggered NeedsRebuild, checkpoint=%d (hold released)", checkpointAfter)
if checkpointAfter <= replicaFlushed {
t.Fatalf("CP13-6: checkpoint should advance past replicaFlushedLSN %d after hold released, got %d",
replicaFlushed, checkpointAfter)
}
t.Logf("CP13-6: hold released — checkpoint %d→%d (past replicaFlushed=%d)",
checkpointBefore, checkpointAfter, replicaFlushed)
}
// TestWalRetention_MaxBytesTriggersNeedsRebuild verifies that when the