diff --git a/sw-block/.private/phase/phase-16-finish-review.md b/sw-block/.private/phase/phase-16-finish-review.md new file mode 100644 index 000000000..ba6ac23a7 --- /dev/null +++ b/sw-block/.private/phase/phase-16-finish-review.md @@ -0,0 +1,154 @@ +# Phase 16 Finish-Line Review + +Date: 2026-04-04 +Status: ready for review + +## Review Object + +Review the current bounded runtime checkpoint as: + +1. `Phase 15` delivered +2. `16A-16T` delivered on the previously accepted bounded runtime path +3. `16U-16W` delivered as the last visible bounded heartbeat/restart truth + closure slices + +This checkpoint should be judged as the bounded `Phase 16` finish-line review, +not as a broad product-readiness or launch review. + +## What Is In Scope + +### Current bounded runtime claim + +The checkpoint may now claim that, on the chosen bounded heartbeat/master/API +path: + +1. explicit primary truth survives steady-state sparse heartbeats and bounded + restart reconstruction +2. restart primary swap rebases explicit primary truth to the winning heartbeat +3. replica explicit readiness no longer silently falls back to address-shaped + semantics after explicit truth has already been accepted +4. empty full block inventory delete behavior is explicit rather than inferred + from emptiness alone +5. one real sender-side path truthfully emits non-authoritative inventory + +### Expected judgment + +1. the checkpoint is a real bounded runtime-closure step, not only protocol + plumbing +2. the accepted claim set is explicit and evidence-backed +3. residual gaps are named rather than hidden + +## What Is Explicitly Out Of Scope + +Do NOT review this checkpoint as claiming: + +1. broad recovery-loop closure +2. broad end-to-end failover/recovery/publication closure +3. full restart-window policy for all loading/not-yet-authoritative states +4. broad multi-replica startup / reconciliation ownership +5. launch / rollout readiness + +## Primary Files + +Checkpoint framing: + +1. 
`sw-block/.private/phase/phase-16.md` +2. `sw-block/.private/phase/phase-16-log.md` +3. `sw-block/design/v2-product-completion-overview.md` +4. `sw-block/design/v2-protocol-truths.md` +5. `sw-block/design/v2-protocol-claim-and-evidence.md` + +Checkpoint code: + +1. `weed/server/master_block_registry.go` +2. `weed/server/master_block_registry_test.go` +3. `weed/server/volume_server_block.go` +4. `weed/server/volume_grpc_client_to_master.go` +5. `weed/server/master_grpc_server.go` +6. `weed/server/volume_server_test.go` + +## Accepted Claim Set + +1. steady-state and restart reconstruction preserve accepted explicit primary + heartbeat truth on the bounded chosen path +2. sparse primary and replica heartbeats no longer silently erase already + accepted explicit truth on existing entries +3. empty full block inventory delete behavior is explicit rather than heuristic +4. one real sender-side non-authoritative inventory path is now implemented and + tested + +## Explicit Non-Claims + +1. full recovery-loop ownership +2. broad failover/publication proof +3. broad restart/disturbance hardening +4. launch-envelope freeze or rollout approval + +## Residual Gaps + +1. broader recovery-loop closure beyond the chosen bounded path +2. broader failover/publication whole-chain statement +3. long-window restart/disturbance policy and soak hardening +4. launch-envelope and rollout-gate work + +## Evidence Summary + +### Heartbeat truth closure and sparse-field retention + +1. `go test ./weed/storage/blockvol -count=1 -run "TestInfoMessage_(ReplicaReady|NeedsRebuild|PublishHealthy|VolumeMode|VolumeModeReason)"` +2. 
`go test ./weed/server -count=1 -timeout 180s -run "Test(Registry_UpdateFullHeartbeat_(ConsumesCoreInfluencedReplicaReady|ReplicaReadyFallsBackToAddressesWhenFieldAbsent|ReplicaReadyMissingFieldPreservesAcceptedExplicitTruth|ReplicaReadyMissingFieldFreshEntryStillFallsBack|ConsumesExplicitNeedsRebuildFromPrimaryHeartbeat|NeedsRebuildFallsBackWhenFieldAbsent|ExplicitHealthySuppressesStaleNeedsRebuildHeuristic|ConsumesExplicitPublishHealthyFromPrimaryHeartbeat|ExplicitUnhealthySuppressesStalePublishHealthyHeuristic|ConsumesExplicitVolumeModeFromPrimaryHeartbeat|VolumeModeFallsBackWhenFieldAbsent|AutoRegisterPreservesExplicitPrimaryTruthOnRestart|MissingFieldsPreserveAcceptedExplicitPrimaryTruth|MissingFieldsDoNotInventExplicitTruthOnFreshEntry))"` +3. result: `PASS` + +### Restart reconciliation and disturbance surfaces + +1. `go test ./weed/server -count=1 -timeout 180s -run "Test(MasterRestart_(HigherEpochWins|HigherEpochRebasesExplicitPrimaryTruth|HigherEpochSparsePrimaryClearsOldExplicitTruth|LowerEpochBecomesReplica|SameEpoch_HigherLSNWins|SameEpoch_SameLSN_ExistingWins|SameEpoch_RoleTrusted)|P11P3_HeartbeatReconstruction|P12P1_Restart_SameLineage)"` +2. `go test ./weed/server -count=1 -timeout 180s -run "Test(StartBlockService_ScanFailureEmitsNonAuthoritativeInventory|CollectBlockVolumeHeartbeat_IncludesInventoryAuthority|Registry_UpdateFullHeartbeatWithInventoryAuthority_(NonAuthoritativeEmptyDoesNotDelete|AuthoritativeEmptyStillDeletes)|Master_ExpandCoordinated_B10_HeartbeatDoesNotDeleteDuringExpand|QA_Reg_FullHeartbeatEmptyServer)"` +3. result: `PASS` + +### Outward surface coherence + +1. `go test ./weed/server -count=1 -timeout 180s -run "Test(EntryToVolumeInfo_(ReflectsCoreInfluencedReadyConsume|ReflectsCoreInfluencedDegradedConsume)|BlockVolume(Get|List)Handler_ReflectsCoreInfluencedDegradedConsume)"` +2. result: `PASS` + +## Review Questions + +### For `sw` + +Please check implementation correctness and checkpoint coherence: + +1. 
Is the finish-line boundary coherent as one bounded runtime checkpoint? +2. Are the `16U-16W` changes internally consistent with the existing `16M-16T` + truth-closure discipline? +3. Are there any small cleanup issues that should be fixed before a checkpoint + commit, without widening scope? + +### For `tester` + +Please challenge the proof posture: + +1. Do the new tests prove the semantic claim rather than only the implementation shape? +2. Is restart primary-truth rebase adequately covered for the bounded chosen + path? +3. Is the replica sparse-heartbeat retention proof strong enough to support the + bounded claim? + +### For `manager` + +Please challenge overclaim and stop-line discipline: + +1. Does the checkpoint wording stay disciplined about broad residual gaps? +2. Is `Phase 16` the right place to stop and package a runtime checkpoint rather + than continue indefinite edge-case slicing? +3. Are the explicit non-claims and residuals sufficient to prevent product + overreach? + +## Requested Output Shape + +Please reply with one of: + +1. `ACCEPT` +2. `ACCEPT WITH MINOR FIXES` +3. `REJECT` + +If not `ACCEPT`, list findings ordered by severity and keep them bounded to this +checkpoint's actual claim set. diff --git a/sw-block/.private/phase/phase-16-log.md b/sw-block/.private/phase/phase-16-log.md index 068f0530e..470cc4563 100644 --- a/sw-block/.private/phase/phase-16-log.md +++ b/sw-block/.private/phase/phase-16-log.md @@ -1552,3 +1552,391 @@ Conclusion: field keep the previous empty/default outward reason behavior 3. this slice still does not claim restart/disturbance hardening or broad failover closure by itself + +--- + +#### `16R` Start Note Rev 1 + +Date: 2026-04-04 +Scope: bounded explicit primary-truth preservation on master restart auto-register + +Why this slice exists: + +1. `16N-16Q` made primary heartbeat truth explicit across the steady-state + heartbeat/master/API seam +2. 
but the fresh-registry auto-register branch in `UpdateFullHeartbeat` + reconstructs only size/epoch/degraded/publication metadata and drops the new + explicit primary truth +3. that leaves a bounded restart/disturbance seam where master restart can + collapse accepted explicit heartbeat truth back to older heuristics + +Chosen implementation rule: + +1. preserve explicit primary heartbeat `needs_rebuild`, `publish_healthy`, + `volume_mode`, and `volume_mode_reason` when auto-registering from a fresh + registry after restart +2. prove outward mode/reason surfaces remain coherent after restart + reconstruction +3. do not broaden this slice into general restart hardening or repeated-failover + closure + +--- + +#### `16R` Delivery Note Rev 1 + +Date: 2026-04-04 +Scope: bounded explicit primary-truth preservation on master restart auto-register + +What changed: + +1. `weed/server/master_block_registry.go` + - fresh-registry auto-register now preserves explicit primary heartbeat + `needs_rebuild`, `publish_healthy`, `volume_mode`, and + `volume_mode_reason` truth instead of dropping them during restart + reconstruction +2. `weed/server/master_block_registry_test.go` + - added a focused proof that fresh-registry auto-register preserves explicit + primary truth and outward API-facing mode/reason after restart +3. `sw-block/.private/phase/phase-16.md` + - recorded `16R` as the next bounded restart/disturbance seam and marked it + delivered + +Proof / evidence: + +1. `go test ./weed/server -count=1 -timeout 180s -run "TestRegistry_(UpdateFullHeartbeat_(AutoRegisterPreservesExplicitPrimaryTruthOnRestart|ConsumesExplicitVolumeModeFromPrimaryHeartbeat|VolumeModeFallsBackWhenFieldAbsent)|ReplicaHeartbeat_ReconstructsAfterRestart)"` +2. 
`go test ./weed/server -count=1 -timeout 180s -run "Test(EntryToVolumeInfo_(ReflectsCoreInfluencedReadyConsume|ReflectsCoreInfluencedDegradedConsume)|BlockVolume(List|Get)Handler_ReflectsCoreInfluencedDegradedConsume|P11P3_HeartbeatReconstruction|P12P1_Restart_SameLineage)"` +3. result: `PASS` + +Conclusion: + +1. accepted explicit primary heartbeat truth is now preserved not only on the + steady-state consume path but also on bounded master restart reconstruction +2. this closes one concrete restart/disturbance seam without broadening into + general restart hardening or repeated-failover closure + +--- + +#### `16S` Start Note Rev 1 + +Date: 2026-04-04 +Scope: bounded explicit primary-truth retention when heartbeat fields are absent + +Why this slice exists: + +1. `16N-16R` made several bounded primary heartbeat truths explicit and + preserved them across steady-state and restart reconstruction paths +2. but the steady-state existing-entry consume path still clears those explicit + truths whenever a later heartbeat omits the newer fields entirely +3. that leaves a bounded disturbance/compat seam where accepted explicit truth + can regress back to heuristics simply because a heartbeat is field-sparse + +Chosen implementation rule: + +1. preserve already accepted explicit primary heartbeat `needs_rebuild`, + `publish_healthy`, `volume_mode`, and `volume_mode_reason` truth when the + corresponding field is absent on a later heartbeat +2. keep fresh-entry/auto-register behavior unchanged so absent fields still mean + "no explicit truth yet" on first observation +3. do not broaden this slice into entry-deletion policy or general restart + hardening + +--- + +#### `16S` Delivery Note Rev 1 + +Date: 2026-04-04 +Scope: bounded explicit primary-truth retention when heartbeat fields are absent + +What changed: + +1. 
`weed/server/master_block_registry.go` + - existing-entry primary heartbeat consume now preserves already accepted + explicit primary truth when a later heartbeat omits the newer explicit + fields entirely +2. `weed/server/master_block_registry_test.go` + - added focused proofs that missing-field heartbeats preserve accepted + explicit truth on existing entries while still not inventing explicit truth + on fresh auto-registered entries +3. `sw-block/.private/phase/phase-16.md` + - recorded `16S` as the bounded missing-field retention seam and marked it + delivered + +Proof / evidence: + +1. `go test ./weed/server -count=1 -timeout 180s -run "TestRegistry_UpdateFullHeartbeat_(MissingFieldsPreserveAcceptedExplicitPrimaryTruth|MissingFieldsDoNotInventExplicitTruthOnFreshEntry|AutoRegisterPreservesExplicitPrimaryTruthOnRestart|ConsumesExplicitVolumeModeFromPrimaryHeartbeat|VolumeModeFallsBackWhenFieldAbsent|NeedsRebuildFallsBackWhenFieldAbsent|ExplicitHealthySuppressesStaleNeedsRebuildHeuristic|ConsumesExplicitPublishHealthyFromPrimaryHeartbeat|ExplicitUnhealthySuppressesStalePublishHealthyHeuristic)"` +2. `go test ./weed/server -count=1 -timeout 180s -run "Test(EntryToVolumeInfo_(ReflectsCoreInfluencedReadyConsume|ReflectsCoreInfluencedDegradedConsume)|BlockVolume(List|Get)Handler_ReflectsCoreInfluencedDegradedConsume|P11P3_HeartbeatReconstruction|P12P1_Restart_SameLineage)"` +3. result: `PASS` + +Conclusion: + +1. once accepted on the bounded primary path, explicit heartbeat truth no longer + regresses merely because a later heartbeat is field-sparse +2. backward-compatible fresh-entry fallback is preserved because absent fields + still do not create explicit truth on first observation + +--- + +#### `16T` Start Note Rev 1 + +Date: 2026-04-04 +Scope: bounded authoritative guard for empty full block heartbeat inventory + +Why this slice exists: + +1. 
full block heartbeat currently treats empty inventory as enough to make stale + cleanup/delete decisions on the master side +2. that leaves a bounded disturbance seam because "authoritatively empty" and + "non-authoritative/temporarily unavailable inventory" still share the same + empty heartbeat shape +3. the next narrow closure step is to make stale-delete eligibility depend on an + explicit authoritative signal, not empty inventory alone + +Chosen implementation rule: + +1. add one additive heartbeat-level signal for block inventory authority +2. keep the current chosen-path regular and shutdown-originated empty inventory + heartbeats authoritative +3. make master full-heartbeat stale cleanup conditional on that signal +4. do not broaden this slice into general restart policy or long-window + disturbance handling + +--- + +#### `16T` Delivery Note Rev 1 + +Date: 2026-04-04 +Scope: bounded authoritative guard for empty full block heartbeat inventory + +What changed: + +1. `weed/pb/master.proto` + - added additive optional `block_volume_inventory_authoritative` to the + top-level `Heartbeat` +2. `weed/pb/master_pb/master.pb.go` + - regenerated for the new heartbeat-level optional authority bit +3. `weed/server/volume_server_block.go` + - `BlockService` now tracks whether its block inventory is authoritative + enough for full-heartbeat stale cleanup +4. `weed/server/volume_grpc_client_to_master.go` + - full block heartbeats now emit explicit block inventory authority; regular + and shutdown-originated empty inventory heartbeats stay authoritative on + the current chosen path +5. `weed/server/master_grpc_server.go` + - master heartbeat handling now forwards the explicit block inventory + authority into block full-heartbeat reconciliation +6. `weed/server/master_block_registry.go` + - full-heartbeat stale cleanup now runs only when block inventory is + authoritative +7. 
focused tests in `master_block_registry_test.go` and `volume_server_test.go` + - now prove non-authoritative empty full heartbeat preserves entries while + authoritative empty inventory keeps the old delete behavior + +Proof / evidence: + +1. `go test ./weed/server -count=1 -timeout 180s -run "Test(CollectBlockVolumeHeartbeat_IncludesInventoryAuthority|Registry_UpdateFullHeartbeatWithInventoryAuthority_(NonAuthoritativeEmptyDoesNotDelete|AuthoritativeEmptyStillDeletes)|Registry_UpdateFullHeartbeat_(MissingFieldsPreserveAcceptedExplicitPrimaryTruth|AutoRegisterPreservesExplicitPrimaryTruthOnRestart)|Master_ExpandCoordinated_B10_HeartbeatDoesNotDeleteDuringExpand|QA_Reg_FullHeartbeatEmptyServer)"` +2. `go test ./weed/server -count=1 -timeout 180s -run "Test(Registry_UpdateFullHeartbeat$|Registry_UpdateFullHeartbeat_VolumeModeFallsBackWhenFieldAbsent|Registry_UpdateFullHeartbeat_ConsumesExplicitVolumeModeFromPrimaryHeartbeat|P12P1_Restart_SameLineage|P11P3_HeartbeatReconstruction)"` +3. result: `PASS` + +Conclusion: + +1. empty full block heartbeat is no longer forced to mean authoritative delete + intent on the master side +2. this closes one bounded disturbance seam by making stale-delete eligibility + explicit without yet deciding the broader restart-window policy itself + +--- + +#### `16U` Start Note Rev 1 + +Date: 2026-04-04 +Scope: real sender-side non-authoritative block inventory path + +Why this slice exists: + +1. `16T` added an explicit block-inventory authority bit and guarded master-side + stale delete with it +2. but on the chosen runtime path that new bit was still mostly only wire + capability; no real sender-side path emitted `false` +3. the next bounded closure step is to bind one truthful runtime condition to + that bit without widening into general restart policy + +Chosen implementation rule: + +1. use startup block-directory scan failure as one truthful + non-authoritative-inventory condition +2. 
prove that ordinary full-heartbeat emission carries `false` there +3. prove master-side inventory-authority guard still preserves entries there + +--- + +#### `16U` Delivery Note Rev 1 + +Date: 2026-04-04 +Scope: real sender-side non-authoritative block inventory path + +What changed: + +1. `weed/server/volume_server_test.go` + - added a focused startup-scan-failure proof showing a real block-service + sender path emits `block_volume_inventory_authoritative=false` +2. `sw-block/.private/phase/phase-16.md` + - recorded `16U` as delivered and folded it into the checkpoint package + +Proof / evidence: + +1. `go test ./weed/server -count=1 -timeout 180s -run "Test(StartBlockService_ScanFailureEmitsNonAuthoritativeInventory|CollectBlockVolumeHeartbeat_IncludesInventoryAuthority|Registry_UpdateFullHeartbeatWithInventoryAuthority_(NonAuthoritativeEmptyDoesNotDelete|AuthoritativeEmptyStillDeletes))"` +2. result: `PASS` + +Conclusion: + +1. the block-inventory authority bit now has one real sender-side runtime + meaning instead of remaining only a latent wire affordance +2. this still does not decide the broader restart-window policy + +--- + +#### `16V` Start Note Rev 1 + +Date: 2026-04-04 +Scope: restart primary-swap explicit-truth rebase + +Why this slice exists: + +1. `16R-16S` preserved explicit primary truth across restart auto-register and + later sparse heartbeats +2. but restart reconciliation could still promote a new primary while retaining + explicit truth that belonged to the old primary longer than intended +3. the next bounded closure step is to rebind explicit truth at the exact + primary-swap seam + +Chosen implementation rule: + +1. when restart reconciliation promotes a new primary, explicit + `needs_rebuild`, `publish_healthy`, `volume_mode`, and `volume_mode_reason` + must come from the winning heartbeat +2. if the winning heartbeat is sparse, clear old explicit truth rather than + preserving it across ownership change +3. 
recompute outward surfaces immediately after primary swap + +--- + +#### `16V` Delivery Note Rev 1 + +Date: 2026-04-04 +Scope: restart primary-swap explicit-truth rebase + +What changed: + +1. `weed/server/master_block_registry.go` + - factored explicit primary-truth consume into a helper and used it both for + existing-entry retention and restart primary-swap rebase + - primary-swap reconciliation now clears stale old-primary explicit truth + when the winning heartbeat omits those fields + - primary-swap reconciliation now recomputes outward aggregate state + immediately after demotion/promotion +2. `weed/server/master_block_registry_test.go` + - added focused proofs that higher-epoch promotion rebases explicit truth to + the winning heartbeat and that sparse winning heartbeats clear stale + old-primary truth +3. `sw-block/.private/phase/phase-16.md` + - recorded `16V` as delivered and folded it into the checkpoint package + +Proof / evidence: + +1. `go test ./weed/server -count=1 -timeout 180s -run "Test(MasterRestart_(HigherEpochWins|HigherEpochRebasesExplicitPrimaryTruth|HigherEpochSparsePrimaryClearsOldExplicitTruth|LowerEpochBecomesReplica|SameEpoch_HigherLSNWins|SameEpoch_SameLSN_ExistingWins|SameEpoch_RoleTrusted)|Registry_UpdateFullHeartbeat_(AutoRegisterPreservesExplicitPrimaryTruthOnRestart|MissingFieldsPreserveAcceptedExplicitPrimaryTruth|ConsumesExplicitVolumeModeFromPrimaryHeartbeat|VolumeModeFallsBackWhenFieldAbsent))"` +2. result: `PASS` + +Conclusion: + +1. explicit primary truth is now bound to the winning primary after restart + reconciliation rather than being retained across ownership change +2. this closes one bounded restart seam without widening into general restart + policy or failover redesign + +--- + +#### `16W` Start Note Rev 1 + +Date: 2026-04-04 +Scope: replica-side explicit readiness retention parity + +Why this slice exists: + +1. `16M` made replica readiness explicit on the heartbeat/master seam +2. 
but replica consume still fell back to address heuristics whenever a later + heartbeat omitted `ReplicaReady`, even after explicit truth had already been + accepted +3. the next bounded closure step is to mirror the primary-side missing-field + retention discipline for this one replica truth + +Chosen implementation rule: + +1. once an existing replica entry has accepted explicit `ReplicaReady`, preserve + that truth across sparse later heartbeats +2. keep fresh-entry behavior backward-compatible: absent `ReplicaReady` still + falls back to transport addresses until explicit truth has been observed +3. do not widen this slice into broader replica policy redesign + +--- + +#### `16W` Delivery Note Rev 1 + +Date: 2026-04-04 +Scope: replica-side explicit readiness retention parity + +What changed: + +1. `weed/server/master_block_registry.go` + - replica entries now track whether readiness was explicitly carried on a + heartbeat + - existing replica entries preserve accepted explicit readiness on sparse + later heartbeats, while fresh entries still use address fallback +2. `weed/server/master_block_registry_test.go` + - added focused proofs for accepted explicit-readiness retention and + fresh-entry backward-compatible fallback +3. `sw-block/.private/phase/phase-16.md` + - recorded `16W` as delivered and folded it into the checkpoint package + +Proof / evidence: + +1. `go test ./weed/server -count=1 -timeout 180s -run "TestRegistry_UpdateFullHeartbeat_(ConsumesCoreInfluencedReplicaReady|ReplicaReadyFallsBackToAddressesWhenFieldAbsent|ReplicaReadyMissingFieldPreservesAcceptedExplicitTruth|ReplicaReadyMissingFieldFreshEntryStillFallsBack)"` +2. result: `PASS` + +Conclusion: + +1. replica-side explicit readiness now follows the same bounded sparse-heartbeat + retention discipline as the primary side +2. 
backward compatibility remains preserved because fresh entries still fall + back to address-shaped readiness when no explicit field has yet been observed + +--- + +#### `Phase 16` Finish-Line Checkpoint Package Rev 1 + +Date: 2026-04-04 +Scope: bounded runtime checkpoint through `16W` + +What changed: + +1. `sw-block/.private/phase/phase-16.md` + - updated the `Phase 16` checkpoint to include `16U-16W` + - added accepted claim set, explicit non-claims, residual gaps, and exact + proof commands +2. `sw-block/.private/phase/phase-16-finish-review.md` + - created a structured review artifact for the bounded finish-line checkpoint +3. `sw-block/design/v2-protocol-claim-and-evidence.md` + - recorded the bounded runtime-checkpoint claim and its evidence anchor + +Proof / evidence: + +1. `go test ./weed/storage/blockvol -count=1 -run "TestInfoMessage_(ReplicaReady|NeedsRebuild|PublishHealthy|VolumeMode|VolumeModeReason)"` +2. `go test ./weed/server -count=1 -timeout 180s -run "Test(Registry_UpdateFullHeartbeat_(ConsumesCoreInfluencedReplicaReady|ReplicaReadyFallsBackToAddressesWhenFieldAbsent|ReplicaReadyMissingFieldPreservesAcceptedExplicitTruth|ReplicaReadyMissingFieldFreshEntryStillFallsBack|ConsumesExplicitNeedsRebuildFromPrimaryHeartbeat|NeedsRebuildFallsBackWhenFieldAbsent|ExplicitHealthySuppressesStaleNeedsRebuildHeuristic|ConsumesExplicitPublishHealthyFromPrimaryHeartbeat|ExplicitUnhealthySuppressesStalePublishHealthyHeuristic|ConsumesExplicitVolumeModeFromPrimaryHeartbeat|VolumeModeFallsBackWhenFieldAbsent|AutoRegisterPreservesExplicitPrimaryTruthOnRestart|MissingFieldsPreserveAcceptedExplicitPrimaryTruth|MissingFieldsDoNotInventExplicitTruthOnFreshEntry)|MasterRestart_(HigherEpochWins|HigherEpochRebasesExplicitPrimaryTruth|HigherEpochSparsePrimaryClearsOldExplicitTruth|LowerEpochBecomesReplica|SameEpoch_HigherLSNWins|SameEpoch_SameLSN_ExistingWins|SameEpoch_RoleTrusted)|StartBlockService_ScanFailureEmitsNonAuthoritativeInventory|CollectBlockVolumeHeartbeat_Includ
esInventoryAuthority|Registry_UpdateFullHeartbeatWithInventoryAuthority_(NonAuthoritativeEmptyDoesNotDelete|AuthoritativeEmptyStillDeletes)|P11P3_HeartbeatReconstruction|P12P1_Restart_SameLineage)"` +3. `go test ./weed/server -count=1 -timeout 180s -run "Test(EntryToVolumeInfo_(ReflectsCoreInfluencedReadyConsume|ReflectsCoreInfluencedDegradedConsume)|BlockVolume(Get|List)Handler_ReflectsCoreInfluencedDegradedConsume|Master_ExpandCoordinated_B10_HeartbeatDoesNotDeleteDuringExpand|QA_Reg_FullHeartbeatEmptyServer)"` +4. result: `PASS` + +Conclusion: + +1. the visible bounded heartbeat/restart truth-closure seams through `16W` are + now packaged as one reviewable checkpoint +2. broader recovery-loop, failover/publication, and long-window disturbance + behavior remain explicit residuals rather than hidden claims diff --git a/sw-block/.private/phase/phase-16.md b/sw-block/.private/phase/phase-16.md index 3dcc0012e..2305e3e98 100644 --- a/sw-block/.private/phase/phase-16.md +++ b/sw-block/.private/phase/phase-16.md @@ -769,6 +769,265 @@ Evidence: 1. focused working-tree change after `16P` closeout +### `16R`: Restart Auto-Register Truth Preservation + +Goal: + +1. close one bounded restart/disturbance seam by preserving explicit primary + heartbeat truth when the master reconstructs a volume entry from a fresh + registry after restart +2. keep the slice limited to master-side auto-register consume of already + explicit heartbeat truth, not broader restart or failover closure + +Acceptance object: + +1. the master restart auto-register path preserves explicit primary heartbeat + `needs_rebuild`, `publish_healthy`, `volume_mode`, and `volume_mode_reason` + truth on the new registry entry +2. outward volume-info surfaces preserve that explicit truth after restart + reconstruction instead of collapsing back to heuristic defaults +3. focused proofs show fresh-registry heartbeat reconstruction preserves the + bounded explicit primary truth +4. 
this slice still does not yet claim broad restart/disturbance or launch + closure + +Current chosen path: + +1. widen `UpdateFullHeartbeat` auto-register so fresh-registry reconstruction + consumes the same explicit primary truth that existing-entry consume already + prefers +2. prove restart reconstruction preserves outward mode/reason semantics on the + bounded path + +Status: + +1. delivered + +Delivered result: + +1. master restart auto-register now preserves explicit primary heartbeat + `needs_rebuild`, `publish_healthy`, `volume_mode`, and + `volume_mode_reason` truth +2. fresh-registry reconstruction no longer drops those explicit fields and then + falls back immediately to older heuristics +3. outward volume-info surfaces now preserve explicit mode/reason truth after + bounded restart reconstruction + +Evidence: + +1. focused working-tree change after `16Q` closeout + +### `16S`: Missing-Field Truth Retention + +Goal: + +1. close one bounded disturbance/compat seam by ensuring a primary heartbeat + that omits newer explicit fields does not erase already accepted explicit + truth on the master side +2. keep the slice limited to missing-field retention for bounded primary + heartbeat truth, not entry deletion or broad restart policy + +Acceptance object: + +1. once explicit primary heartbeat `needs_rebuild`, `publish_healthy`, + `volume_mode`, or `volume_mode_reason` truth has been accepted on an existing + entry, a later heartbeat that omits those fields does not clear it +2. fresh entries with absent fields still keep backward-compatible fallback + behavior +3. focused proofs show missing-field heartbeats preserve accepted explicit truth + on the bounded primary path +4. this slice still does not yet claim broad restart/disturbance or launch + closure + +Current chosen path: + +1. make existing-entry primary heartbeat consume preserve prior explicit truth + when the corresponding field is absent +2. 
keep auto-register/fresh-entry behavior unchanged so absent fields still mean + no explicit truth on first observation + +Status: + +1. delivered + +Delivered result: + +1. existing-entry primary consume now preserves already accepted explicit + `needs_rebuild`, `publish_healthy`, `volume_mode`, and + `volume_mode_reason` truth when a later heartbeat omits those fields +2. fresh-entry auto-register behavior remains backward-compatible: absent fields + still do not invent explicit truth on first observation +3. bounded outward mode/reason surfaces no longer regress merely because a + later heartbeat is field-sparse + +Evidence: + +1. focused working-tree change after `16R` closeout + +### `16T`: Authoritative Empty-Inventory Guard + +Goal: + +1. close one bounded disturbance seam by separating authoritative empty block + inventory from non-authoritative empty heartbeat observation on the + master/volume-server seam +2. keep the slice limited to stale-delete eligibility for full block heartbeat + reconciliation, not broader restart policy + +Acceptance object: + +1. full block heartbeat carries an additive explicit signal for whether the + reported block inventory is authoritative +2. master stale-delete on full heartbeat runs only when the block inventory is + authoritative +3. bounded proofs show non-authoritative empty full heartbeat does not delete an + existing primary entry, while authoritative empty full heartbeat retains the + old delete behavior +4. this slice still does not yet claim broad restart/disturbance or launch + closure + +Current chosen path: + +1. widen `Heartbeat` with an additive `block_volume_inventory_authoritative` + field +2. keep regular and shutdown-originated empty block inventory heartbeats + authoritative on the current chosen path +3. make master full-heartbeat stale cleanup conditional on that explicit signal + +Status: + +1. delivered + +Delivered result: + +1. 
full block heartbeat now carries an explicit + `block_volume_inventory_authoritative` signal +2. master full-heartbeat stale cleanup now runs only when block inventory is + authoritative +3. current chosen-path regular and shutdown-originated empty block inventory + heartbeats remain authoritative, while non-authoritative empty inventory can + now be preserved without deleting entries + +Evidence: + +1. focused working-tree change after `16S` closeout + +### `16U`: Real Non-Authoritative Inventory Path + +Goal: + +1. close one bounded runtime seam by making the new + `block_volume_inventory_authoritative` signal meaningful on a real sender-side + path, not only as an unused wire capability +2. keep the slice limited to one truthful sender-side non-authoritative path, + not broad restart policy + +Acceptance object: + +1. one real block-service startup path emits + `block_volume_inventory_authoritative=false` +2. focused proofs show that sender-side path and the master-side authoritative + guard compose correctly +3. this slice still does not yet claim broad restart-window policy + +Current chosen path: + +1. treat startup block-inventory scan failure as a truthful + non-authoritative-inventory condition +2. emit that condition on the ordinary full block heartbeat path + +Status: + +1. delivered + +Delivered result: + +1. startup scan failure now yields a real sender-side + non-authoritative block inventory heartbeat +2. the new authority bit is no longer only a wire capability; it now carries one + real bounded runtime meaning + +Evidence: + +1. focused working-tree change after `16T` closeout + +### `16V`: Restart Primary-Truth Rebase + +Goal: + +1. close one bounded restart seam by ensuring explicit primary truth is rebased + to the winning primary during restart reconciliation +2. keep the slice limited to primary-swap explicit truth handling, not broad + restart policy + +Acceptance object: + +1. 
when restart reconciliation promotes a new primary, explicit + `needs_rebuild`, `publish_healthy`, `volume_mode`, and `volume_mode_reason` + truth reflect the winning primary heartbeat +2. stale explicit truth from the old primary does not survive primary swap +3. sparse winning-primary heartbeats clear old explicit truth instead of + retaining it incorrectly + +Current chosen path: + +1. rebase or clear explicit primary truth during `demoteExistingToReplica` +2. recompute outward surfaces immediately after primary swap + +Status: + +1. delivered + +Delivered result: + +1. restart reconciliation now rebinds explicit primary truth to the winning + primary heartbeat +2. sparse winning-primary heartbeats no longer leave stale old-primary outward + truth attached to the entry + +Evidence: + +1. focused working-tree change after `16T` closeout + +### `16W`: Replica Explicit-Readiness Retention + +Goal: + +1. close one bounded replica-side compat seam by making accepted explicit + `ReplicaReady` truth survive later sparse replica heartbeats +2. keep the slice limited to replica readiness presence/retention, not broad + replica policy + +Acceptance object: + +1. once explicit replica readiness is accepted for an existing replica entry, a + later sparse heartbeat does not revert it silently to address heuristics +2. fresh replica observations without explicit readiness still keep the old + backward-compatible address fallback +3. focused proofs show replica-side readiness now follows the same bounded + explicit-truth discipline as the primary side + +Current chosen path: + +1. track whether a replica entry has accepted explicit readiness +2. preserve that truth on sparse heartbeats only for existing replica entries +3. keep fresh-entry fallback unchanged + +Status: + +1. delivered + +Delivered result: + +1. existing replica entries now preserve accepted explicit `ReplicaReady` truth + across sparse heartbeats +2. 
fresh replica entries still use address fallback when no explicit readiness + has yet been observed + +Evidence: + +1. focused working-tree change after `16T` closeout + ## Current Checkpoint Review Target The current review target is the current widened bounded runtime checkpoint @@ -861,11 +1120,68 @@ boundary: - heartbeat/master/API path now preserves explicit bounded `VolumeModeReason` truth instead of dropping outward mode reasons at the master boundary +19. `16R` delivered: + - master restart auto-register consume now preserves the same explicit + primary heartbeat truth as the steady-state consume path +20. `16S` delivered: + - steady-state primary consume now preserves already accepted explicit + primary truth when later heartbeats omit those fields +21. `16T` delivered: + - full heartbeat stale-delete now depends on an explicit block-inventory + authoritative signal instead of empty inventory alone +22. `16U` delivered: + - one real sender-side startup path now emits non-authoritative empty block + inventory rather than leaving the new authority bit as wire-only +23. `16V` delivered: + - restart primary swap now rebases explicit primary truth to the winning + heartbeat instead of retaining stale old-primary truth +24. `16W` delivered: + - replica-side accepted explicit readiness now survives sparse heartbeats + without inventing explicit truth on fresh entries After this checkpoint: 1. keep `legacy P4` only as a compatibility guard -2. continue closing broader recovery-loop, publication, and disturbance seams - one bounded step at a time after outward reason preservation +2. stop the current bounded runtime closure package here unless a clearly + smaller seam remains visible than the residual gaps below 3. do not yet claim full recovery-loop closure 4. do not broaden into launch claims + +## Accepted Claim Set + +At this checkpoint the bounded chosen path may now claim: + +1. 
steady-state and restart reconstruction preserve accepted explicit primary + heartbeat truth on the current heartbeat/master/API path +2. sparse primary and replica heartbeats no longer silently erase already + accepted explicit truth on existing entries +3. empty full block inventory delete behavior is explicit rather than inferred + from emptiness alone +4. these claims remain bounded to the current chosen path and do not imply broad + runtime/product closure + +## Explicit Non-Claims + +This checkpoint still does NOT claim: + +1. broad recovery-loop closure across all lifecycle branches +2. broad end-to-end failover/recovery/publication proof +3. full restart-window policy for all empty-inventory / not-yet-loaded cases +4. broad multi-replica startup ownership beyond the bounded proven path +5. launch / rollout readiness + +## Residual Gaps After Checkpoint + +The remaining visible gaps are now better treated as residuals unless they can +be cut smaller than the slices above: + +1. broader recovery-loop closure +2. broader failover/publication whole-chain statement +3. long-window restart/disturbance policy and soak hardening +4. launch-envelope freeze and rollout gates + +## Checkpoint Proof Commands + +1. `go test ./weed/storage/blockvol -count=1 -run "TestInfoMessage_(ReplicaReady|NeedsRebuild|PublishHealthy|VolumeMode|VolumeModeReason)"` +2. 
`go test ./weed/server -count=1 -timeout 180s -run "Test(Registry_UpdateFullHeartbeat_(ConsumesCoreInfluencedReplicaReady|ReplicaReadyFallsBackToAddressesWhenFieldAbsent|ReplicaReadyMissingFieldPreservesAcceptedExplicitTruth|ReplicaReadyMissingFieldFreshEntryStillFallsBack|ConsumesExplicitNeedsRebuildFromPrimaryHeartbeat|NeedsRebuildFallsBackWhenFieldAbsent|ExplicitHealthySuppressesStaleNeedsRebuildHeuristic|ConsumesExplicitPublishHealthyFromPrimaryHeartbeat|ExplicitUnhealthySuppressesStalePublishHealthyHeuristic|ConsumesExplicitVolumeModeFromPrimaryHeartbeat|VolumeModeFallsBackWhenFieldAbsent|AutoRegisterPreservesExplicitPrimaryTruthOnRestart|MissingFieldsPreserveAcceptedExplicitPrimaryTruth|MissingFieldsDoNotInventExplicitTruthOnFreshEntry)|MasterRestart_(HigherEpochWins|HigherEpochRebasesExplicitPrimaryTruth|HigherEpochSparsePrimaryClearsOldExplicitTruth|LowerEpochBecomesReplica|SameEpoch_HigherLSNWins|SameEpoch_SameLSN_ExistingWins|SameEpoch_RoleTrusted)|StartBlockService_ScanFailureEmitsNonAuthoritativeInventory|CollectBlockVolumeHeartbeat_IncludesInventoryAuthority|Registry_UpdateFullHeartbeatWithInventoryAuthority_(NonAuthoritativeEmptyDoesNotDelete|AuthoritativeEmptyStillDeletes)|P11P3_HeartbeatReconstruction|P12P1_Restart_SameLineage)"` +3. `go test ./weed/server -count=1 -timeout 180s -run "Test(EntryToVolumeInfo_(ReflectsCoreInfluencedReadyConsume|ReflectsCoreInfluencedDegradedConsume)|BlockVolume(Get|List)Handler_ReflectsCoreInfluencedDegradedConsume|Master_ExpandCoordinated_B10_HeartbeatDoesNotDeleteDuringExpand|QA_Reg_FullHeartbeatEmptyServer)"` diff --git a/sw-block/design/v2-protocol-claim-and-evidence.md b/sw-block/design/v2-protocol-claim-and-evidence.md index 70555486b..ad74a1737 100644 --- a/sw-block/design/v2-protocol-claim-and-evidence.md +++ b/sw-block/design/v2-protocol-claim-and-evidence.md @@ -108,6 +108,7 @@ These are the claims that may currently be made without overreach. 
| `C-ADAPTER-CLOSURE` | assignment / readiness / publication closure is explicit on the chosen path | bounded chosen path only; does not imply mode normalization or pure-core extraction | `CP13-8A` proof package | allowed | | `C-CONSTRAINED-V1-RUNTIME` | current integrated checks are evaluating `V1` runtime behavior under `V2` constraints rather than validating a completed `V2 runtime` | current chosen path only, until explicit `V2 core` extraction | `v2_mini_core_design.md`, `Phase 13` docs | allowed | | `C-MODE-NORMALIZATION` | one bounded mode-policy / normalization package is closed on the current constrained chosen path | bounded chosen path only; does not imply pure `V2 core` extraction or broad product policy | `CP13-9` docs/tests | allowed | +| `C-PHASE16-RUNTIME-CHECKPOINT` | the bounded heartbeat/master/API runtime path now preserves accepted explicit truth across the delivered `16M-16W` restart/disturbance seams | bounded chosen path only; excludes broad recovery-loop, broad failover/publication, and launch claims | `sw-block/.private/phase/phase-16-finish-review.md`, `phase-16.md`, focused `weed/server` tests | allowed | | `C-LAUNCH-APPROVAL` | broad product launch readiness | outside current phase | future | not allowed | ## Evidence Map @@ -124,6 +125,7 @@ These are the claims that may currently be made without overreach. 
| Real-workload package | one bounded workload matrix passes on the corrected chosen path | `CP13-8` scenario/doc | tester validation reports | | Assignment/publication closure | assignment does not imply readiness/publication and corrected wiring refreshes replication truth explicitly | `CP13-8A` code/tests/debug evidence | tester investigation, bug docs | | Mode normalization | one bounded mode set is explicit and surface-consistent on the constrained current path | `CP13-9` contract/doc/tests | tester validation report | +| Runtime truth closure under restart/disturbance | accepted explicit truth survives the delivered bounded `Phase 16` heartbeat/restart seams through `16W` | `phase-16-finish-review.md`, `phase-16.md`, focused restart/heartbeat tests in `weed/server` | `v2-product-completion-overview.md`, `v2-protocol-truths.md` | ## Invalidated Or Narrowed Evidence diff --git a/weed/pb/master.proto b/weed/pb/master.proto index c83b07d71..f293ee869 100644 --- a/weed/pb/master.proto +++ b/weed/pb/master.proto @@ -114,6 +114,7 @@ message Heartbeat { bool has_no_block_volumes = 27; // server-level NVMe/TCP target address (empty if NVMe disabled on this VS) string block_nvme_addr = 28; + optional bool block_volume_inventory_authoritative = 29; } message HeartbeatResponse { diff --git a/weed/pb/master_pb/master.pb.go b/weed/pb/master_pb/master.pb.go index a4a184561..1d017ac2e 100644 --- a/weed/pb/master_pb/master.pb.go +++ b/weed/pb/master_pb/master.pb.go @@ -55,9 +55,10 @@ type Heartbeat struct { DeletedBlockVolumes []*BlockVolumeShortInfoMessage `protobuf:"bytes,26,rep,name=deleted_block_volumes,json=deletedBlockVolumes,proto3" json:"deleted_block_volumes,omitempty"` HasNoBlockVolumes bool `protobuf:"varint,27,opt,name=has_no_block_volumes,json=hasNoBlockVolumes,proto3" json:"has_no_block_volumes,omitempty"` // server-level NVMe/TCP target address (empty if NVMe disabled on this VS) - BlockNvmeAddr string 
`protobuf:"bytes,28,opt,name=block_nvme_addr,json=blockNvmeAddr,proto3" json:"block_nvme_addr,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + BlockNvmeAddr string `protobuf:"bytes,28,opt,name=block_nvme_addr,json=blockNvmeAddr,proto3" json:"block_nvme_addr,omitempty"` + BlockVolumeInventoryAuthoritative *bool `protobuf:"varint,29,opt,name=block_volume_inventory_authoritative,json=blockVolumeInventoryAuthoritative,proto3,oneof" json:"block_volume_inventory_authoritative,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *Heartbeat) Reset() { @@ -265,6 +266,13 @@ func (x *Heartbeat) GetBlockNvmeAddr() string { return "" } +func (x *Heartbeat) GetBlockVolumeInventoryAuthoritative() bool { + if x != nil && x.BlockVolumeInventoryAuthoritative != nil { + return *x.BlockVolumeInventoryAuthoritative + } + return false +} + type HeartbeatResponse struct { state protoimpl.MessageState `protogen:"open.v1"` VolumeSizeLimit uint64 `protobuf:"varint,1,opt,name=volume_size_limit,json=volumeSizeLimit,proto3" json:"volume_size_limit,omitempty"` @@ -5631,8 +5639,7 @@ var File_master_proto protoreflect.FileDescriptor const file_master_proto_rawDesc = "" + "\n" + - "\fmaster.proto\x12\tmaster_pb\x1a\x13volume_server.proto\"\xe5\n" + - "\n" + + "\fmaster.proto\x12\tmaster_pb\x1a\x13volume_server.proto\"\xe4\v\n" + "\tHeartbeat\x12\x0e\n" + "\x02ip\x18\x01 \x01(\tR\x02ip\x12\x12\n" + "\x04port\x18\x02 \x01(\rR\x04port\x12\x1d\n" + @@ -5664,10 +5671,12 @@ const file_master_proto_rawDesc = "" + "\x11new_block_volumes\x18\x19 \x03(\v2&.master_pb.BlockVolumeShortInfoMessageR\x0fnewBlockVolumes\x12Z\n" + "\x15deleted_block_volumes\x18\x1a \x03(\v2&.master_pb.BlockVolumeShortInfoMessageR\x13deletedBlockVolumes\x12/\n" + "\x14has_no_block_volumes\x18\x1b \x01(\bR\x11hasNoBlockVolumes\x12&\n" + - "\x0fblock_nvme_addr\x18\x1c \x01(\tR\rblockNvmeAddr\x1aB\n" + + "\x0fblock_nvme_addr\x18\x1c 
\x01(\tR\rblockNvmeAddr\x12T\n" + + "$block_volume_inventory_authoritative\x18\x1d \x01(\bH\x00R!blockVolumeInventoryAuthoritative\x88\x01\x01\x1aB\n" + "\x14MaxVolumeCountsEntry\x12\x10\n" + "\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" + - "\x05value\x18\x02 \x01(\rR\x05value:\x028\x01\"\xa9\x03\n" + + "\x05value\x18\x02 \x01(\rR\x05value:\x028\x01B'\n" + + "%_block_volume_inventory_authoritative\"\xa9\x03\n" + "\x11HeartbeatResponse\x12*\n" + "\x11volume_size_limit\x18\x01 \x01(\x04R\x0fvolumeSizeLimit\x12\x16\n" + "\x06leader\x18\x02 \x01(\tR\x06leader\x12'\n" + @@ -6398,6 +6407,7 @@ func file_master_proto_init() { if File_master_proto != nil { return } + file_master_proto_msgTypes[0].OneofWrappers = []any{} file_master_proto_msgTypes[61].OneofWrappers = []any{} type x struct{} out := protoimpl.TypeBuilder{ diff --git a/weed/server/master_block_registry.go b/weed/server/master_block_registry.go index 84893dcba..42865e2c2 100644 --- a/weed/server/master_block_registry.go +++ b/weed/server/master_block_registry.go @@ -24,20 +24,21 @@ const ( // ReplicaInfo tracks one replica of a block volume (CP8-2). type ReplicaInfo struct { - Server string // replica VS address - Path string // file path on replica VS - ISCSIAddr string // iSCSI target address - IQN string // iSCSI qualified name - NvmeAddr string // NVMe/TCP target address (ip:port), empty if NVMe disabled - NQN string // NVMe subsystem NQN, empty if NVMe disabled - DataAddr string // WAL receiver data listen addr - CtrlAddr string // WAL receiver ctrl listen addr - Ready bool // receiver/publish readiness confirmed by replica heartbeat - HealthScore float64 // from heartbeat (0.0-1.0) - WALHeadLSN uint64 // from heartbeat - WALLag uint64 // computed: primary.WALHeadLSN - replica.WALHeadLSN - LastHeartbeat time.Time // last heartbeat received from this replica - Role uint32 // replica role (RoleReplica, RoleRebuilding, etc.) 
+ Server string // replica VS address + Path string // file path on replica VS + ISCSIAddr string // iSCSI target address + IQN string // iSCSI qualified name + NvmeAddr string // NVMe/TCP target address (ip:port), empty if NVMe disabled + NQN string // NVMe subsystem NQN, empty if NVMe disabled + DataAddr string // WAL receiver data listen addr + CtrlAddr string // WAL receiver ctrl listen addr + Ready bool // receiver/publish readiness confirmed by replica heartbeat + HasExplicitReady bool // whether replica readiness was carried explicitly on heartbeat + HealthScore float64 // from heartbeat (0.0-1.0) + WALHeadLSN uint64 // from heartbeat + WALLag uint64 // computed: primary.WALHeadLSN - replica.WALHeadLSN + LastHeartbeat time.Time // last heartbeat received from this replica + Role uint32 // replica role (RoleReplica, RoleRebuilding, etc.) } const ( @@ -485,6 +486,10 @@ type HeartbeatResult struct { } func (r *BlockVolumeRegistry) UpdateFullHeartbeat(server string, infos []*master_pb.BlockVolumeInfoMessage, nvmeAddr string) HeartbeatResult { + return r.UpdateFullHeartbeatWithInventoryAuthority(server, infos, nvmeAddr, true) +} + +func (r *BlockVolumeRegistry) UpdateFullHeartbeatWithInventoryAuthority(server string, infos []*master_pb.BlockVolumeInfoMessage, nvmeAddr string, blockInventoryAuthoritative bool) HeartbeatResult { var result HeartbeatResult r.mu.Lock() defer r.mu.Unlock() @@ -499,44 +504,46 @@ func (r *BlockVolumeRegistry) UpdateFullHeartbeat(server string, infos []*master } // Find entries for this server that are NOT reported -> reconcile. - if names, ok := r.byServer[server]; ok { - for name := range names { - entry := r.volumes[name] - if entry == nil { - continue - } - if entry.VolumeServer == server { - // Server is the primary: check if primary path is reported. - if _, found := reported[entry.Path]; !found { - // B-10: Do not delete entries with a coordinated expand in flight. 
- // The primary may have restarted mid-expand; deleting the entry - // would orphan the volume and strand the expand coordinator. - if entry.ExpandInProgress { - glog.Warningf("block registry: skipping stale-cleanup for %q (ExpandInProgress=true, server=%s)", - name, server) - continue - } - delete(r.volumes, name) - delete(names, name) - // Also clean up replica entries from byServer. - for _, ri := range entry.Replicas { - r.removeFromServer(ri.Server, name) - } - } - } else { - // Server is a replica: check if replica path is reported. - ri := entry.ReplicaByServer(server) - if ri == nil { - // No replica record — stale byServer index, just clean up. - delete(names, name) + if blockInventoryAuthoritative { + if names, ok := r.byServer[server]; ok { + for name := range names { + entry := r.volumes[name] + if entry == nil { continue } - if _, found := reported[ri.Path]; !found { - // Replica path not reported — remove this replica, NOT the whole volume. - r.removeReplicaLocked(entry, server, name) - delete(names, name) - glog.V(0).Infof("block registry: removed stale replica %s for %q (path %s not in heartbeat)", - server, name, ri.Path) + if entry.VolumeServer == server { + // Server is the primary: check if primary path is reported. + if _, found := reported[entry.Path]; !found { + // B-10: Do not delete entries with a coordinated expand in flight. + // The primary may have restarted mid-expand; deleting the entry + // would orphan the volume and strand the expand coordinator. + if entry.ExpandInProgress { + glog.Warningf("block registry: skipping stale-cleanup for %q (ExpandInProgress=true, server=%s)", + name, server) + continue + } + delete(r.volumes, name) + delete(names, name) + // Also clean up replica entries from byServer. + for _, ri := range entry.Replicas { + r.removeFromServer(ri.Server, name) + } + } + } else { + // Server is a replica: check if replica path is reported. 
+ ri := entry.ReplicaByServer(server) + if ri == nil { + // No replica record — stale byServer index, just clean up. + delete(names, name) + continue + } + if _, found := reported[ri.Path]; !found { + // Replica path not reported — remove this replica, NOT the whole volume. + r.removeReplicaLocked(entry, server, name) + delete(names, name) + glog.V(0).Infof("block registry: removed stale replica %s for %q (path %s not in heartbeat)", + server, name, ri.Path) + } } } } @@ -606,20 +613,32 @@ func (r *BlockVolumeRegistry) UpdateFullHeartbeat(server string, infos []*master existing, dup := r.volumes[name] if !dup { entry := &BlockVolumeEntry{ - Name: name, - VolumeServer: server, - Path: info.Path, - SizeBytes: info.VolumeSize, - Epoch: info.Epoch, - Role: info.Role, - Status: StatusActive, - LastLeaseGrant: time.Now(), - LeaseTTL: 30 * time.Second, - HealthScore: info.HealthScore, - TransportDegraded: info.ReplicaDegraded, - WALHeadLSN: info.WalHeadLsn, - DurabilityMode: info.DurabilityMode, + Name: name, + VolumeServer: server, + Path: info.Path, + SizeBytes: info.VolumeSize, + Epoch: info.Epoch, + Role: info.Role, + Status: StatusActive, + LastLeaseGrant: time.Now(), + LeaseTTL: 30 * time.Second, + HealthScore: info.HealthScore, + TransportDegraded: info.ReplicaDegraded, + NeedsRebuild: false, + HasNeedsRebuild: false, + PublishHealthy: false, + HasPublishHealthy: false, + HeartbeatVolumeMode: "", + HasHeartbeatVolumeMode: false, + HeartbeatVolumeReason: "", + HasHeartbeatVolumeReason: false, + WALHeadLSN: info.WalHeadLsn, + DurabilityMode: info.DurabilityMode, } + entry.NeedsRebuild, entry.HasNeedsRebuild = primaryNeedsRebuildObservedFromHeartbeat(info) + entry.PublishHealthy, entry.HasPublishHealthy = primaryPublishHealthyObservedFromHeartbeat(info) + entry.HeartbeatVolumeMode, entry.HasHeartbeatVolumeMode = primaryVolumeModeObservedFromHeartbeat(info) + entry.HeartbeatVolumeReason, entry.HasHeartbeatVolumeReason = primaryVolumeReasonObservedFromHeartbeat(info) if 
info.ReplicaDataAddr != "" { entry.ReplicaDataAddr = info.ReplicaDataAddr } @@ -662,10 +681,7 @@ func (r *BlockVolumeRegistry) applyPrimaryHeartbeatObservation(existing *BlockVo existing.LastLeaseGrant = time.Now() existing.HealthScore = info.HealthScore existing.TransportDegraded = info.ReplicaDegraded - existing.NeedsRebuild, existing.HasNeedsRebuild = primaryNeedsRebuildObservedFromHeartbeat(info) - existing.PublishHealthy, existing.HasPublishHealthy = primaryPublishHealthyObservedFromHeartbeat(info) - existing.HeartbeatVolumeMode, existing.HasHeartbeatVolumeMode = primaryVolumeModeObservedFromHeartbeat(info) - existing.HeartbeatVolumeReason, existing.HasHeartbeatVolumeReason = primaryVolumeReasonObservedFromHeartbeat(info) + applyExplicitPrimaryTruthFromHeartbeat(existing, info, true) existing.WALHeadLSN = info.WalHeadLsn // F3: only update DurabilityMode when non-empty (prevents older VS from clearing strict mode). if info.DurabilityMode != "" { @@ -707,7 +723,7 @@ func (r *BlockVolumeRegistry) applyReplicaHeartbeatObservation(existing *BlockVo existing.Replicas[i].Role = blockvol.RoleToWire(blockvol.RoleReplica) existing.Replicas[i].NvmeAddr = info.NvmeAddr existing.Replicas[i].NQN = info.Nqn - existing.Replicas[i].Ready = replicaReadyObservedFromHeartbeat(info) + applyReplicaReadyFromHeartbeat(&existing.Replicas[i], info, true) if existing.WALHeadLSN > info.WalHeadLsn { existing.Replicas[i].WALLag = existing.WALHeadLSN - info.WalHeadLsn } else { @@ -743,6 +759,22 @@ func (r *BlockVolumeRegistry) applyReplicaHeartbeatObservation(existing *BlockVo existing.recomputeReplicaState() } +func applyReplicaReadyFromHeartbeat(replica *ReplicaInfo, info *master_pb.BlockVolumeInfoMessage, preserveWhenAbsent bool) { + if replica == nil || info == nil { + return + } + if info.ReplicaReady != nil { + replica.Ready = info.GetReplicaReady() + replica.HasExplicitReady = true + return + } + if preserveWhenAbsent && replica.HasExplicitReady { + return + } + replica.Ready = 
info.ReplicaDataAddr != "" && info.ReplicaCtrlAddr != "" + replica.HasExplicitReady = false +} + func replicaReadyObservedFromHeartbeat(info *master_pb.BlockVolumeInfoMessage) bool { if info == nil { return false @@ -793,6 +825,40 @@ func primaryVolumeReasonObservedFromHeartbeat(info *master_pb.BlockVolumeInfoMes return "", false } +func applyExplicitPrimaryTruthFromHeartbeat(existing *BlockVolumeEntry, info *master_pb.BlockVolumeInfoMessage, preserveWhenAbsent bool) { + if existing == nil || info == nil { + return + } + if needsRebuild, ok := primaryNeedsRebuildObservedFromHeartbeat(info); ok { + existing.NeedsRebuild = needsRebuild + existing.HasNeedsRebuild = true + } else if !preserveWhenAbsent { + existing.NeedsRebuild = false + existing.HasNeedsRebuild = false + } + if publishHealthy, ok := primaryPublishHealthyObservedFromHeartbeat(info); ok { + existing.PublishHealthy = publishHealthy + existing.HasPublishHealthy = true + } else if !preserveWhenAbsent { + existing.PublishHealthy = false + existing.HasPublishHealthy = false + } + if mode, ok := primaryVolumeModeObservedFromHeartbeat(info); ok { + existing.HeartbeatVolumeMode = mode + existing.HasHeartbeatVolumeMode = true + } else if !preserveWhenAbsent { + existing.HeartbeatVolumeMode = "" + existing.HasHeartbeatVolumeMode = false + } + if reason, ok := primaryVolumeReasonObservedFromHeartbeat(info); ok { + existing.HeartbeatVolumeReason = reason + existing.HasHeartbeatVolumeReason = true + } else if !preserveWhenAbsent { + existing.HeartbeatVolumeReason = "" + existing.HasHeartbeatVolumeReason = false + } +} + func validHeartbeatVolumeMode(mode string) bool { switch mode { case "allocated_only", "bootstrap_pending", "publish_healthy", "degraded", "needs_rebuild": @@ -907,6 +973,7 @@ func (r *BlockVolumeRegistry) demoteExistingToReplica(name string, existing *Blo } existing.NvmeAddr = info.NvmeAddr existing.NQN = info.Nqn + applyExplicitPrimaryTruthFromHeartbeat(existing, info, false) // Add old primary as 
replica. existing.Replicas = append(existing.Replicas, oldReplica) @@ -917,6 +984,7 @@ func (r *BlockVolumeRegistry) demoteExistingToReplica(name string, existing *Blo existing.ReplicaServer = oldReplica.Server existing.ReplicaPath = oldReplica.Path } + existing.recomputeReplicaState() } // upsertServerAsReplica adds or updates the server as a replica for the existing entry. @@ -944,6 +1012,7 @@ func (r *BlockVolumeRegistry) upsertServerAsReplica(name string, existing *Block existing.Replicas[i].Role = replicaRole existing.Replicas[i].NvmeAddr = info.NvmeAddr existing.Replicas[i].NQN = info.Nqn + applyReplicaReadyFromHeartbeat(&existing.Replicas[i], info, true) return } } @@ -962,6 +1031,7 @@ func (r *BlockVolumeRegistry) upsertServerAsReplica(name string, existing *Block NvmeAddr: info.NvmeAddr, NQN: info.Nqn, } + applyReplicaReadyFromHeartbeat(&ri, info, false) existing.Replicas = append(existing.Replicas, ri) r.addToServer(newServer, name) if len(existing.Replicas) == 1 { diff --git a/weed/server/master_block_registry_test.go b/weed/server/master_block_registry_test.go index bf75d3ba3..f3b3a2f15 100644 --- a/weed/server/master_block_registry_test.go +++ b/weed/server/master_block_registry_test.go @@ -114,6 +114,28 @@ func TestRegistry_UpdateFullHeartbeat(t *testing.T) { } } +func TestRegistry_UpdateFullHeartbeatWithInventoryAuthority_NonAuthoritativeEmptyDoesNotDelete(t *testing.T) { + r := NewBlockVolumeRegistry() + r.Register(&BlockVolumeEntry{Name: "vol1", VolumeServer: "s1", Path: "/v1.blk", Status: StatusActive}) + + r.UpdateFullHeartbeatWithInventoryAuthority("s1", nil, "", false) + + if _, ok := r.Lookup("vol1"); !ok { + t.Fatal("non-authoritative empty full heartbeat should not delete vol1") + } +} + +func TestRegistry_UpdateFullHeartbeatWithInventoryAuthority_AuthoritativeEmptyStillDeletes(t *testing.T) { + r := NewBlockVolumeRegistry() + r.Register(&BlockVolumeEntry{Name: "vol1", VolumeServer: "s1", Path: "/v1.blk", Status: StatusActive}) + + 
r.UpdateFullHeartbeatWithInventoryAuthority("s1", nil, "", true) + + if _, ok := r.Lookup("vol1"); ok { + t.Fatal("authoritative empty full heartbeat should still delete vol1") + } +} + func TestRegistry_UpdateDeltaHeartbeat(t *testing.T) { r := NewBlockVolumeRegistry() r.Register(&BlockVolumeEntry{Name: "vol1", VolumeServer: "s1", Path: "/v1.blk", Status: StatusPending}) @@ -1663,6 +1685,110 @@ func TestMasterRestart_HigherEpochWins(t *testing.T) { } } +func TestMasterRestart_HigherEpochRebasesExplicitPrimaryTruth(t *testing.T) { + r := NewBlockVolumeRegistry() + + oldNeedsRebuild := false + oldPublishHealthy := true + oldMode := "publish_healthy" + oldReason := "" + r.UpdateFullHeartbeat("vs1:9333", []*master_pb.BlockVolumeInfoMessage{{ + Path: "/data/vol1.blk", + Epoch: 5, + Role: blockvol.RoleToWire(blockvol.RolePrimary), + WalHeadLsn: 100, + PublishHealthy: &oldPublishHealthy, + NeedsRebuild: &oldNeedsRebuild, + VolumeMode: &oldMode, + VolumeModeReason: &oldReason, + VolumeSize: 1 << 30, + }}, "") + + newNeedsRebuild := true + newPublishHealthy := false + newMode := "needs_rebuild" + newReason := "gap_too_large" + r.UpdateFullHeartbeat("vs2:9333", []*master_pb.BlockVolumeInfoMessage{{ + Path: "/data/vol1.blk", + Epoch: 6, + Role: blockvol.RoleToWire(blockvol.RolePrimary), + WalHeadLsn: 150, + PublishHealthy: &newPublishHealthy, + NeedsRebuild: &newNeedsRebuild, + VolumeMode: &newMode, + VolumeModeReason: &newReason, + VolumeSize: 1 << 30, + }}, "") + + entry, _ := r.Lookup("vol1") + if entry.VolumeServer != "vs2:9333" { + t.Fatalf("expected vs2 as primary after rebase, got %q", entry.VolumeServer) + } + if !entry.HasNeedsRebuild || !entry.NeedsRebuild { + t.Fatalf("expected new primary explicit needs_rebuild truth, entry=%+v", entry) + } + if !entry.HasPublishHealthy || entry.PublishHealthy { + t.Fatalf("expected new primary explicit false publish_healthy truth, entry=%+v", entry) + } + if !entry.HasHeartbeatVolumeMode || entry.HeartbeatVolumeMode != 
"needs_rebuild" { + t.Fatalf("expected new primary explicit volume_mode truth, entry=%+v", entry) + } + if !entry.HasHeartbeatVolumeReason || entry.HeartbeatVolumeReason != "gap_too_large" { + t.Fatalf("expected new primary explicit volume_mode_reason truth, entry=%+v", entry) + } + if entry.VolumeMode != "needs_rebuild" { + t.Fatalf("expected outward mode to follow winning primary truth, got %q", entry.VolumeMode) + } +} + +func TestMasterRestart_HigherEpochSparsePrimaryClearsOldExplicitTruth(t *testing.T) { + r := NewBlockVolumeRegistry() + + oldNeedsRebuild := false + oldPublishHealthy := true + oldMode := "publish_healthy" + oldReason := "" + r.UpdateFullHeartbeat("vs1:9333", []*master_pb.BlockVolumeInfoMessage{{ + Path: "/data/vol1.blk", + Epoch: 5, + Role: blockvol.RoleToWire(blockvol.RolePrimary), + WalHeadLsn: 100, + PublishHealthy: &oldPublishHealthy, + NeedsRebuild: &oldNeedsRebuild, + VolumeMode: &oldMode, + VolumeModeReason: &oldReason, + VolumeSize: 1 << 30, + }}, "") + + r.UpdateFullHeartbeat("vs2:9333", []*master_pb.BlockVolumeInfoMessage{{ + Path: "/data/vol1.blk", + Epoch: 6, + Role: blockvol.RoleToWire(blockvol.RolePrimary), + WalHeadLsn: 150, + VolumeSize: 1 << 30, + }}, "") + + entry, _ := r.Lookup("vol1") + if entry.VolumeServer != "vs2:9333" { + t.Fatalf("expected vs2 as primary after sparse rebase, got %q", entry.VolumeServer) + } + if entry.HasNeedsRebuild || entry.NeedsRebuild { + t.Fatalf("sparse new primary should clear old explicit needs_rebuild truth, entry=%+v", entry) + } + if entry.HasPublishHealthy || entry.PublishHealthy { + t.Fatalf("sparse new primary should clear old explicit publish_healthy truth, entry=%+v", entry) + } + if entry.HasHeartbeatVolumeMode || entry.HeartbeatVolumeMode != "" { + t.Fatalf("sparse new primary should clear old explicit volume_mode truth, entry=%+v", entry) + } + if entry.HasHeartbeatVolumeReason || entry.HeartbeatVolumeReason != "" { + t.Fatalf("sparse new primary should clear old explicit 
volume_mode_reason truth, entry=%+v", entry) + } + if entry.VolumeMode == "publish_healthy" { + t.Fatalf("sparse new primary should not retain stale publish_healthy mode, entry=%+v", entry) + } +} + func TestMasterRestart_LowerEpochBecomesReplica(t *testing.T) { r := NewBlockVolumeRegistry() @@ -2180,6 +2306,80 @@ func TestRegistry_UpdateFullHeartbeat_ReplicaReadyFallsBackToAddressesWhenFieldA } } +func TestRegistry_UpdateFullHeartbeat_ReplicaReadyMissingFieldPreservesAcceptedExplicitTruth(t *testing.T) { + r := NewBlockVolumeRegistry() + if err := r.Register(&BlockVolumeEntry{ + Name: "vol-master-ready-preserve-explicit", + VolumeServer: "primary-server:8080", + Path: "/blocks/vol-master-ready-preserve-explicit-primary.blk", + Status: StatusActive, + Role: blockvol.RoleToWire(blockvol.RolePrimary), + ReplicaFactor: 2, + Replicas: []ReplicaInfo{{ + Server: "replica-server:8080", + Path: "/blocks/vol-master-ready-preserve-explicit.blk", + }}, + }); err != nil { + t.Fatalf("register: %v", err) + } + + replicaReady := false + r.UpdateFullHeartbeat("replica-server:8080", []*master_pb.BlockVolumeInfoMessage{{ + Path: "/blocks/vol-master-ready-preserve-explicit.blk", + ReplicaDataAddr: "10.0.0.2:4260", + ReplicaCtrlAddr: "10.0.0.2:4261", + ReplicaReady: &replicaReady, + }}, "") + r.UpdateFullHeartbeat("replica-server:8080", []*master_pb.BlockVolumeInfoMessage{{ + Path: "/blocks/vol-master-ready-preserve-explicit.blk", + ReplicaDataAddr: "10.0.0.2:4260", + ReplicaCtrlAddr: "10.0.0.2:4261", + }}, "") + + entry, _ := r.Lookup("vol-master-ready-preserve-explicit") + if !entry.Replicas[0].HasExplicitReady { + t.Fatalf("expected accepted explicit replica readiness to remain marked explicit, entry=%+v", entry) + } + if entry.Replicas[0].Ready { + t.Fatalf("missing-field replica heartbeat should preserve explicit false ready truth, entry=%+v", entry) + } + if entry.ReplicaReady { + t.Fatalf("aggregate replica ready should remain false after sparse heartbeat, entry=%+v", entry) + 
} +} + +func TestRegistry_UpdateFullHeartbeat_ReplicaReadyMissingFieldFreshEntryStillFallsBack(t *testing.T) { + r := NewBlockVolumeRegistry() + if err := r.Register(&BlockVolumeEntry{ + Name: "vol-master-ready-fresh-fallback", + VolumeServer: "primary-server:8080", + Path: "/blocks/vol-master-ready-fresh-fallback-primary.blk", + Status: StatusActive, + Role: blockvol.RoleToWire(blockvol.RolePrimary), + ReplicaFactor: 2, + Replicas: []ReplicaInfo{{ + Server: "replica-server:8080", + Path: "/blocks/vol-master-ready-fresh-fallback.blk", + }}, + }); err != nil { + t.Fatalf("register: %v", err) + } + + r.UpdateFullHeartbeat("replica-server:8080", []*master_pb.BlockVolumeInfoMessage{{ + Path: "/blocks/vol-master-ready-fresh-fallback.blk", + ReplicaDataAddr: "10.0.0.2:4260", + ReplicaCtrlAddr: "10.0.0.2:4261", + }}, "") + + entry, _ := r.Lookup("vol-master-ready-fresh-fallback") + if entry.Replicas[0].HasExplicitReady { + t.Fatalf("fresh missing-field replica heartbeat should not invent explicit readiness, entry=%+v", entry) + } + if !entry.Replicas[0].Ready || !entry.ReplicaReady { + t.Fatalf("fresh missing-field replica heartbeat should still fall back to addresses, entry=%+v", entry) + } +} + func TestRegistry_UpdateFullHeartbeat_ConsumesExplicitNeedsRebuildFromPrimaryHeartbeat(t *testing.T) { r := NewBlockVolumeRegistry() if err := r.Register(&BlockVolumeEntry{ @@ -2479,3 +2679,175 @@ func TestRegistry_UpdateFullHeartbeat_VolumeModeFallsBackWhenFieldAbsent(t *test t.Fatalf("expected fallback reconstructed degraded mode, got %q", entry.VolumeMode) } } + +func TestRegistry_UpdateFullHeartbeat_AutoRegisterPreservesExplicitPrimaryTruthOnRestart(t *testing.T) { + tests := []struct { + name string + needsRebuild bool + publishHealthy bool + mode string + reason string + wantMode string + wantReason string + }{ + { + name: "publish_healthy", + needsRebuild: false, + publishHealthy: true, + mode: "publish_healthy", + reason: "", + wantMode: "publish_healthy", + wantReason: 
"", + }, + { + name: "needs_rebuild", + needsRebuild: true, + publishHealthy: false, + mode: "needs_rebuild", + reason: "gap_too_large", + wantMode: "needs_rebuild", + wantReason: "gap_too_large", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("primary-server:8080") + + mode := tt.mode + reason := tt.reason + needsRebuild := tt.needsRebuild + publishHealthy := tt.publishHealthy + r.UpdateFullHeartbeat("primary-server:8080", []*master_pb.BlockVolumeInfoMessage{{ + Path: "/blocks/vol-restart-" + tt.name + ".blk", + VolumeSize: 1 << 30, + BlockSize: 4096, + Epoch: 7, + Role: blockvol.RoleToWire(blockvol.RolePrimary), + NeedsRebuild: &needsRebuild, + PublishHealthy: &publishHealthy, + VolumeMode: &mode, + VolumeModeReason: &reason, + }}, "") + + entry, ok := r.Lookup("vol-restart-" + tt.name) + if !ok { + t.Fatalf("expected auto-registered entry for %q", tt.name) + } + if !entry.HasNeedsRebuild || entry.NeedsRebuild != tt.needsRebuild { + t.Fatalf("needs_rebuild truth lost during auto-register, entry=%+v", entry) + } + if !entry.HasPublishHealthy || entry.PublishHealthy != tt.publishHealthy { + t.Fatalf("publish_healthy truth lost during auto-register, entry=%+v", entry) + } + if !entry.HasHeartbeatVolumeMode || entry.HeartbeatVolumeMode != tt.mode { + t.Fatalf("volume_mode truth lost during auto-register, entry=%+v", entry) + } + if !entry.HasHeartbeatVolumeReason || entry.HeartbeatVolumeReason != tt.reason { + t.Fatalf("volume_mode_reason truth lost during auto-register, entry=%+v", entry) + } + if entry.VolumeMode != tt.wantMode { + t.Fatalf("expected outward volume_mode %q after auto-register, got %q", tt.wantMode, entry.VolumeMode) + } + + info := entryToVolumeInfo(&entry, true) + if info.VolumeMode != tt.wantMode { + t.Fatalf("expected outward API volume_mode %q after auto-register, got %q", tt.wantMode, info.VolumeMode) + } + if info.VolumeModeReason != tt.wantReason { + 
t.Fatalf("expected outward API volume_mode_reason %q after auto-register, got %q", tt.wantReason, info.VolumeModeReason) + } + }) + } +} + +func TestRegistry_UpdateFullHeartbeat_MissingFieldsPreserveAcceptedExplicitPrimaryTruth(t *testing.T) { + r := NewBlockVolumeRegistry() + if err := r.Register(&BlockVolumeEntry{ + Name: "vol-master-preserve-explicit-truth", + VolumeServer: "primary-server:8080", + Path: "/blocks/vol-master-preserve-explicit-truth-primary.blk", + Status: StatusActive, + Role: blockvol.RoleToWire(blockvol.RolePrimary), + ReplicaFactor: 2, + Replicas: []ReplicaInfo{{ + Server: "replica-server:8080", + Path: "/blocks/vol-master-preserve-explicit-truth-replica.blk", + Ready: true, + Role: blockvol.RoleToWire(blockvol.RoleRebuilding), + }}, + }); err != nil { + t.Fatalf("register: %v", err) + } + + needsRebuild := false + publishHealthy := false + mode := "degraded" + reason := "barrier_timeout" + r.UpdateFullHeartbeat("primary-server:8080", []*master_pb.BlockVolumeInfoMessage{{ + Path: "/blocks/vol-master-preserve-explicit-truth-primary.blk", + Role: blockvol.RoleToWire(blockvol.RolePrimary), + ReplicaDegraded: true, + NeedsRebuild: &needsRebuild, + PublishHealthy: &publishHealthy, + VolumeMode: &mode, + VolumeModeReason: &reason, + }}, "") + + r.UpdateFullHeartbeat("primary-server:8080", []*master_pb.BlockVolumeInfoMessage{{ + Path: "/blocks/vol-master-preserve-explicit-truth-primary.blk", + Role: blockvol.RoleToWire(blockvol.RolePrimary), + ReplicaDegraded: true, + }}, "") + + entry, _ := r.Lookup("vol-master-preserve-explicit-truth") + if !entry.HasNeedsRebuild || entry.NeedsRebuild { + t.Fatalf("missing-field heartbeat should preserve explicit false needs_rebuild truth, entry=%+v", entry) + } + if !entry.HasPublishHealthy || entry.PublishHealthy { + t.Fatalf("missing-field heartbeat should preserve explicit false publish_healthy truth, entry=%+v", entry) + } + if !entry.HasHeartbeatVolumeMode || entry.HeartbeatVolumeMode != "degraded" { + 
t.Fatalf("missing-field heartbeat should preserve explicit volume_mode truth, entry=%+v", entry) + } + if !entry.HasHeartbeatVolumeReason || entry.HeartbeatVolumeReason != "barrier_timeout" { + t.Fatalf("missing-field heartbeat should preserve explicit volume_mode_reason truth, entry=%+v", entry) + } + if entry.VolumeMode != "degraded" { + t.Fatalf("missing-field heartbeat should preserve outward degraded mode, got %q", entry.VolumeMode) + } + + info := entryToVolumeInfo(&entry, true) + if info.VolumeMode != "degraded" { + t.Fatalf("expected outward API volume_mode=degraded, got %q", info.VolumeMode) + } + if info.VolumeModeReason != "barrier_timeout" { + t.Fatalf("expected outward API volume_mode_reason=barrier_timeout, got %q", info.VolumeModeReason) + } +} + +func TestRegistry_UpdateFullHeartbeat_MissingFieldsDoNotInventExplicitTruthOnFreshEntry(t *testing.T) { + r := NewBlockVolumeRegistry() + r.MarkBlockCapable("primary-server:8080") + + r.UpdateFullHeartbeat("primary-server:8080", []*master_pb.BlockVolumeInfoMessage{{ + Path: "/blocks/vol-master-fresh-fallback-primary.blk", + VolumeSize: 1 << 30, + BlockSize: 4096, + Epoch: 3, + Role: blockvol.RoleToWire(blockvol.RolePrimary), + ReplicaDegraded: true, + }}, "") + + entry, ok := r.Lookup("vol-master-fresh-fallback-primary") + if !ok { + t.Fatal("expected fresh entry from auto-register") + } + if entry.HasNeedsRebuild || entry.HasPublishHealthy || entry.HasHeartbeatVolumeMode || entry.HasHeartbeatVolumeReason { + t.Fatalf("fresh missing-field heartbeat should not invent explicit truth, entry=%+v", entry) + } + if entry.VolumeMode != "allocated_only" { + t.Fatalf("expected fresh fallback outward mode allocated_only without explicit truth, got %q", entry.VolumeMode) + } +} diff --git a/weed/server/master_grpc_server.go b/weed/server/master_grpc_server.go index c84024446..6d2f01848 100644 --- a/weed/server/master_grpc_server.go +++ b/weed/server/master_grpc_server.go @@ -277,7 +277,16 @@ func (ms *MasterServer) 
SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ // (BlockVolumeInfos on first heartbeat) or deltas (NewBlockVolumes/DeletedBlockVolumes // on subsequent heartbeats), never both in the same message. if len(heartbeat.BlockVolumeInfos) > 0 || heartbeat.HasNoBlockVolumes { - hbResult := ms.blockRegistry.UpdateFullHeartbeat(dn.Url(), heartbeat.BlockVolumeInfos, heartbeat.BlockNvmeAddr) + blockInventoryAuthoritative := true + if heartbeat.BlockVolumeInventoryAuthoritative != nil { + blockInventoryAuthoritative = heartbeat.GetBlockVolumeInventoryAuthoritative() + } + hbResult := ms.blockRegistry.UpdateFullHeartbeatWithInventoryAuthority( + dn.Url(), + heartbeat.BlockVolumeInfos, + heartbeat.BlockNvmeAddr, + blockInventoryAuthoritative, + ) // CP13-8: If a replica's receiver address changed (e.g., restart with port conflict), // immediately refresh the primary's assignment with the new addresses. for _, ac := range hbResult.AddrChanges { diff --git a/weed/server/volume_grpc_client_to_master.go b/weed/server/volume_grpc_client_to_master.go index 8454471a2..27bd389a8 100644 --- a/weed/server/volume_grpc_client_to_master.go +++ b/weed/server/volume_grpc_client_to_master.go @@ -349,16 +349,18 @@ func (vs *VolumeServer) doHeartbeatWithRetry(masterAddress pb.ServerAddress, grp return case <-vs.stopChan: var volumeMessages []*master_pb.VolumeInformationMessage + blockInventoryAuthoritative := true emptyBeat := &master_pb.Heartbeat{ - Ip: ip, - Port: port, - PublicUrl: vs.store.PublicUrl, - MaxFileKey: uint64(0), - DataCenter: dataCenter, - Rack: rack, - Volumes: volumeMessages, - HasNoVolumes: len(volumeMessages) == 0, - HasNoBlockVolumes: vs.blockService != nil, + Ip: ip, + Port: port, + PublicUrl: vs.store.PublicUrl, + MaxFileKey: uint64(0), + DataCenter: dataCenter, + Rack: rack, + Volumes: volumeMessages, + HasNoVolumes: len(volumeMessages) == 0, + HasNoBlockVolumes: vs.blockService != nil, + BlockVolumeInventoryAuthoritative: &blockInventoryAuthoritative, } 
glog.V(1).Infof("volume server %s:%d stops and deletes all volumes", vs.store.Ip, vs.store.Port) if err = stream.Send(emptyBeat); err != nil { @@ -374,13 +376,15 @@ func (vs *VolumeServer) doHeartbeatWithRetry(masterAddress pb.ServerAddress, grp // Uses BlockService.CollectBlockVolumeHeartbeat which includes replication addresses (R1-4). func (vs *VolumeServer) collectBlockVolumeHeartbeat(ip string, port uint32, dc, rack string) *master_pb.Heartbeat { msgs := vs.blockService.CollectBlockVolumeHeartbeat() + blockInventoryAuthoritative := vs.blockService.BlockInventoryAuthoritative() return &master_pb.Heartbeat{ - Ip: ip, - Port: port, - DataCenter: dc, - Rack: rack, - BlockVolumeInfos: blockvol.InfoMessagesToProto(msgs), - HasNoBlockVolumes: len(msgs) == 0, - BlockNvmeAddr: vs.blockService.NvmeListenAddr(), + Ip: ip, + Port: port, + DataCenter: dc, + Rack: rack, + BlockVolumeInfos: blockvol.InfoMessagesToProto(msgs), + HasNoBlockVolumes: len(msgs) == 0, + BlockNvmeAddr: vs.blockService.NvmeListenAddr(), + BlockVolumeInventoryAuthoritative: &blockInventoryAuthoritative, } } diff --git a/weed/server/volume_server_block.go b/weed/server/volume_server_block.go index ceb97bbdf..98a03ed2b 100644 --- a/weed/server/volume_server_block.go +++ b/weed/server/volume_server_block.go @@ -97,6 +97,10 @@ type BlockService struct { // routable host:port. This is the -ip value (IP or resolvable hostname), // never an opaque server identity from -id. advertisedHost string + // blockInventoryAuthoritative reports whether the in-memory block inventory is + // authoritative enough to drive master-side stale cleanup from a full + // heartbeat. It starts false and turns true only after the startup inventory scan succeeds. + blockInventoryAuthoritative bool // TestHook: if set, invoked when the legacy direct rebuild starter is used. 
onLegacyStartRebuild func(path, rebuildAddr string, epoch uint64) @@ -112,6 +116,15 @@ func (bs *BlockService) V2Core() *engine.CoreEngine { return bs.v2Core } +// BlockInventoryAuthoritative reports whether the current block inventory can be +// treated as authoritative for full-heartbeat stale cleanup. +func (bs *BlockService) BlockInventoryAuthoritative() bool { + if bs == nil { + return false + } + return bs.blockInventoryAuthoritative +} + // CoreProjection returns the latest adapter-cached projection emitted by the // explicit V2 core on the narrow live path. func (bs *BlockService) CoreProjection(path string) (engine.PublicationProjection, bool) { @@ -210,17 +223,18 @@ func StartBlockService(listenAddr, blockDir, iqnPrefix, portalAddr string, nvmeC } bs := &BlockService{ - blockStore: storage.NewBlockVolumeStore(), - iqnPrefix: iqnPrefix, - nqnPrefix: nqnPrefix, - blockDir: blockDir, - listenAddr: listenAddr, - nvmeListenAddr: nvmeCfg.ListenAddr, - v2Bridge: v2bridge.NewControlBridge(), - v2Orchestrator: engine.NewRecoveryOrchestrator(), - v2Core: engine.NewCoreEngine(), - localServerID: listenAddr, // INTERIM: transport-shaped, see field doc - coreProj: make(map[string]engine.PublicationProjection), + blockStore: storage.NewBlockVolumeStore(), + iqnPrefix: iqnPrefix, + nqnPrefix: nqnPrefix, + blockDir: blockDir, + listenAddr: listenAddr, + nvmeListenAddr: nvmeCfg.ListenAddr, + v2Bridge: v2bridge.NewControlBridge(), + v2Orchestrator: engine.NewRecoveryOrchestrator(), + v2Core: engine.NewCoreEngine(), + localServerID: listenAddr, // INTERIM: transport-shaped, see field doc + coreProj: make(map[string]engine.PublicationProjection), + blockInventoryAuthoritative: false, } bs.v2Recovery = NewRecoveryManager(bs) @@ -290,6 +304,7 @@ func StartBlockService(listenAddr, blockDir, iqnPrefix, portalAddr string, nvmeC name := strings.TrimSuffix(entry.Name(), ".blk") bs.registerVolume(vol, name) } + bs.blockInventoryAuthoritative = true // Start iSCSI target in 
background. go func() { diff --git a/weed/server/volume_server_test.go b/weed/server/volume_server_test.go index ac1ad774e..7105a596b 100644 --- a/weed/server/volume_server_test.go +++ b/weed/server/volume_server_test.go @@ -1,6 +1,7 @@ package weed_server import ( + "path/filepath" "testing" "github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb" @@ -67,3 +68,47 @@ func TestMaintenanceMode(t *testing.T) { }) } } + +func TestCollectBlockVolumeHeartbeat_IncludesInventoryAuthority(t *testing.T) { + vs := &VolumeServer{ + store: &storage.Store{}, + blockService: &BlockService{ + blockStore: storage.NewBlockVolumeStore(), + blockInventoryAuthoritative: false, + }, + } + + hb := vs.collectBlockVolumeHeartbeat("127.0.0.1", 18080, "dc1", "rack1") + if hb.BlockVolumeInventoryAuthoritative == nil { + t.Fatal("expected block inventory authority bit to be present") + } + if hb.GetBlockVolumeInventoryAuthoritative() { + t.Fatal("expected non-authoritative block inventory bit on test heartbeat") + } + if !hb.HasNoBlockVolumes { + t.Fatal("expected empty heartbeat to report has_no_block_volumes") + } +} + +func TestStartBlockService_ScanFailureEmitsNonAuthoritativeInventory(t *testing.T) { + missingDir := filepath.Join(t.TempDir(), "missing-block-dir") + bs := StartBlockService("127.0.0.1:3260", missingDir, "", "", NVMeConfig{}) + if bs == nil { + t.Fatal("expected block service even when startup scan fails") + } + if bs.BlockInventoryAuthoritative() { + t.Fatal("startup scan failure should leave block inventory non-authoritative") + } + + vs := &VolumeServer{ + store: &storage.Store{}, + blockService: bs, + } + hb := vs.collectBlockVolumeHeartbeat("127.0.0.1", 18080, "dc1", "rack1") + if hb.BlockVolumeInventoryAuthoritative == nil { + t.Fatal("expected inventory authority bit on startup-scan-failure heartbeat") + } + if hb.GetBlockVolumeInventoryAuthoritative() { + t.Fatal("startup-scan-failure heartbeat should be non-authoritative") + } +}