Files
seaweedfs/weed/server/volume_server_block_debug.go
pingqiu cf16e53b04 feat: Phase 16M/17 + promote fixes + testrunner updates
Phase 16M: explicit replica readiness on heartbeat seam
- master.proto: optional bool replica_ready = 19 (proto regenerated on M01)
- block_heartbeat_proto.go: write/read ReplicaReady with presence semantics
- master_block_registry.go: replicaReadyObservedFromHeartbeat prefers
  explicit proto field, falls back to address heuristic when absent
- volume_server_block.go: heartbeat emits ReplicaReady from core projection

Phase 17: host effects extraction + stop line
- phase-17-log.md: Batch 10/11 delivery notes

Promote fixes:
- master_block_failover.go: deterministic replica addrs from path hash
- qa_promote_replication_test.go: address-upgrade trigger test
- qa_promote_rejoin_live_test.go: new live rejoin test

Testrunner:
- devops.go: action improvements
- recovery-baseline-failover.yaml, suite-ha-failover.yaml: scenario updates
- cp11b3-manual-promote.yaml: promote scenario alignment
- fresh_volume_write_test.go: new component test

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 11:38:05 -07:00

124 lines
4.8 KiB
Go

package weed_server
import (
"encoding/json"
"net/http"
"time"
engine "github.com/seaweedfs/seaweedfs/sw-block/engine/replication"
"github.com/seaweedfs/seaweedfs/weed/storage/blockvol"
)
// ShipperDebugInfo is the JSON view of one replica shipper as sampled at
// request time by the block shipper debug endpoint.
type ShipperDebugInfo struct {
	// DataAddr is the data address reported for this shipper.
	DataAddr string `json:"data_addr"`

	// State is the shipper's state string as reported by its shipper group.
	State string `json:"state"`

	// FlushedLSN is the flushed LSN reported for this shipper.
	FlushedLSN uint64 `json:"flushed_lsn"`
}
// BlockVolumeDebugInfo is the JSON debug snapshot of a single block volume,
// stamped with the time at which it was sampled.
type BlockVolumeDebugInfo struct {
	// Identity, role and WAL position of the volume. Mode is only populated
	// when a core projection is available.
	Path    string `json:"path"`
	Role    string `json:"role"`
	Mode    string `json:"mode,omitempty"`
	Epoch   uint64 `json:"epoch"`
	HeadLSN uint64 `json:"head_lsn"`

	// Degradation, readiness and publication flags.
	Degraded          bool `json:"degraded"`
	RoleApplied       bool `json:"role_applied"`
	ReceiverReady     bool `json:"receiver_ready"`
	ShipperConfigured bool `json:"shipper_configured"`
	ShipperConnected  bool `json:"shipper_connected"`
	ReplicaEligible   bool `json:"replica_eligible"`
	PublishHealthy    bool `json:"publish_healthy"`

	// PublicationReason is the reason string carried by the core projection's
	// publication state; omitted when empty.
	PublicationReason string `json:"publication_reason,omitempty"`

	// Shippers lists the per-replica shipper states; omitted when empty.
	Shippers []ShipperDebugInfo `json:"shippers,omitempty"`

	// CoreProjection is a copy of the core projection this snapshot was
	// derived from, or nil when none was available.
	CoreProjection *engine.PublicationProjection `json:"core_projection,omitempty"`

	// ExecutedCoreCommands and ProjectionMismatches are diagnostic lists that
	// are only set when non-empty.
	ExecutedCoreCommands []string `json:"executed_core_commands,omitempty"`
	ProjectionMismatches []string `json:"projection_mismatches,omitempty"`

	// Timestamp is the UTC sample time formatted as RFC 3339 with nanoseconds.
	Timestamp string `json:"timestamp"`
}
// DebugInfoForVolume builds the debug snapshot for the volume stored at path.
//
// The snapshot is first seeded from vol.Status() and the service's readiness
// snapshot for path. When a core projection exists for path, its role, mode,
// readiness and publication values replace the adapter-derived ones, and a
// copy of the projection is attached to the result. Executed core commands and
// projection mismatches are attached only when they are non-empty.
func (bs *BlockService) DebugInfoForVolume(path string, vol *blockvol.BlockVol) BlockVolumeDebugInfo {
	volStatus := vol.Status()
	ready := bs.ReadinessSnapshot(path)

	// Adapter-local view: volume status plus readiness snapshot.
	var out BlockVolumeDebugInfo
	out.Path = path
	out.Role = volStatus.Role.String()
	out.Epoch = volStatus.Epoch
	out.HeadLSN = volStatus.WALHeadLSN
	out.Degraded = volStatus.ReplicaDegraded
	out.RoleApplied = ready.RoleApplied
	out.ReceiverReady = ready.ReceiverReady
	out.ShipperConfigured = ready.ShipperConfigured
	out.ShipperConnected = ready.ShipperConnected
	out.ReplicaEligible = ready.ReplicaEligible
	out.PublishHealthy = ready.PublishHealthy
	out.Timestamp = time.Now().UTC().Format(time.RFC3339Nano)

	// Core-owned projection, when present, takes precedence over the
	// adapter-local flags filled in above.
	projection, haveProjection := bs.CoreProjection(path)
	if haveProjection {
		out.Role = string(projection.Role)
		out.Mode = string(projection.Mode.Name)

		coreReadiness := projection.Readiness
		out.RoleApplied = coreReadiness.RoleApplied
		out.ReceiverReady = coreReadiness.ReceiverReady
		out.ShipperConfigured = coreReadiness.ShipperConfigured
		out.ShipperConnected = coreReadiness.ShipperConnected
		out.ReplicaEligible = coreReadiness.ReplicaReady

		out.PublishHealthy = projection.Publication.Healthy
		out.PublicationReason = projection.Publication.Reason

		// Attach a private copy so the returned pointer does not alias the
		// value handed back by CoreProjection.
		attached := projection
		out.CoreProjection = &attached
	}

	executed := bs.ExecutedCoreCommands(path)
	if len(executed) != 0 {
		out.ExecutedCoreCommands = executed
	}

	mismatched := bs.CoreProjectionMismatches(path)
	if len(mismatched) != 0 {
		out.ProjectionMismatches = mismatched
	}

	return out
}
// debugBlockShipperHandler writes a JSON array with one BlockVolumeDebugInfo
// per block volume, including the per-replica shipper states read from each
// volume's shipper group at request time. Unlike the master's replica_degraded
// (heartbeat-lagged), this is sampled directly on the volume server, so there
// is no heartbeat delay.
//
// The response body is always a JSON array: it is "[]" when the block service
// or its store is absent, and also when the store holds no block volumes.
//
// GET /debug/block/shipper
func (vs *VolumeServer) debugBlockShipperHandler(w http.ResponseWriter, r *http.Request) {
	// Start from a non-nil slice: a nil slice would be encoded as JSON null,
	// which disagrees with the "[]" returned when no block service exists.
	infos := []BlockVolumeDebugInfo{}

	if vs.blockService != nil {
		if store := vs.blockService.Store(); store != nil {
			store.IterateBlockVolumes(func(path string, vol *blockvol.BlockVol) {
				info := vs.blockService.DebugInfoForVolume(path, vol)
				// Add per-shipper state from the ShipperGroup if available.
				if sg := vol.GetShipperGroup(); sg != nil {
					for _, ss := range sg.ShipperStates() {
						info.Shippers = append(info.Shippers, ShipperDebugInfo{
							DataAddr:   ss.DataAddr,
							State:      ss.State,
							FlushedLSN: ss.FlushedLSN,
						})
					}
				}
				infos = append(infos, info)
			})
		}
	}

	w.Header().Set("Content-Type", "application/json")
	// Best-effort debug output: by the time Encode can fail the response has
	// typically already been started, so there is nothing useful left to
	// report to the client.
	_ = json.NewEncoder(w).Encode(infos)
}