Files
seaweedfs/weed/shell/command_cluster_check.go
Chris Lu 10cc06333b cluster: restrict Ping RPC to known peers of the requested type (#9445)
Ping previously dialled whatever host:port the caller asked for. Gate
each server's Ping handler on cluster membership: masters check the
topology, registered cluster nodes, and configured master peers; volume
servers only accept their seed/current masters; filers accept tracked
peer filers, the master-learned volume server set, and configured
masters.

Use address-indexed peer lookups to keep Ping target validation O(1):
- topology maintains a pb.ServerAddress -> *DataNode index alongside
  the dc/rack/node tree, kept in sync from doLinkChildNode and
  UnlinkChildNode plus the ip/port-rewrite branch in
  GetOrCreateDataNode. GetTopology now returns nil on a detached
  subtree instead of panicking, so the linkage hooks can no-op safely.
- vid_map tracks a refcount per volume-server address so
  hasVolumeServer answers without scanning every vid location. The
  add path skips empty-address entries the same way the delete path
  already does, so a zero-value Location cannot leak a permanent
  serverRefCount[""] bucket.
- masters reuse a cached master-address set from MasterClient instead
  of walking the configured peer slice on every request.
- volume servers compare against a pre-built seed-master set and
  protect currentMaster reads/writes with an RWMutex, fixing the
  data race with the heartbeat goroutine. The seed slice is copied
  on construction so external mutation cannot desync it from the
  frozen lookup set.
- cluster.check drops the direct volume-to-volume sweep; volume
  servers no longer carry a peer-volume list, and the note next to
  the dropped probe is reworded to make clear that direct
  volume-to-volume reachability is intentionally not validated by
  this command.

Update the volume-server integration tests that drove Ping through the
new admission gate: success-path coverage now targets the master peer
(the only type a volume server tracks), and the unknown/unreachable
path asserts the InvalidArgument the gate now returns instead of the
old downstream dial error.

Mirror the same admission gate in the Rust volume server crate: a
seed-master HashSet built once at startup plus a tokio RwLock over the
heartbeat-tracked current master, both consulted in is_known_ping_target
on every Ping, with InvalidArgument returned for any target that isn't
a recognised master.
2026-05-12 13:00:52 -07:00

250 lines
7.9 KiB
Go

package shell
import (
"context"
"flag"
"fmt"
"io"
"github.com/seaweedfs/seaweedfs/weed/cluster"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
"github.com/seaweedfs/seaweedfs/weed/storage/types"
)
func init() {
Commands = append(Commands, &commandClusterCheck{})
}
// commandClusterCheck implements the "cluster.check" shell command. It holds
// no state; everything it needs is passed to its methods.
type commandClusterCheck struct{}
// Name returns the identifier under which this command is invoked.
func (c *commandClusterCheck) Name() string {
	const commandName = "cluster.check"
	return commandName
}
// Help returns the usage text for the command: a one-line summary followed by
// the invocation form.
func (c *commandClusterCheck) Help() string {
return `check current cluster network connectivity
cluster.check
`
}
// HasTag reports whether the command carries the given tag. This command
// carries none, so the answer is false for every tag.
func (c *commandClusterCheck) HasTag(_ CommandTag) bool {
	return false
}
// Do runs the cluster.check command and writes a human-readable report to
// writer.
//
// It first validates the topology (at least one disk type, and every disk type
// has a non-zero MaxVolumeCount), lists the filers of the configured filer
// group, and collects the volume servers and masters. It then asks each source
// server to Ping each target, in this order: master->volume server,
// master->master, volume server->master, filer->master, filer->volume server,
// and filer->filer.
//
// A non-nil error is returned only when the topology or the filer list cannot
// be collected, or when a topology precondition fails. Individual Ping
// failures are printed to writer and do not abort the run.
func (c *commandClusterCheck) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {

	clusterPsCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
	if err = clusterPsCommand.Parse(args); err != nil {
		// With flag.ContinueOnError the flag package has already printed the
		// problem and the usage, so the parse error is not propagated.
		return nil
	}

	ctx := context.Background()

	// collect topology information
	topologyInfo, volumeSizeLimitMb, err := collectTopologyInfo(commandEnv, 0)
	if err != nil {
		return err
	}
	fmt.Fprintf(writer, "Topology volumeSizeLimit:%d MB%s\n", volumeSizeLimitMb, diskInfosToString(topologyInfo.DiskInfos))

	if len(topologyInfo.DiskInfos) == 0 {
		return fmt.Errorf("no disk type defined")
	}
	for diskType, diskInfo := range topologyInfo.DiskInfos {
		if diskInfo.MaxVolumeCount == 0 {
			return fmt.Errorf("no volume available for \"%s\" disk type", diskType)
		}
	}

	// collect filers
	var filers []pb.ServerAddress
	err = commandEnv.MasterClient.WithClient(false, func(client master_pb.SeaweedClient) error {
		resp, listErr := client.ListClusterNodes(ctx, &master_pb.ListClusterNodesRequest{
			ClientType: cluster.FilerType,
			FilerGroup: *commandEnv.option.FilerGroup,
		})
		// Check the error before touching resp: on failure resp is nil and
		// reading resp.ClusterNodes would panic.
		if listErr != nil {
			return listErr
		}
		for _, node := range resp.ClusterNodes {
			filers = append(filers, pb.ServerAddress(node.Address))
		}
		return nil
	})
	if err != nil {
		return err
	}
	fmt.Fprintf(writer, "the cluster has %d filers: %+v\n", len(filers), filers)

	if len(filers) > 0 {
		genericDiskInfo, genericDiskInfoOk := topologyInfo.DiskInfos[""]
		hddDiskInfo, hddDiskInfoOk := topologyInfo.DiskInfos[types.HddType]

		if !genericDiskInfoOk && !hddDiskInfoOk {
			return fmt.Errorf("filer metadata logs need generic or hdd disk type to be defined")
		}

		// The per-disk-type loop above already rejects any entry whose
		// MaxVolumeCount is zero, so this is a defensive re-check.
		if (genericDiskInfoOk && genericDiskInfo.MaxVolumeCount == 0) || (hddDiskInfoOk && hddDiskInfo.MaxVolumeCount == 0) {
			return fmt.Errorf("filer metadata logs need generic or hdd volumes to be available")
		}
	}

	// collect volume servers from the topology snapshot fetched above; reusing
	// it avoids a second round trip and keeps the disk checks and the server
	// list consistent with each other.
	var volumeServers []pb.ServerAddress
	for _, dc := range topologyInfo.DataCenterInfos {
		for _, r := range dc.RackInfos {
			for _, dn := range r.DataNodeInfos {
				volumeServers = append(volumeServers, pb.NewServerAddressFromDataNode(dn))
			}
		}
	}
	fmt.Fprintf(writer, "the cluster has %d volume servers: %+v\n", len(volumeServers), volumeServers)

	// collect all masters
	var masters []pb.ServerAddress
	masters = append(masters, commandEnv.MasterClient.GetMasters(ctx)...)

	// reportFailure finishes a "checking ..." line when the probe failed.
	// Successful probes are finished by printTiming inside the ping helpers.
	reportFailure := func(pingErr error) {
		if pingErr != nil {
			fmt.Fprintf(writer, "%v\n", pingErr)
		}
	}

	// pingFromMaster asks the master at source to Ping target and prints the
	// timing on success.
	pingFromMaster := func(source, target pb.ServerAddress, targetType string) error {
		return pb.WithMasterClient(false, source, commandEnv.option.GrpcDialOption, false, func(client master_pb.SeaweedClient) error {
			pong, pingErr := client.Ping(ctx, &master_pb.PingRequest{
				Target:     string(target),
				TargetType: targetType,
			})
			if pingErr != nil {
				return pingErr
			}
			printTiming(writer, pong.StartTimeNs, pong.RemoteTimeNs, pong.StopTimeNs)
			return nil
		})
	}

	// pingFromVolumeServer asks the volume server at source to Ping target and
	// prints the timing on success.
	pingFromVolumeServer := func(source, target pb.ServerAddress, targetType string) error {
		return pb.WithVolumeServerClient(false, source, commandEnv.option.GrpcDialOption, func(client volume_server_pb.VolumeServerClient) error {
			pong, pingErr := client.Ping(ctx, &volume_server_pb.PingRequest{
				Target:     string(target),
				TargetType: targetType,
			})
			if pingErr != nil {
				return pingErr
			}
			printTiming(writer, pong.StartTimeNs, pong.RemoteTimeNs, pong.StopTimeNs)
			return nil
		})
	}

	// pingFromFiler asks the filer at source to Ping target and prints the
	// timing on success.
	pingFromFiler := func(source, target pb.ServerAddress, targetType string) error {
		return pb.WithFilerClient(false, 0, source, commandEnv.option.GrpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
			pong, pingErr := client.Ping(ctx, &filer_pb.PingRequest{
				Target:     string(target),
				TargetType: targetType,
			})
			if pingErr != nil {
				return pingErr
			}
			printTiming(writer, pong.StartTimeNs, pong.RemoteTimeNs, pong.StopTimeNs)
			return nil
		})
	}

	// check from master to volume servers
	for _, master := range masters {
		for _, volumeServer := range volumeServers {
			fmt.Fprintf(writer, "checking master %s to volume server %s ... ", string(master), string(volumeServer))
			reportFailure(pingFromMaster(master, volumeServer, cluster.VolumeServerType))
		}
	}

	// check between masters
	for _, sourceMaster := range masters {
		for _, targetMaster := range masters {
			if sourceMaster == targetMaster {
				continue
			}
			fmt.Fprintf(writer, "checking master %s to %s ... ", string(sourceMaster), string(targetMaster))
			reportFailure(pingFromMaster(sourceMaster, targetMaster, cluster.MasterType))
		}
	}

	// check from volume servers to masters
	for _, volumeServer := range volumeServers {
		for _, master := range masters {
			fmt.Fprintf(writer, "checking volume server %s to master %s ... ", string(volumeServer), string(master))
			reportFailure(pingFromVolumeServer(volumeServer, master, cluster.MasterType))
		}
	}

	// check from filers to masters
	for _, filer := range filers {
		for _, master := range masters {
			fmt.Fprintf(writer, "checking filer %s to master %s ... ", string(filer), string(master))
			reportFailure(pingFromFiler(filer, master, cluster.MasterType))
		}
	}

	// check from filers to volume servers
	for _, filer := range filers {
		for _, volumeServer := range volumeServers {
			fmt.Fprintf(writer, "checking filer %s to volume server %s ... ", string(filer), string(volumeServer))
			reportFailure(pingFromFiler(filer, volumeServer, cluster.VolumeServerType))
		}
	}

	// Direct volume-to-volume connectivity is intentionally not validated
	// here. Each volume server now restricts Ping to peers it can identify
	// (its configured/current masters), so it does not carry a peer-volume
	// list to drive a mesh check from. The master->volume and filer->volume
	// probes above do not exercise volume-to-volume reachability.

	// check between filers; unlike the master mesh above, each filer is also
	// asked to ping itself.
	for _, sourceFiler := range filers {
		for _, targetFiler := range filers {
			fmt.Fprintf(writer, "checking filer %s to %s ... ", string(sourceFiler), string(targetFiler))
			reportFailure(pingFromFiler(sourceFiler, targetFiler, cluster.FilerType))
		}
	}

	return nil
}
// printTiming writes the success line for one Ping probe.
//
// startNs and stopNs are the timestamps taken around the probe and remoteNs is
// the timestamp reported by the target, all in nanoseconds. The round trip is
// stopNs-startNs, and the clock delta is how far remoteNs lies from the
// midpoint of start and stop. Both are printed in milliseconds.
func printTiming(writer io.Writer, startNs, remoteNs, stopNs int64) {
	const nanosPerMilli = 1000000

	elapsedNs := stopNs - startNs
	midpointNs := (startNs + stopNs) / 2
	offsetNs := remoteNs - midpointNs

	roundTripMs := float32(elapsedNs) / nanosPerMilli
	clockDeltaMs := float32(offsetNs) / nanosPerMilli

	fmt.Fprintf(writer, "ok round trip %.3fms clock delta %.3fms\n", roundTripMs, clockDeltaMs)
}