mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2026-05-14 05:41:29 +00:00
Ping previously dialled whatever host:port the caller asked for. Gate each server's Ping handler on cluster membership: masters check the topology, registered cluster nodes, and configured master peers; volume servers only accept their seed/current masters; filers accept tracked peer filers, the master-learned volume server set, and configured masters. Use address-indexed peer lookups to keep Ping target validation O(1): - topology maintains a pb.ServerAddress -> *DataNode index alongside the dc/rack/node tree, kept in sync from doLinkChildNode and UnlinkChildNode plus the ip/port-rewrite branch in GetOrCreateDataNode. GetTopology now returns nil on a detached subtree instead of panicking, so the linkage hooks can no-op safely. - vid_map tracks a refcount per volume-server address so hasVolumeServer answers without scanning every vid location. The add path skips empty-address entries the same way the delete path already does, so a zero-value Location cannot leak a permanent serverRefCount[""] bucket. - masters reuse a cached master-address set from MasterClient instead of walking the configured peer slice on every request. - volume servers compare against a pre-built seed-master set and protect currentMaster reads/writes with an RWMutex, fixing the data race with the heartbeat goroutine. The seed slice is copied on construction so external mutation cannot desync it from the frozen lookup set. - cluster.check drops the direct volume-to-volume sweep; volume servers no longer carry a peer-volume list, and the note next to the dropped probe is reworded to make clear that direct volume-to-volume reachability is intentionally not validated by this command. Update the volume-server integration tests that drove Ping through the new admission gate: success-path coverage now targets the master peer (the only type a volume server tracks), and the unknown/unreachable path asserts the InvalidArgument the gate now returns instead of the old downstream dial error. 
Mirror the same admission gate in the Rust volume server crate: a seed-master HashSet built once at startup plus a tokio RwLock over the heartbeat-tracked current master, both consulted in is_known_ping_target on every Ping, with InvalidArgument returned for any target that isn't a recognised master.
232 lines
7.0 KiB
Go
232 lines
7.0 KiB
Go
package weed_server
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"math/rand/v2"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/raft"
|
|
"github.com/seaweedfs/seaweedfs/weed/cluster"
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/stats"
|
|
"google.golang.org/grpc/codes"
|
|
"google.golang.org/grpc/status"
|
|
)
|
|
|
|
/*
|
|
How does the exclusive lock work?
|
|
-----------
|
|
|
|
Shell
|
|
------
|
|
When shell lock,
|
|
* lease an admin token (lockTime, token)
|
|
* start a goroutine to renew the admin token periodically
|
|
|
|
When shell unlock
|
|
* stop the renewal goroutine
|
|
* sends a release lock request
|
|
|
|
Master
|
|
------
|
|
Master maintains:
|
|
* randomNumber
|
|
* lastLockTime
|
|
When master receives the lease/renew request from shell
|
|
If lastLockTime still fresh {
|
|
if is a renew and token is valid {
|
|
// for renew
|
|
generate the randomNumber => token
|
|
return
|
|
}
|
|
refuse
|
|
return
|
|
} else {
|
|
// for fresh lease request
|
|
generate the randomNumber => token
|
|
return
|
|
}
|
|
|
|
When master receives the release lock request from shell
|
|
set the lastLockTime to zero
|
|
|
|
|
|
The volume server does not need to verify.
|
|
This makes the lock/unlock optional, similar to what Go code usually does.
|
|
|
|
*/
|
|
|
|
const (
	// LockDuration is how long a leased admin token stays valid after it is
	// minted or renewed; isLocked treats a lease older than this as expired.
	LockDuration = 10 * time.Second
)
|
|
|
|
// AdminLock holds the lease state for one named admin lock: the secret the
// current holder must echo back, when the lease was minted, and who holds it.
type AdminLock struct {
	// accessSecret is the random token the holder presents to renew or release.
	accessSecret int64
	// accessLockTime is when the token was generated; the lease expires
	// LockDuration after this instant.
	accessLockTime time.Time
	// lastClient identifies the current holder; used in refusal messages and
	// as the label of the MasterAdminLock gauge.
	lastClient string
	// lastMessage is surfaced in "already locked" errors. NOTE(review): no
	// code in this file assigns it — presumably set elsewhere; verify.
	lastMessage string
}
|
|
|
|
// AdminLocks is a registry of named admin locks guarded by a single RWMutex.
// Construct with NewAdminLocks; the zero value has a nil map.
type AdminLocks struct {
	// locks maps lock name -> current lease state.
	locks map[string]*AdminLock
	sync.RWMutex
}
|
|
|
|
func NewAdminLocks() *AdminLocks {
|
|
return &AdminLocks{
|
|
locks: make(map[string]*AdminLock),
|
|
}
|
|
}
|
|
|
|
func (locks *AdminLocks) isLocked(lockName string) (clientName string, message string, isLocked bool) {
|
|
locks.RLock()
|
|
defer locks.RUnlock()
|
|
adminLock, found := locks.locks[lockName]
|
|
if !found {
|
|
return "", "", false
|
|
}
|
|
glog.V(4).Infof("isLocked %v: %v", adminLock.lastClient, adminLock.lastMessage)
|
|
return adminLock.lastClient, adminLock.lastMessage, adminLock.accessLockTime.Add(LockDuration).After(time.Now())
|
|
}
|
|
|
|
func (locks *AdminLocks) isValidToken(lockName string, ts time.Time, token int64) bool {
|
|
locks.RLock()
|
|
defer locks.RUnlock()
|
|
adminLock, found := locks.locks[lockName]
|
|
if !found {
|
|
return false
|
|
}
|
|
return adminLock.accessLockTime.Equal(ts) && adminLock.accessSecret == token
|
|
}
|
|
|
|
func (locks *AdminLocks) generateToken(lockName string, clientName string) (ts time.Time, token int64) {
|
|
locks.Lock()
|
|
defer locks.Unlock()
|
|
if existing, found := locks.locks[lockName]; found && existing.lastClient != clientName {
|
|
stats.MasterAdminLock.DeleteLabelValues(existing.lastClient)
|
|
}
|
|
lock := &AdminLock{
|
|
accessSecret: rand.Int64(),
|
|
accessLockTime: time.Now(),
|
|
lastClient: clientName,
|
|
}
|
|
locks.locks[lockName] = lock
|
|
stats.MasterAdminLock.WithLabelValues(clientName).Set(1)
|
|
return lock.accessLockTime, lock.accessSecret
|
|
}
|
|
|
|
func (locks *AdminLocks) deleteLock(lockName string) {
|
|
locks.Lock()
|
|
stats.MasterAdminLock.DeleteLabelValues(locks.locks[lockName].lastClient)
|
|
defer locks.Unlock()
|
|
delete(locks.locks, lockName)
|
|
}
|
|
|
|
func (ms *MasterServer) LeaseAdminToken(ctx context.Context, req *master_pb.LeaseAdminTokenRequest) (*master_pb.LeaseAdminTokenResponse, error) {
|
|
resp := &master_pb.LeaseAdminTokenResponse{}
|
|
|
|
if !ms.Topo.IsLeader() {
|
|
return resp, raft.NotLeaderError
|
|
}
|
|
|
|
if lastClient, lastMessage, isLocked := ms.adminLocks.isLocked(req.LockName); isLocked {
|
|
glog.V(4).Infof("LeaseAdminToken %v", lastClient)
|
|
if req.PreviousToken != 0 && ms.adminLocks.isValidToken(req.LockName, time.Unix(0, req.PreviousLockTime), req.PreviousToken) {
|
|
// for renew
|
|
ts, token := ms.adminLocks.generateToken(req.LockName, req.ClientName)
|
|
resp.Token, resp.LockTsNs = token, ts.UnixNano()
|
|
return resp, nil
|
|
}
|
|
// refuse since still locked
|
|
return resp, fmt.Errorf("already locked by %v: %v", lastClient, lastMessage)
|
|
}
|
|
// for fresh lease request
|
|
ts, token := ms.adminLocks.generateToken(req.LockName, req.ClientName)
|
|
resp.Token, resp.LockTsNs = token, ts.UnixNano()
|
|
return resp, nil
|
|
}
|
|
|
|
func (ms *MasterServer) ReleaseAdminToken(ctx context.Context, req *master_pb.ReleaseAdminTokenRequest) (*master_pb.ReleaseAdminTokenResponse, error) {
|
|
resp := &master_pb.ReleaseAdminTokenResponse{}
|
|
if ms.adminLocks.isValidToken(req.LockName, time.Unix(0, req.PreviousLockTime), req.PreviousToken) {
|
|
ms.adminLocks.deleteLock(req.LockName)
|
|
}
|
|
return resp, nil
|
|
}
|
|
|
|
// isKnownPingTarget reports whether target is a peer that the master has
|
|
// learned about as part of cluster membership. Restricting Ping to known
|
|
// peers avoids turning the RPC into a generic outbound dialer. The lookups
|
|
// are O(1) so the gate stays cheap on clusters with thousands of nodes.
|
|
func (ms *MasterServer) isKnownPingTarget(ctx context.Context, target string, targetType string) bool {
|
|
addr := pb.ServerAddress(target)
|
|
switch targetType {
|
|
case cluster.FilerType, cluster.BrokerType, cluster.S3Type:
|
|
return ms.Cluster.IsKnownNode(targetType, addr)
|
|
case cluster.VolumeServerType:
|
|
if ms.Topo == nil {
|
|
return false
|
|
}
|
|
return ms.Topo.LookupDataNodeByAddress(addr) != nil
|
|
case cluster.MasterType:
|
|
if ms.option != nil && ms.option.Master.Equals(addr) {
|
|
return true
|
|
}
|
|
if ms.MasterClient != nil {
|
|
_, ok := ms.MasterClient.ListMasterSet()[addr.ToHttpAddress()]
|
|
return ok
|
|
}
|
|
return false
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (ms *MasterServer) Ping(ctx context.Context, req *master_pb.PingRequest) (resp *master_pb.PingResponse, pingErr error) {
|
|
resp = &master_pb.PingResponse{
|
|
StartTimeNs: time.Now().UnixNano(),
|
|
}
|
|
// Empty target is a self-liveness probe and stays unauthenticated.
|
|
if req.Target != "" && !ms.isKnownPingTarget(ctx, req.Target, req.TargetType) {
|
|
resp.StopTimeNs = time.Now().UnixNano()
|
|
return resp, status.Errorf(codes.InvalidArgument, "unknown ping target %s of type %s", req.Target, req.TargetType)
|
|
}
|
|
if req.TargetType == cluster.FilerType {
|
|
pingErr = pb.WithFilerClient(false, 0, pb.ServerAddress(req.Target), ms.grpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
|
|
pingResp, err := client.Ping(ctx, &filer_pb.PingRequest{})
|
|
if pingResp != nil {
|
|
resp.RemoteTimeNs = pingResp.StartTimeNs
|
|
}
|
|
return err
|
|
})
|
|
}
|
|
if req.TargetType == cluster.VolumeServerType {
|
|
pingErr = pb.WithVolumeServerClient(false, pb.ServerAddress(req.Target), ms.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error {
|
|
pingResp, err := client.Ping(ctx, &volume_server_pb.PingRequest{})
|
|
if pingResp != nil {
|
|
resp.RemoteTimeNs = pingResp.StartTimeNs
|
|
}
|
|
return err
|
|
})
|
|
}
|
|
if req.TargetType == cluster.MasterType {
|
|
pingErr = pb.WithMasterClient(false, pb.ServerAddress(req.Target), ms.grpcDialOption, false, func(client master_pb.SeaweedClient) error {
|
|
pingResp, err := client.Ping(ctx, &master_pb.PingRequest{})
|
|
if pingResp != nil {
|
|
resp.RemoteTimeNs = pingResp.StartTimeNs
|
|
}
|
|
return err
|
|
})
|
|
}
|
|
if pingErr != nil {
|
|
pingErr = fmt.Errorf("ping %s %s: %v", req.TargetType, req.Target, pingErr)
|
|
}
|
|
resp.StopTimeNs = time.Now().UnixNano()
|
|
return
|
|
}
|