mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2026-05-22 09:41:28 +00:00
* fix(mini): shut down admin/s3/webdav/filer before volume/master on Ctrl+C Interrupts fired grace hooks in registration order, so master (started first) shut down before its clients, producing heartbeat-canceled errors and masterClient reconnection noise during weed mini shutdown. Admin/s3/ webdav had no interrupt hooks at all and were killed at os.Exit. - grace: execute interrupt hooks in LIFO (defer-style) order so later- started services tear down first. - filer: consolidate the three separate interrupt hooks (gRPC / HTTP / DB) into one that runs in order, so filer shutdown stays correct independent of FIFO/LIFO semantics. - mini: add MiniClientsShutdownCtx (separate from test-facing MiniClusterCtx) plus an OnMiniClientsShutdown helper. Admin, S3, WebDAV and the maintenance worker observe it; runMini registers a cancel hook after startup so under LIFO it fires first and waits up to 10s on a WaitGroup for those services to drain before filer, volume, and master shut down. Resulting order on Ctrl+C: admin/s3/webdav/worker -> filer (gRPC -> HTTP -> DB) -> volume -> master. * refactor(mini): group mini-client shutdown into one state struct The first pass spread the shutdown plumbing across three globals (MiniClientsShutdownCtx, miniClientsWg, cancelMiniClients) and two ctx-derivation sites (OnMiniClientsShutdown and startMiniAdminWithWorker). Group into a private miniClientsState (ctx/cancel/wg) rebuilt per runMini invocation, and chain its ctx from MiniClusterCtx so clients only observe one signal. Tests that cancel MiniClusterCtx still trigger client shutdown via parent-child propagation. - resetMiniClients() installs fresh state at the top of runMini, so in-process test reruns don't inherit stale ctx/wg. - onMiniClientsShutdown(fn) replaces the exported OnMiniClientsShutdown and only observes one ctx. - trackMiniClient() replaces the manual wg.Add/Done dance for the admin goroutine. - miniClientsCtx() gives the admin startup a ctx without re-deriving. 
- triggerMiniClientsShutdown(timeout) is the interrupt hook body. No behaviour change; existing tests pass. * refactor: generalize shutdown ctx as an option, not a mini-specific helper Several service files (s3, webdav, filer, master, volume) observed the mini-specific MiniClusterCtx or called onMiniClientsShutdown directly. That leaked mini orchestration into code that also runs under weed s3, weed webdav, weed filer, weed master, and weed volume standalone. Replace with a generic `shutdownCtx context.Context` field on each service's Options struct. When non-nil, the server watches it and shuts down gracefully; when nil (standalone), the shutdown path is a no-op. Mini wires the contexts up from a single place (runMini): - miniMasterOptions/miniOptions.v/miniFilerOptions.shutdownCtx = MiniClusterCtx (drives test-triggered teardown) - miniS3Options/miniWebDavOptions.shutdownCtx = miniClientsCtx() (drives Ctrl+C teardown before filer/volume/master) All knowledge of MiniClusterCtx now lives in mini.go. * fix(mini): stop worker before clients ctx so admin shutdown isn't blocked Symptom on Ctrl+C of a clean weed mini: mini's Shutting down admin/s3/ webdav hook sat for 10s then logged "timed out". Admin had started its shutdown but was blocked inside StopWorkerGrpcServer's GracefulStop, waiting for the still-connected worker stream. That in turn left filer clients connected and cascaded into filer's own 10s gRPC graceful-stop timeout. Two causes, both fixed: 1. worker.Stop() deadlocked on clean shutdown. It sent ActionStop (which makes managerLoop `break out` and exit), then called getTaskLoad() which sends to the same unbuffered cmd channel — no receiver, hangs forever. Reorder Stop() to snapshot the admin client and drain tasks BEFORE sending ActionStop, and call Disconnect() via the local snapshot afterwards. 2. 
Worker's taskRequestLoop raced with Disconnect(): RequestTask reads from c.incoming, which Disconnect closes, yielding a nil response and a panic on response.Message. Handle the closed channel explicitly. 3. Mini now has a preCancel phase (beforeMiniClientsShutdown) that runs synchronously BEFORE the clients ctx is cancelled. Register worker shutdown there so admin's worker-gRPC GracefulStop finds the worker already disconnected and returns immediately, instead of waiting on a stream that is about to close anyway. Observed shutdown of a clean mini: admin/s3/webdav down in <10ms; full process exit in ~11s (the remaining 10s is a pre-existing filer gRPC graceful-stop timeout, not cascaded from the clients tier). * feat(mini): cap filer gRPC graceful stop at 1s under weed mini Full weed mini shutdown was ~11s on a clean exit, dominated by the filer's default 10s gRPC GracefulStop timeout while background SubscribeLocalMetadata streams drained. Expose the timeout as a FilerOptions.gracefulStopTimeout field (default 10s for standalone weed filer) and set it to 1s in mini. Clean weed mini shutdown now takes ~2s.
166 lines
5.6 KiB
Go
166 lines
5.6 KiB
Go
package command
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net/http"
|
|
"os"
|
|
"os/user"
|
|
"strconv"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/util/version"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/security"
|
|
weed_server "github.com/seaweedfs/seaweedfs/weed/server"
|
|
"github.com/seaweedfs/seaweedfs/weed/util"
|
|
)
|
|
|
|
var (
|
|
webDavStandaloneOptions WebDavOption
|
|
)
|
|
|
|
// WebDavOption collects the settings startWebDav needs to run a WebDAV server
// backed by a filer. The pointer fields are filled from command-line flags
// for the standalone command (see init).
type WebDavOption struct {
	// filer is the filer server address.
	filer *string
	// ipBind is the IP address to listen on; empty listens on all addresses.
	ipBind *string
	// filerRootPath is the remote path on the filer to expose.
	filerRootPath *string
	// port is the HTTP listen port of the WebDAV server.
	port *int
	// collection is the collection new files are created in.
	collection *string
	// replication is the replication setting used for new files.
	replication *string
	// disk is the disk type tag: hdd, ssd or any custom tag.
	disk *string
	// tlsPrivateKey is the path to the TLS private key file. A non-empty
	// value makes startWebDav serve HTTPS instead of HTTP.
	tlsPrivateKey *string
	// tlsCertificate is the path to the TLS certificate file.
	tlsCertificate *string
	// cacheDir is the local cache directory for file chunks.
	cacheDir *string
	// cacheSizeMB is the local cache capacity in MB.
	cacheSizeMB *int64
	// maxMB is the size limit above which files are split.
	maxMB *int

	// shutdownCtx is optional. When it is non-nil, startWebDav gracefully
	// shuts the HTTP server down once the context is cancelled. weed mini
	// sets it; the standalone weed webdav command leaves it nil.
	shutdownCtx context.Context
}
|
|
|
|
func init() {
|
|
cmdWebDav.Run = runWebDav // break init cycle
|
|
webDavStandaloneOptions.filer = cmdWebDav.Flag.String("filer", "localhost:8888", "filer server address")
|
|
webDavStandaloneOptions.ipBind = cmdWebDav.Flag.String("ip.bind", "", "ip address to bind to. Default listen to all.")
|
|
webDavStandaloneOptions.port = cmdWebDav.Flag.Int("port", 7333, "webdav server http listen port")
|
|
webDavStandaloneOptions.collection = cmdWebDav.Flag.String("collection", "", "collection to create the files")
|
|
webDavStandaloneOptions.replication = cmdWebDav.Flag.String("replication", "", "replication to create the files")
|
|
webDavStandaloneOptions.disk = cmdWebDav.Flag.String("disk", "", "[hdd|ssd|<tag>] hard drive or solid state drive or any tag")
|
|
webDavStandaloneOptions.tlsPrivateKey = cmdWebDav.Flag.String("key.file", "", "path to the TLS private key file")
|
|
webDavStandaloneOptions.tlsCertificate = cmdWebDav.Flag.String("cert.file", "", "path to the TLS certificate file")
|
|
webDavStandaloneOptions.cacheDir = cmdWebDav.Flag.String("cacheDir", os.TempDir(), "local cache directory for file chunks")
|
|
webDavStandaloneOptions.cacheSizeMB = cmdWebDav.Flag.Int64("cacheCapacityMB", 0, "local cache capacity in MB")
|
|
webDavStandaloneOptions.maxMB = cmdWebDav.Flag.Int("maxMB", 4, "split files larger than the limit")
|
|
webDavStandaloneOptions.filerRootPath = cmdWebDav.Flag.String("filer.path", "/", "use this remote path from filer server")
|
|
}
|
|
|
|
var cmdWebDav = &Command{
|
|
UsageLine: "webdav -port=7333 -filer=<ip:port>",
|
|
Short: "start a webdav server that is backed by a filer",
|
|
Long: `start a webdav server that is backed by a filer.
|
|
|
|
`,
|
|
}
|
|
|
|
func runWebDav(cmd *Command, args []string) bool {
|
|
|
|
util.LoadSecurityConfiguration()
|
|
|
|
listenAddress := fmt.Sprintf("%s:%d", *webDavStandaloneOptions.ipBind, *webDavStandaloneOptions.port)
|
|
glog.V(0).Infof("Starting Seaweed WebDav Server %s at %s", version.Version(), listenAddress)
|
|
|
|
return webDavStandaloneOptions.startWebDav()
|
|
|
|
}
|
|
|
|
func (wo *WebDavOption) startWebDav() bool {
|
|
|
|
// detect current user
|
|
uid, gid := uint32(0), uint32(0)
|
|
if u, err := user.Current(); err == nil {
|
|
if parsedId, pe := strconv.ParseUint(u.Uid, 10, 32); pe == nil {
|
|
uid = uint32(parsedId)
|
|
}
|
|
if parsedId, pe := strconv.ParseUint(u.Gid, 10, 32); pe == nil {
|
|
gid = uint32(parsedId)
|
|
}
|
|
}
|
|
|
|
// parse filer grpc address
|
|
filerAddress := pb.ServerAddress(*wo.filer)
|
|
|
|
grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client")
|
|
|
|
var cipher bool
|
|
// connect to filer
|
|
for {
|
|
err := pb.WithGrpcFilerClient(false, 0, filerAddress, grpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
|
|
resp, err := client.GetFilerConfiguration(context.Background(), &filer_pb.GetFilerConfigurationRequest{})
|
|
if err != nil {
|
|
return fmt.Errorf("get filer %s configuration: %v", filerAddress, err)
|
|
}
|
|
cipher = resp.Cipher
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
glog.V(2).Infof("wait to connect to filer %s grpc address %s", *wo.filer, filerAddress.ToGrpcAddress())
|
|
time.Sleep(time.Second)
|
|
} else {
|
|
glog.V(0).Infof("connected to filer %s grpc address %s", *wo.filer, filerAddress.ToGrpcAddress())
|
|
break
|
|
}
|
|
}
|
|
|
|
ws, webdavServer_err := weed_server.NewWebDavServer(&weed_server.WebDavOption{
|
|
Filer: filerAddress,
|
|
FilerRootPath: *wo.filerRootPath,
|
|
GrpcDialOption: grpcDialOption,
|
|
Collection: *wo.collection,
|
|
Replication: *wo.replication,
|
|
DiskType: *wo.disk,
|
|
Uid: uid,
|
|
Gid: gid,
|
|
Cipher: cipher,
|
|
CacheDir: util.ResolvePath(*wo.cacheDir),
|
|
CacheSizeMB: *wo.cacheSizeMB,
|
|
MaxMB: *wo.maxMB,
|
|
})
|
|
if webdavServer_err != nil {
|
|
glog.Fatalf("WebDav Server startup error: %v", webdavServer_err)
|
|
}
|
|
|
|
httpS := &http.Server{Handler: ws.Handler}
|
|
|
|
listenAddress := fmt.Sprintf("%s:%d", *wo.ipBind, *wo.port)
|
|
webDavListener, err := util.NewListener(listenAddress, time.Duration(10)*time.Second)
|
|
if err != nil {
|
|
glog.Fatalf("WebDav Server listener on %s error: %v", listenAddress, err)
|
|
}
|
|
|
|
if wo.shutdownCtx != nil {
|
|
go func() {
|
|
<-wo.shutdownCtx.Done()
|
|
httpS.Shutdown(context.Background())
|
|
}()
|
|
}
|
|
|
|
if *wo.tlsPrivateKey != "" {
|
|
glog.V(0).Infof("Start Seaweed WebDav Server %s at https %s", version.Version(), listenAddress)
|
|
if err = httpS.ServeTLS(webDavListener, *wo.tlsCertificate, *wo.tlsPrivateKey); err != nil && err != http.ErrServerClosed {
|
|
glog.Fatalf("WebDav Server Fail to serve: %v", err)
|
|
}
|
|
} else {
|
|
glog.V(0).Infof("Start Seaweed WebDav Server %s at http %s", version.Version(), listenAddress)
|
|
if err = httpS.Serve(webDavListener); err != nil && err != http.ErrServerClosed {
|
|
glog.Fatalf("WebDav Server Fail to serve: %v", err)
|
|
}
|
|
}
|
|
|
|
return true
|
|
|
|
}
|