From eaf561e86c417c14b26316807d84b7dc2feec246 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Tue, 14 Apr 2026 09:24:35 -0700 Subject: [PATCH] perf(s3): add optional shared in-memory chunk cache for GET (#9069) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the -s3.cacheCapacityMB flag (default 0, disabled) that attaches an in-memory chunk_cache.ChunkCacheInMemory to the server-wide ReaderCache introduced in the previous commit. When enabled, completed chunks are deposited into the shared cache as they are downloaded, so concurrent and repeat GETs of the same object hit memory instead of re-fetching chunks from volume servers. When 0 (the default) the shared ReaderCache still runs — it just attaches a nil chunk cache, so behaviour matches the previous commit exactly. No behaviour change for clusters that don't opt in. Disk-backed TieredChunkCache was evaluated and rejected: its synchronous SetChunk writes regressed cold reads ~12x on loopback because the chunk fetchers block on local disk I/O that is *slower* than the TCP volume-server fetch it is supposed to accelerate. Memory-only avoids that. Flag registered in all four S3 flag sites (s3.go, server.go, filer.go, mini.go) per the comment on command.S3Options. The chunk size used to convert CacheSizeMB → entry count is encapsulated in the s3ChunkCacheChunkSizeMB constant so it's easy to grep and revisit if the filer default chunk size changes. Measured on weed mini + 1 GiB random object over loopback, single curl on a presigned URL: cacheCapacityMB=0 (off): cold ~2900, warm ~2900 MB/s cacheCapacityMB=4096: cold ~2790, warm ~5050 MB/s (+70%) --- weed/command/filer.go | 1 + weed/command/mini.go | 1 + weed/command/s3.go | 3 +++ weed/command/server.go | 1 + weed/s3api/s3api_server.go | 49 ++++++++++++++++++++++++++++++++++---- 5 files changed, 51 insertions(+), 4 deletions(-) diff --git a/weed/command/filer.go b/weed/command/filer.go index 1fa70dcde..0798417f3 100644 --- a/weed/command/filer.go +++ b/weed/command/filer.go @@ -150,6 +150,7 @@ func init() { filerS3Options.portIceberg = cmdFiler.Flag.Int("s3.port.iceberg", 8181, "Iceberg REST Catalog server listen port (0 to disable)") filerS3Options.externalUrl = cmdFiler.Flag.String("s3.externalUrl", "", "the external URL clients use to connect (e.g. https://api.example.com:9000). Used for S3 signature verification behind a reverse proxy. Falls back to S3_EXTERNAL_URL env var.") filerS3Options.defaultFileMode = cmdFiler.Flag.String("s3.defaultFileMode", "", "default file mode for S3 uploaded objects, e.g. 0660, 0644, 0666") + filerS3Options.cacheSizeMB = cmdFiler.Flag.Int64("s3.cacheCapacityMB", 0, "in-memory chunk cache capacity in MB for S3 GETs shared across requests (0 disables)") // start webdav on filer filerStartWebDav = cmdFiler.Flag.Bool("webdav", false, "whether to start webdav gateway") diff --git a/weed/command/mini.go b/weed/command/mini.go index 35c7adeda..56d8e2934 100644 --- a/weed/command/mini.go +++ b/weed/command/mini.go @@ -251,6 +251,7 @@ func initMiniS3Flags() { miniS3Options.allowDeleteBucketNotEmpty = miniS3AllowDeleteBucketNotEmpty miniS3Options.externalUrl = cmdMini.Flag.String("s3.externalUrl", "", "the external URL clients use to connect (e.g. https://api.example.com:9000). Used for S3 signature verification behind a reverse proxy. Falls back to S3_EXTERNAL_URL env var.") miniS3Options.defaultFileMode = cmdMini.Flag.String("s3.defaultFileMode", "", "default file mode for S3 uploaded objects, e.g. 0660, 0644, 0666") + miniS3Options.cacheSizeMB = cmdMini.Flag.Int64("s3.cacheCapacityMB", 0, "in-memory chunk cache capacity in MB for S3 GETs shared across requests (0 disables)") // In mini mode, S3 uses the shared debug server started at line 681, not its own separate debug server miniS3Options.debug = new(bool) // explicitly false miniS3Options.debugPort = cmdMini.Flag.Int("s3.debug.port", 6060, "http port for debugging (unused in mini mode)") diff --git a/weed/command/s3.go b/weed/command/s3.go index 7c8b72b0e..8c67f0f43 100644 --- a/weed/command/s3.go +++ b/weed/command/s3.go @@ -73,6 +73,7 @@ type S3Options struct { cipher *bool externalUrl *string defaultFileMode *string + cacheSizeMB *int64 } func init() { @@ -109,6 +110,7 @@ func init() { s3StandaloneOptions.cipher = cmdS3.Flag.Bool("encryptVolumeData", false, "encrypt data on volume servers") s3StandaloneOptions.externalUrl = cmdS3.Flag.String("externalUrl", "", "the external URL clients use to connect (e.g. https://api.example.com:9000). Used for S3 signature verification behind a reverse proxy. Falls back to S3_EXTERNAL_URL env var.") s3StandaloneOptions.defaultFileMode = cmdS3.Flag.String("defaultFileMode", "", "default file mode for S3 uploaded objects, e.g. 0660, 0644, 0666") + s3StandaloneOptions.cacheSizeMB = cmdS3.Flag.Int64("cacheCapacityMB", 0, "in-memory chunk cache capacity in MB for S3 GETs shared across requests (0 disables)") } var cmdS3 = &Command{ @@ -343,6 +345,7 @@ func (s3opt *S3Options) startS3Server() bool { GrpcPort: *s3opt.portGrpc, ExternalUrl: s3opt.resolveExternalUrl(), DefaultFileMode: defaultFileMode, + CacheSizeMB: *s3opt.cacheSizeMB, }) if s3ApiServer_err != nil { glog.Fatalf("S3 API Server startup error: %v", s3ApiServer_err) diff --git a/weed/command/server.go b/weed/command/server.go index caa456478..c6e42d9a4 100644 --- a/weed/command/server.go +++ b/weed/command/server.go @@ -181,6 +181,7 @@ func init() { s3Options.cipher = cmdServer.Flag.Bool("s3.encryptVolumeData", false, "encrypt data on volume servers for S3 uploads") s3Options.externalUrl = cmdServer.Flag.String("s3.externalUrl", "", "the external URL clients use to connect (e.g. https://api.example.com:9000). Used for S3 signature verification behind a reverse proxy. Falls back to S3_EXTERNAL_URL env var.") s3Options.defaultFileMode = cmdServer.Flag.String("s3.defaultFileMode", "", "default file mode for S3 uploaded objects, e.g. 0660, 0644, 0666") + s3Options.cacheSizeMB = cmdServer.Flag.Int64("s3.cacheCapacityMB", 0, "in-memory chunk cache capacity in MB for S3 GETs shared across requests (0 disables)") sftpOptions.port = cmdServer.Flag.Int("sftp.port", 2022, "SFTP server listen port") sftpOptions.sshPrivateKey = cmdServer.Flag.String("sftp.sshPrivateKey", "", "path to the SSH private key file for host authentication") diff --git a/weed/s3api/s3api_server.go b/weed/s3api/s3api_server.go index f3a606b92..ba983bf92 100644 --- a/weed/s3api/s3api_server.go +++ b/weed/s3api/s3api_server.go @@ -62,8 +62,16 @@ type S3ApiServerOption struct { GrpcPort int ExternalUrl string // external URL clients use, for signature verification behind a reverse proxy DefaultFileMode uint32 // default file permission mode for S3 uploads (e.g. 0660, 0644) + CacheSizeMB int64 // in-memory chunk cache capacity in MB for the shared ReaderCache; 0 disables } +// s3ChunkCacheChunkSizeMB is the assumed chunk size (in MiB) used to convert +// CacheSizeMB into the entry count the in-memory cache accepts. This matches +// the default -filer.maxMB for all filer/webdav/mini flag sites. It is NOT a +// hard limit — larger chunks still get cached, this just means the byte budget +// is approximate when upload-side chunking is configured larger. +const s3ChunkCacheChunkSizeMB = 4 + type S3ApiServer struct { s3_pb.UnimplementedSeaweedS3IamCacheServer option *S3ApiServerOption @@ -185,13 +193,23 @@ func NewS3ApiServerWithStore(router *mux.Router, option *S3ApiServerOption, expl } }() - // Shared ReaderCache for the S3 GET streaming path. The chunk cache is - // nil for now — all TieredChunkCache receiver methods are nil-safe. A - // follow-up adds an in-memory chunk cache on top. Keeping this shared + // Shared ReaderCache for the S3 GET streaming path. Keeping this shared // (rather than per-request) avoids the per-request Close(), which would // otherwise wait for background chunk downloads that run on // context.Background() even after the client disconnects. // + // The underlying ChunkCache is controlled by option.CacheSizeMB below: + // - CacheSizeMB == 0: a nil *chunk_cache.TieredChunkCache is used (its + // receiver methods are nil-safe). Completed chunks are not deposited + // into a cross-request cache — concurrent readers still share in-flight + // downloads through the ReaderCache's downloaders map, but repeat reads + // refetch from volume servers. + // - CacheSizeMB > 0: a chunk_cache.ChunkCacheInMemory is created and + // wrapped in the ReaderCache, so repeat and concurrent reads hit + // memory. maxEntries is approximated from the byte budget and the + // assumed chunk size (s3ChunkCacheChunkSizeMB), clamped to a small + // floor so tiny caches still function. + // // Downloader slots: each slot holds one in-flight / recently-completed // chunk buffer (~4 MiB by default), so this caps both peak memory for // in-flight chunks (s3ReaderCacheDownloaderLimit × chunkSize) and the @@ -199,7 +217,30 @@ func NewS3ApiServerWithStore(router *mux.Router, option *S3ApiServerOption, expl // because it typically has a handful of clients; S3 serves many // concurrent readers, so we pick a more generous default here. const s3ReaderCacheDownloaderLimit = 256 - readerCache := filer.NewReaderCache(s3ReaderCacheDownloaderLimit, (*chunk_cache.TieredChunkCache)(nil), filerClient.GetLookupFileIdFunction()) + + // Negative CacheSizeMB is a misconfiguration; fail fast rather than + // silently behaving like 0. + if option.CacheSizeMB < 0 { + return nil, fmt.Errorf("invalid -s3.cacheCapacityMB %d: must be >= 0", option.CacheSizeMB) + } + var chunkCache chunk_cache.ChunkCache + if option.CacheSizeMB > 0 { + // ccache sizes entries by count; convert the configured byte budget + // via the assumed chunk size. Clamp to a floor so tiny caches still + // function. + maxEntries := option.CacheSizeMB / s3ChunkCacheChunkSizeMB + if maxEntries < 8 { + maxEntries = 8 + } + chunkCache = chunk_cache.NewChunkCacheInMemory(maxEntries) + // Log the effective capacity after the floor clamp, not the configured + // value — a user passing `-s3.cacheCapacityMB=1` actually gets 8 entries + // ≈ 32 MiB because of the floor. + glog.V(0).Infof("s3 chunk cache enabled: in-memory, ~%dMB (%d chunks of ~%dMB)", maxEntries*s3ChunkCacheChunkSizeMB, maxEntries, s3ChunkCacheChunkSizeMB) + } else { + chunkCache = (*chunk_cache.TieredChunkCache)(nil) + } + readerCache := filer.NewReaderCache(s3ReaderCacheDownloaderLimit, chunkCache, filerClient.GetLookupFileIdFunction()) s3ApiServer = &S3ApiServer{ option: option,