From 1ca67f059011129e5c46114c2aa9da29a9a68436 Mon Sep 17 00:00:00 2001 From: Catherine Date: Thu, 26 Mar 2026 14:52:11 +0000 Subject: [PATCH] Add a configurable limit on concurrent blob uploads. Otherwise uploading a site with over 50,000 files will fail with the default Go runtime configuration. --- conf/config.default.toml | 1 + src/config.go | 2 ++ src/main.go | 7 +++++++ src/manifest.go | 8 ++++++++ 4 files changed, 18 insertions(+) diff --git a/conf/config.default.toml b/conf/config.default.toml index b3a4c68..d98a489 100644 --- a/conf/config.default.toml +++ b/conf/config.default.toml @@ -20,6 +20,7 @@ max-inline-file-size = '256B' git-large-object-threshold = '1MB' max-symlink-depth = 16 update-timeout = '1m0s' +concurrent-uploads = 1024 max-heap-size-ratio = 0.5 forbidden-domains = [] allowed-repository-url-prefixes = [] diff --git a/src/config.go b/src/config.go index 586a968..fe1af2d 100644 --- a/src/config.go +++ b/src/config.go @@ -134,6 +134,8 @@ type LimitsConfig struct { // Maximum time that an update operation (PUT or POST request) could take before being // interrupted. UpdateTimeout Duration `toml:"update-timeout" default:"60s"` + // Maximum number of concurrent blob uploads, globally across every update request. + ConcurrentUploads uint `toml:"concurrent-uploads" default:"1024"` // Soft limit on Go heap size, expressed as a fraction of total available RAM. MaxHeapSizeRatio float64 `toml:"max-heap-size-ratio" default:"0.5"` // List of domains unconditionally forbidden for uploads. diff --git a/src/main.go b/src/main.go index aef5932..ceafaa9 100644 --- a/src/main.go +++ b/src/main.go @@ -61,6 +61,12 @@ func configureMemLimit(ctx context.Context) (err error) { return } +// Can only be safely called during initial configuration. 
+func configureConcurrency(_ context.Context) (err error) { + blobUploadSemaphore = make(chan struct{}, config.Limits.ConcurrentUploads) + return +} + func configureWildcards(_ context.Context) (err error) { newWildcards, err := TranslateWildcards(config.Wildcard) if err != nil { @@ -284,6 +290,7 @@ func Main() { if err = errors.Join( configureFeatures(ctx), configureMemLimit(ctx), + configureConcurrency(ctx), configureWildcards(ctx), configureFallback(ctx), configureAudit(ctx), diff --git a/src/manifest.go b/src/manifest.go index e91bf83..2ed8313 100644 --- a/src/manifest.go +++ b/src/manifest.go @@ -357,6 +357,12 @@ func PrepareManifest(ctx context.Context, manifest *Manifest) error { var ErrSiteTooLarge = errors.New("site too large") var ErrManifestTooLarge = errors.New("manifest too large") +// Limits the number of concurrent uploads, globally across the entire git-pages process. +// As created, the channel is unbuffered and every acquire blocks, so no upload can start; +// configureConcurrency must first replace it with a buffered channel, whose capacity +// then bounds the concurrency. The default *configuration* sets that capacity to 1024. +var blobUploadSemaphore = make(chan struct{}) + // Uploads inline file data over certain size to the storage backend. Returns a copy of // the manifest updated to refer to an external content-addressable store. func StoreManifest( @@ -438,7 +444,9 @@ func StoreManifest( // If the entry in the original manifest is already an external reference, there's no need // to externalize it (and no way for us to do so, since the entry only contains the blob name). if entry.GetType() == Type_ExternalFile && manifest.Contents[name].GetType() == Type_InlineFile { + blobUploadSemaphore <- struct{}{} // acquire (and maybe block) wg.Go(func() { + defer func() { <-blobUploadSemaphore }() // release err := backend.PutBlob(ctx, string(entry.Data), manifest.Contents[name].Data) if err != nil { ch <- fmt.Errorf("put blob %s: %w", name, err)