diff --git a/conf/config.toml.example b/conf/config.toml.example index 46ddec9..1b44279 100644 --- a/conf/config.toml.example +++ b/conf/config.toml.example @@ -21,3 +21,11 @@ root = "data" # secret-access-key = "zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG" # region = "us-east-1" # bucket = "git-pages-demo" + +# [limits] +# max-site-size = "128M" +# max-manifest-size = "1M" +# max-inline-file-size = "256B" +# git-large-object-threshold = "1M" +# max-symlink-depth = 16 +# update-timeout = "60s" diff --git a/flake.nix b/flake.nix index 1f696c0..c0bd0b9 100644 --- a/flake.nix +++ b/flake.nix @@ -42,7 +42,7 @@ "-s -w" ]; - vendorHash = "sha256-4S4ccnyBuYMFRrFHAxy5N1JeNj9n43xO7+wg5hlCdL0="; + vendorHash = "sha256-RYtQ0+pPzfYeFjPxlJrnSPvceHcG1kyaWu9BFrxGoB4="; fixupPhase = '' # Apparently `go install` doesn't support renaming the binary, so country girls make do. diff --git a/go.mod b/go.mod index 045ed20..f710a5c 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,8 @@ go 1.25.0 require ( github.com/KimMachineGun/automemlimit v0.7.4 + github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500 + github.com/creasty/defaults v1.8.0 github.com/go-git/go-billy/v6 v6.0.0-20250627091229-31e2a16eef30 github.com/go-git/go-git/v6 v6.0.0-20250910120214-3a68d0404116 github.com/honeybadger-io/honeybadger-go v0.8.0 diff --git a/go.sum b/go.sum index 036f8c2..1b7b410 100644 --- a/go.sum +++ b/go.sum @@ -10,8 +10,12 @@ github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFI github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= +github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500 h1:6lhrsTEnloDPXyeZBvSYvQf8u86jbKehZPVDDlkgDl4= +github.com/c2h5oh/datasize 
v0.0.0-20231215233829-aa82cc1e6500/go.mod h1:S/7n9copUssQ56c7aAgHqftWO4LTf4xY6CGWt8Bc+3M= github.com/cloudflare/circl v1.6.1 h1:zqIqSPIndyBh1bjLVVDHMPpVKqp8Su/V+6MeDzzQBQ0= github.com/cloudflare/circl v1.6.1/go.mod h1:uddAzsPgqdMAYatqJ0lsjX1oECcQLIlRpzZh3pJrofs= +github.com/creasty/defaults v1.8.0 h1:z27FJxCAa0JKt3utc0sCImAEb+spPucmKoOdLHvHYKk= +github.com/creasty/defaults v1.8.0/go.mod h1:iGzKe6pbEHnpMPtfDXZEr0NVxWnPTjb1bbDy08fPzYM= github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s= github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/src/config.go b/src/config.go index a9e61dd..892a250 100644 --- a/src/config.go +++ b/src/config.go @@ -2,7 +2,10 @@ package main import ( "os" + "time" + "github.com/c2h5oh/datasize" + "github.com/creasty/defaults" "github.com/pelletier/go-toml/v2" ) @@ -40,6 +43,25 @@ type Config struct { SiteCache CacheConfig `toml:"site-cache"` } } `toml:"backend"` + Limits struct { + // Maximum size of a single published site. Also used to limit the size of archive + // uploads and other similar overconsumption conditions. + MaxSiteSize datasize.ByteSize `toml:"max-site-size" default:"128M"` + // Maximum size of a single site manifest, computed over its binary Protobuf + // serialization. + MaxManifestSize datasize.ByteSize `toml:"max-manifest-size" default:"1M"` + // Maximum size of a file that will still be inlined into the site manifest. + MaxInlineFileSize datasize.ByteSize `toml:"max-inline-file-size" default:"256B"` + // Maximum size of a Git object that will be cached in memory during Git operations. + GitLargeObjectThreshold datasize.ByteSize `toml:"git-large-object-threshold" default:"1M"` + // Maximum number of symbolic link traversals before the path is considered unreachable. 
+ MaxSymlinkDepth uint `toml:"max-symlink-depth" default:"16"` + // Maximum time that an update operation (PUT or POST request) could take before being + // interrupted. + UpdateTimeout time.Duration `toml:"update-timeout" default:"60s"` + // Soft limit on Go heap size, expressed as a fraction of total available RAM. + MaxHeapSizeRatio float64 `toml:"max-heap-size-ratio" default:"0.5"` + } `toml:"limits"` } var config Config @@ -53,7 +75,13 @@ func ReadConfig(path string) error { decoder := toml.NewDecoder(file) decoder.DisallowUnknownFields() - return decoder.Decode(&config) + // Seed the defaults before decoding. They only fill zero-valued fields, so applying them + // afterwards would silently override limits that were explicitly configured as zero. + if err := defaults.Set(&config); err != nil { + return err + } + + return decoder.Decode(&config) } func updateFromEnv(dest *string, key string) { diff --git a/src/extract.go b/src/extract.go index dd9a334..ad4f1bb 100644 --- a/src/extract.go +++ b/src/extract.go @@ -10,12 +10,18 @@ import ( "io" "strings" + "github.com/c2h5oh/datasize" "github.com/klauspost/compress/zstd" "google.golang.org/protobuf/proto" ) +var ErrArchiveTooLarge = errors.New("archive too large") + func ExtractTar(reader io.Reader) (*Manifest, error) { - archive := tar.NewReader(reader) + boundedReader := ReadAtMost(reader, int64(config.Limits.MaxSiteSize.Bytes()), + fmt.Errorf("%w: %s limit exceeded", ErrArchiveTooLarge, config.Limits.MaxSiteSize.HR())) + + archive := tar.NewReader(boundedReader) manifest := Manifest{ Contents: map[string]*Entry{ @@ -46,7 +52,7 @@ func ExtractTar(reader io.Reader) (*Manifest, error) { case tar.TypeReg: fileData, err := io.ReadAll(archive) if err != nil { - return nil, fmt.Errorf("tar: read %s: %w", fileName, err) + return nil, fmt.Errorf("tar: %s: %w", fileName, err) } manifestEntry.Type = Type_InlineFile.Enum() @@ -78,6 +84,7 @@ func ExtractTarGzip(reader io.Reader) (*Manifest, error) { } defer stream.Close() + // stream length is limited in `ExtractTar` return ExtractTar(stream) } @@ -88,11 +95,10 @@ func ExtractTarZstd(reader io.Reader) (*Manifest, error) { } defer 
stream.Close() + // stream length is limited in `ExtractTar` return ExtractTar(stream) } -var errZipBomb = errors.New("zip file size limit exceeded") - func ExtractZip(reader io.Reader) (*Manifest, error) { data, err := io.ReadAll(reader) if err != nil { @@ -109,8 +115,12 @@ func ExtractZip(reader io.Reader) (*Manifest, error) { for _, file := range archive.File { totalSize += file.UncompressedSize64 } - if totalSize > SiteSizeMax { - return nil, fmt.Errorf("%w: %d > %d bytes", errZipBomb, totalSize, SiteSizeMax) + if totalSize > config.Limits.MaxSiteSize.Bytes() { + return nil, fmt.Errorf("%w: decompressed size %s exceeds %s limit", + ErrArchiveTooLarge, + datasize.ByteSize(totalSize).HR(), + config.Limits.MaxSiteSize.HR(), + ) } manifest := Manifest{ @@ -129,7 +139,7 @@ func ExtractZip(reader io.Reader) (*Manifest, error) { fileData, err := io.ReadAll(fileReader) if err != nil { - return nil, fmt.Errorf("zip: read %s: %w", file.Name, err) + return nil, fmt.Errorf("zip: %s: %w", file.Name, err) } manifestEntry.Type = Type_InlineFile.Enum() diff --git a/src/fetch.go b/src/fetch.go index 98249e9..f89ecb8 100644 --- a/src/fetch.go +++ b/src/fetch.go @@ -16,8 +16,6 @@ import ( "google.golang.org/protobuf/proto" ) -const largeObjectThreshold int64 = 1048576 - func FetchRepository(ctx context.Context, repoURL string, branch string) (*Manifest, error) { baseDir, err := os.MkdirTemp("", "fetchRepo") if err != nil { @@ -29,7 +27,7 @@ func FetchRepository(ctx context.Context, repoURL string, branch string) (*Manif cache := cache.NewObjectLRUDefault() storer := filesystem.NewStorageWithOptions(fs, cache, filesystem.Options{ ExclusiveAccess: true, - LargeObjectThreshold: largeObjectThreshold, + LargeObjectThreshold: int64(config.Limits.GitLargeObjectThreshold.Bytes()), }) repo, err := git.CloneContext(ctx, storer, nil, &git.CloneOptions{ Bare: true, diff --git a/src/main.go b/src/main.go index 13ec16c..274f71b 100644 --- a/src/main.go +++ b/src/main.go @@ -103,7 +103,7 @@ 
func main() { memlimit.FromSystem, ), ), - memlimit.WithRatio(0.9), + memlimit.WithRatio(float64(config.Limits.MaxHeapSizeRatio)), ) if *getManifest != "" { diff --git a/src/manifest.go b/src/manifest.go index b85a8ba..c06428f 100644 --- a/src/manifest.go +++ b/src/manifest.go @@ -12,6 +12,7 @@ import ( "strings" "sync" + "github.com/c2h5oh/datasize" "google.golang.org/protobuf/encoding/protojson" "google.golang.org/protobuf/proto" ) @@ -91,14 +92,12 @@ func ManifestDebugJSON(manifest *Manifest) string { return string(result) } -const maxSymlinkLevels int = 128 - -var errSymlinkLoop = errors.New("symbolic link loop") +var ErrSymlinkLoop = errors.New("symbolic link loop") func ExpandSymlinks(manifest *Manifest, inPath string) (string, error) { - var levels int + var levels uint again: - for levels = 0; levels < maxSymlinkLevels; levels += 1 { + for levels = 0; levels < config.Limits.MaxSymlinkDepth; levels += 1 { parts := strings.Split(inPath, "/") for i := 1; i <= len(parts); i++ { linkPath := path.Join(parts[:i]...) @@ -114,10 +113,10 @@ again: } break } - if levels < maxSymlinkLevels { + if levels < config.Limits.MaxSymlinkDepth { return inPath, nil } else { - return "", errSymlinkLoop + return "", ErrSymlinkLoop } } @@ -135,8 +134,6 @@ func PrepareManifest(manifest *Manifest) error { return nil } -const ExternalSizeMin uint32 = 256 - // Replaces inline file data over certain size with references to an external content-addressable // store, without performing any I/O. Returns an updated copy of the manifest. 
func ExternalizeFiles(manifest *Manifest) *Manifest { @@ -150,7 +147,9 @@ func ExternalizeFiles(manifest *Manifest) *Manifest { } var totalSize uint32 for name, entry := range manifest.Contents { - if entry.GetType() == Type_InlineFile && entry.GetSize() > ExternalSizeMin { + shouldExternalize := entry.GetType() == Type_InlineFile && + entry.GetSize() > uint32(config.Limits.MaxInlineFileSize.Bytes()) + if shouldExternalize { newManifest.Contents[name] = &Entry{ Type: Type_ExternalFile.Enum(), Size: entry.Size, @@ -165,18 +164,19 @@ func ExternalizeFiles(manifest *Manifest) *Manifest { return &newManifest } -const ManifestSizeMax int = 1048576 - -var errManifestTooLarge = errors.New("manifest size limit exceeded") +var ErrManifestTooLarge = errors.New("manifest too large") // Uploads inline file data over certain size to the storage backend. Returns a copy of // the manifest updated to refer to an external content-addressable store. func StoreManifest(name string, manifest *Manifest) (*Manifest, error) { extManifest := ExternalizeFiles(manifest) extManifestData := EncodeManifest(extManifest) - if len(extManifestData) > ManifestSizeMax { - return nil, fmt.Errorf("%w: %d > %d bytes", - errManifestTooLarge, extManifestData, ManifestSizeMax) + if uint64(len(extManifestData)) > config.Limits.MaxManifestSize.Bytes() { + return nil, fmt.Errorf("%w: size %s exceeds %s limit", + ErrManifestTooLarge, + datasize.ByteSize(len(extManifestData)).HR(), + config.Limits.MaxManifestSize.HR(), + ) } if err := backend.StageManifest(extManifest); err != nil { diff --git a/src/pages.go b/src/pages.go index 5ba8b07..69fb830 100644 --- a/src/pages.go +++ b/src/pages.go @@ -17,7 +17,6 @@ import ( ) const notFoundPage = "404.html" -const updateTimeout = 60 * time.Second func makeWebRoot(host string, projectName string) string { return fmt.Sprintf("%s/%s", strings.ToLower(host), projectName) @@ -171,8 +170,6 @@ func getPage(w http.ResponseWriter, r *http.Request) error { return nil } -const 
SiteSizeMax = 512 * 1048576 - func putPage(w http.ResponseWriter, r *http.Request) error { var result UpdateResult @@ -219,7 +216,7 @@ func putPage(w http.ResponseWriter, r *http.Request) error { return err } - ctx, cancel := context.WithTimeout(r.Context(), updateTimeout) + ctx, cancel := context.WithTimeout(r.Context(), config.Limits.UpdateTimeout) defer cancel() result = UpdateFromRepository(ctx, webRoot, repoURL, branch) } else { @@ -229,17 +226,17 @@ func putPage(w http.ResponseWriter, r *http.Request) error { } // request body contains archive - reader := http.MaxBytesReader(w, r.Body, SiteSizeMax) + reader := http.MaxBytesReader(w, r.Body, int64(config.Limits.MaxSiteSize.Bytes())) result = UpdateFromArchive(webRoot, contentType, reader) } switch result.outcome { case UpdateError: - if errors.Is(result.err, errManifestTooLarge) { + if errors.Is(result.err, ErrManifestTooLarge) { w.WriteHeader(http.StatusRequestEntityTooLarge) } else if errors.Is(result.err, errArchiveFormat) { w.WriteHeader(http.StatusUnsupportedMediaType) - } else if errors.Is(result.err, errZipBomb) { + } else if errors.Is(result.err, ErrArchiveTooLarge) { w.WriteHeader(http.StatusRequestEntityTooLarge) } else { w.WriteHeader(http.StatusServiceUnavailable) @@ -372,7 +369,7 @@ func postPage(w http.ResponseWriter, r *http.Request) error { return err } - ctx, cancel := context.WithTimeout(r.Context(), updateTimeout) + ctx, cancel := context.WithTimeout(r.Context(), config.Limits.UpdateTimeout) defer cancel() result := UpdateFromRepository(ctx, webRoot, repoURL, "pages") switch result.outcome {
diff --git a/src/util.go b/src/util.go
new file mode 100644
index 0000000..4d92a1c
--- /dev/null
+++ b/src/util.go
@@ -0,0 +1,47 @@
+package main
+
+import "io"
+
+// BoundedReader wraps an io.Reader and caps how many bytes may be read through it. A stream
+// that turns out to be longer than the cap fails with a caller-supplied error rather than
+// ending in io.EOF, so callers can tell an oversized stream from one that simply ended.
+type BoundedReader struct {
+	inner    io.Reader // underlying stream
+	fuel     int64     // bytes that may still be handed to the caller
+	err      error     // reported once the stream proves longer than the cap
+	exceeded bool      // set when the cap was hit, so the error stays sticky
+}
+
+// ReadAtMost returns a reader yielding at most count bytes of reader. A stream of count
+// bytes or fewer passes through unchanged, including its terminating error (normally
+// io.EOF); a longer stream is cut off after count bytes and fails with err. A negative
+// count is treated as zero.
+func ReadAtMost(reader io.Reader, count int64, err error) io.Reader {
+	return &BoundedReader{inner: reader, fuel: max(count, 0), err: err}
+}
+
+// Read implements io.Reader.
+func (reader *BoundedReader) Read(dest []byte) (count int, err error) {
+	if reader.exceeded {
+		return 0, reader.err
+	}
+	if len(dest) == 0 {
+		return 0, nil
+	}
+	// Request one byte beyond the remaining budget (the same approach as
+	// http.MaxBytesReader): if that extra byte never arrives, the stream fits and ends with
+	// the inner reader's own error instead of being rejected at exactly the limit.
+	if int64(len(dest))-1 > reader.fuel {
+		dest = dest[:reader.fuel+1]
+	}
+	count, err = reader.inner.Read(dest)
+	if int64(count) <= reader.fuel {
+		reader.fuel -= int64(count)
+		return count, err
+	}
+	// The extra byte arrived, so the stream is too long: hand over what still fits and fail.
+	count = int(reader.fuel)
+	reader.fuel = 0
+	reader.exceeded = true
+	return count, reader.err
+}