Replace hardcoded limits with a config file section.

This commit is contained in:
Catherine
2025-09-21 18:32:10 +00:00
parent e59699ab1a
commit 51606aac98
11 changed files with 109 additions and 37 deletions

View File

@@ -21,3 +21,11 @@ root = "data"
# secret-access-key = "zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG"
# region = "us-east-1"
# bucket = "git-pages-demo"
# [limits]
# max-site-size = "128M"
# max-manifest-size = "1M"
# max-inline-file-size = "256B"
# git-large-object-threshold = "1M"
# max-symlink-depth = 16
# update-timeout = "60s"
# max-heap-size-ratio = 0.5

View File

@@ -42,7 +42,7 @@
"-s -w"
];
vendorHash = "sha256-4S4ccnyBuYMFRrFHAxy5N1JeNj9n43xO7+wg5hlCdL0=";
vendorHash = "sha256-RYtQ0+pPzfYeFjPxlJrnSPvceHcG1kyaWu9BFrxGoB4=";
fixupPhase = ''
# Apparently `go install` doesn't support renaming the binary, so country girls make do.

2
go.mod
View File

@@ -4,6 +4,8 @@ go 1.25.0
require (
github.com/KimMachineGun/automemlimit v0.7.4
github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500
github.com/creasty/defaults v1.8.0
github.com/go-git/go-billy/v6 v6.0.0-20250627091229-31e2a16eef30
github.com/go-git/go-git/v6 v6.0.0-20250910120214-3a68d0404116
github.com/honeybadger-io/honeybadger-go v0.8.0

4
go.sum
View File

@@ -10,8 +10,12 @@ github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFI
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500 h1:6lhrsTEnloDPXyeZBvSYvQf8u86jbKehZPVDDlkgDl4=
github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500/go.mod h1:S/7n9copUssQ56c7aAgHqftWO4LTf4xY6CGWt8Bc+3M=
github.com/cloudflare/circl v1.6.1 h1:zqIqSPIndyBh1bjLVVDHMPpVKqp8Su/V+6MeDzzQBQ0=
github.com/cloudflare/circl v1.6.1/go.mod h1:uddAzsPgqdMAYatqJ0lsjX1oECcQLIlRpzZh3pJrofs=
github.com/creasty/defaults v1.8.0 h1:z27FJxCAa0JKt3utc0sCImAEb+spPucmKoOdLHvHYKk=
github.com/creasty/defaults v1.8.0/go.mod h1:iGzKe6pbEHnpMPtfDXZEr0NVxWnPTjb1bbDy08fPzYM=
github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s=
github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=

View File

@@ -2,7 +2,10 @@ package main
import (
"os"
"time"
"github.com/c2h5oh/datasize"
"github.com/creasty/defaults"
"github.com/pelletier/go-toml/v2"
)
@@ -40,6 +43,25 @@ type Config struct {
SiteCache CacheConfig `toml:"site-cache"`
}
} `toml:"backend"`
Limits struct {
// Maximum size of a single published site. Also used to limit the size of archive
// uploads and other similar overconsumption conditions.
MaxSiteSize datasize.ByteSize `toml:"max-site-size" default:"128M"`
// Maximum size of a single site manifest, computed over its binary Protobuf
// serialization.
MaxManifestSize datasize.ByteSize `toml:"max-manifest-size" default:"1M"`
// Maximum size of a file that will still be inlined into the site manifest.
MaxInlineFileSize datasize.ByteSize `toml:"max-inline-file-size" default:"256B"`
// Maximum size of a Git object that will be cached in memory during Git operations.
GitLargeObjectThreshold datasize.ByteSize `toml:"git-large-object-threshold" default:"1M"`
// Maximum number of symbolic link traversals before the path is considered unreachable.
MaxSymlinkDepth uint `toml:"max-symlink-depth" default:"16"`
// Maximum time that an update operation (PUT or POST request) could take before being
// interrupted.
UpdateTimeout time.Duration `toml:"update-timeout" default:"60s"`
// Soft limit on Go heap size, expressed as a fraction of total available RAM.
MaxHeapSizeRatio float64 `toml:"max-heap-size-ratio" default:"0.5"`
} `toml:"limits"`
}
var config Config
@@ -53,7 +75,13 @@ func ReadConfig(path string) error {
decoder := toml.NewDecoder(file)
decoder.DisallowUnknownFields()
return decoder.Decode(&config)
if err := decoder.Decode(&config); err != nil {
return err
}
defaults.MustSet(&config)
return nil
}
func updateFromEnv(dest *string, key string) {

View File

@@ -10,12 +10,18 @@ import (
"io"
"strings"
"github.com/c2h5oh/datasize"
"github.com/klauspost/compress/zstd"
"google.golang.org/protobuf/proto"
)
var ErrArchiveTooLarge = errors.New("archive too large")
func ExtractTar(reader io.Reader) (*Manifest, error) {
archive := tar.NewReader(reader)
boundedReader := ReadAtMost(reader, int64(config.Limits.MaxSiteSize.Bytes()),
fmt.Errorf("%w: %s limit exceeded", ErrArchiveTooLarge, config.Limits.MaxSiteSize.HR()))
archive := tar.NewReader(boundedReader)
manifest := Manifest{
Contents: map[string]*Entry{
@@ -46,7 +52,7 @@ func ExtractTar(reader io.Reader) (*Manifest, error) {
case tar.TypeReg:
fileData, err := io.ReadAll(archive)
if err != nil {
return nil, fmt.Errorf("tar: read %s: %w", fileName, err)
return nil, fmt.Errorf("tar: %s: %w", fileName, err)
}
manifestEntry.Type = Type_InlineFile.Enum()
@@ -78,6 +84,7 @@ func ExtractTarGzip(reader io.Reader) (*Manifest, error) {
}
defer stream.Close()
// stream length is limited in `ExtractTar`
return ExtractTar(stream)
}
@@ -88,11 +95,10 @@ func ExtractTarZstd(reader io.Reader) (*Manifest, error) {
}
defer stream.Close()
// stream length is limited in `ExtractTar`
return ExtractTar(stream)
}
var errZipBomb = errors.New("zip file size limit exceeded")
func ExtractZip(reader io.Reader) (*Manifest, error) {
data, err := io.ReadAll(reader)
if err != nil {
@@ -109,8 +115,12 @@ func ExtractZip(reader io.Reader) (*Manifest, error) {
for _, file := range archive.File {
totalSize += file.UncompressedSize64
}
if totalSize > SiteSizeMax {
return nil, fmt.Errorf("%w: %d > %d bytes", errZipBomb, totalSize, SiteSizeMax)
if totalSize > config.Limits.MaxSiteSize.Bytes() {
return nil, fmt.Errorf("%w: decompressed size %s exceeds %s limit",
ErrArchiveTooLarge,
datasize.ByteSize(totalSize).HR(),
config.Limits.MaxSiteSize.HR(),
)
}
manifest := Manifest{
@@ -129,7 +139,7 @@ func ExtractZip(reader io.Reader) (*Manifest, error) {
fileData, err := io.ReadAll(fileReader)
if err != nil {
return nil, fmt.Errorf("zip: read %s: %w", file.Name, err)
return nil, fmt.Errorf("zip: %s: %w", file.Name, err)
}
manifestEntry.Type = Type_InlineFile.Enum()

View File

@@ -16,8 +16,6 @@ import (
"google.golang.org/protobuf/proto"
)
const largeObjectThreshold int64 = 1048576
func FetchRepository(ctx context.Context, repoURL string, branch string) (*Manifest, error) {
baseDir, err := os.MkdirTemp("", "fetchRepo")
if err != nil {
@@ -29,7 +27,7 @@ func FetchRepository(ctx context.Context, repoURL string, branch string) (*Manif
cache := cache.NewObjectLRUDefault()
storer := filesystem.NewStorageWithOptions(fs, cache, filesystem.Options{
ExclusiveAccess: true,
LargeObjectThreshold: largeObjectThreshold,
LargeObjectThreshold: int64(config.Limits.GitLargeObjectThreshold.Bytes()),
})
repo, err := git.CloneContext(ctx, storer, nil, &git.CloneOptions{
Bare: true,

View File

@@ -103,7 +103,7 @@ func main() {
memlimit.FromSystem,
),
),
memlimit.WithRatio(0.9),
memlimit.WithRatio(float64(config.Limits.MaxHeapSizeRatio)),
)
if *getManifest != "" {

View File

@@ -12,6 +12,7 @@ import (
"strings"
"sync"
"github.com/c2h5oh/datasize"
"google.golang.org/protobuf/encoding/protojson"
"google.golang.org/protobuf/proto"
)
@@ -91,14 +92,12 @@ func ManifestDebugJSON(manifest *Manifest) string {
return string(result)
}
const maxSymlinkLevels int = 128
var errSymlinkLoop = errors.New("symbolic link loop")
var ErrSymlinkLoop = errors.New("symbolic link loop")
func ExpandSymlinks(manifest *Manifest, inPath string) (string, error) {
var levels int
var levels uint
again:
for levels = 0; levels < maxSymlinkLevels; levels += 1 {
for levels = 0; levels < config.Limits.MaxSymlinkDepth; levels += 1 {
parts := strings.Split(inPath, "/")
for i := 1; i <= len(parts); i++ {
linkPath := path.Join(parts[:i]...)
@@ -114,10 +113,10 @@ again:
}
break
}
if levels < maxSymlinkLevels {
if levels < config.Limits.MaxSymlinkDepth {
return inPath, nil
} else {
return "", errSymlinkLoop
return "", ErrSymlinkLoop
}
}
@@ -135,8 +134,6 @@ func PrepareManifest(manifest *Manifest) error {
return nil
}
const ExternalSizeMin uint32 = 256
// Replaces inline file data over certain size with references to an external content-addressable
// store, without performing any I/O. Returns an updated copy of the manifest.
func ExternalizeFiles(manifest *Manifest) *Manifest {
@@ -150,7 +147,9 @@ func ExternalizeFiles(manifest *Manifest) *Manifest {
}
var totalSize uint32
for name, entry := range manifest.Contents {
if entry.GetType() == Type_InlineFile && entry.GetSize() > ExternalSizeMin {
canBeInlined := entry.GetType() == Type_InlineFile &&
entry.GetSize() > uint32(config.Limits.MaxInlineFileSize.Bytes())
if canBeInlined {
newManifest.Contents[name] = &Entry{
Type: Type_ExternalFile.Enum(),
Size: entry.Size,
@@ -165,18 +164,19 @@ func ExternalizeFiles(manifest *Manifest) *Manifest {
return &newManifest
}
const ManifestSizeMax int = 1048576
var errManifestTooLarge = errors.New("manifest size limit exceeded")
var ErrManifestTooLarge = errors.New("manifest too large")
// Uploads inline file data over certain size to the storage backend. Returns a copy of
// the manifest updated to refer to an external content-addressable store.
func StoreManifest(name string, manifest *Manifest) (*Manifest, error) {
extManifest := ExternalizeFiles(manifest)
extManifestData := EncodeManifest(extManifest)
if len(extManifestData) > ManifestSizeMax {
return nil, fmt.Errorf("%w: %d > %d bytes",
errManifestTooLarge, extManifestData, ManifestSizeMax)
if uint64(len(extManifestData)) > config.Limits.MaxManifestSize.Bytes() {
return nil, fmt.Errorf("%w: decompressed size %s exceeds %s limit",
ErrManifestTooLarge,
datasize.ByteSize(len(extManifestData)).HR(),
config.Limits.MaxManifestSize,
)
}
if err := backend.StageManifest(extManifest); err != nil {

View File

@@ -17,7 +17,6 @@ import (
)
const notFoundPage = "404.html"
const updateTimeout = 60 * time.Second
func makeWebRoot(host string, projectName string) string {
return fmt.Sprintf("%s/%s", strings.ToLower(host), projectName)
@@ -171,8 +170,6 @@ func getPage(w http.ResponseWriter, r *http.Request) error {
return nil
}
const SiteSizeMax = 512 * 1048576
func putPage(w http.ResponseWriter, r *http.Request) error {
var result UpdateResult
@@ -219,7 +216,7 @@ func putPage(w http.ResponseWriter, r *http.Request) error {
return err
}
ctx, cancel := context.WithTimeout(r.Context(), updateTimeout)
ctx, cancel := context.WithTimeout(r.Context(), config.Limits.UpdateTimeout)
defer cancel()
result = UpdateFromRepository(ctx, webRoot, repoURL, branch)
} else {
@@ -229,17 +226,17 @@ func putPage(w http.ResponseWriter, r *http.Request) error {
}
// request body contains archive
reader := http.MaxBytesReader(w, r.Body, SiteSizeMax)
reader := http.MaxBytesReader(w, r.Body, int64(config.Limits.MaxSiteSize.Bytes()))
result = UpdateFromArchive(webRoot, contentType, reader)
}
switch result.outcome {
case UpdateError:
if errors.Is(result.err, errManifestTooLarge) {
if errors.Is(result.err, ErrManifestTooLarge) {
w.WriteHeader(http.StatusRequestEntityTooLarge)
} else if errors.Is(result.err, errArchiveFormat) {
w.WriteHeader(http.StatusUnsupportedMediaType)
} else if errors.Is(result.err, errZipBomb) {
} else if errors.Is(result.err, ErrArchiveTooLarge) {
w.WriteHeader(http.StatusRequestEntityTooLarge)
} else {
w.WriteHeader(http.StatusServiceUnavailable)
@@ -372,7 +369,7 @@ func postPage(w http.ResponseWriter, r *http.Request) error {
return err
}
ctx, cancel := context.WithTimeout(r.Context(), updateTimeout)
ctx, cancel := context.WithTimeout(r.Context(), config.Limits.UpdateTimeout)
defer cancel()
result := UpdateFromRepository(ctx, webRoot, repoURL, "pages")
switch result.outcome {

25
src/util.go Normal file
View File

@@ -0,0 +1,25 @@
package main
import "io"
// BoundedReader wraps an io.Reader and reports a caller-supplied error when the
// wrapped stream turns out to be longer than a fixed byte budget.
type BoundedReader struct {
	inner io.Reader // stream being limited
	fuel  int64     // bytes that may still be handed out; negative once the budget was exceeded
	err   error     // error reported once the budget was exceeded
}

// ReadAtMost returns a reader that yields at most count bytes from reader.
//
// A stream of count bytes or fewer is passed through unchanged, including the
// inner reader's own terminating error (normally io.EOF). If the stream holds
// more than count bytes, the first count bytes are delivered and every later
// Read returns err. A negative count makes every Read return err.
//
// err should be non-nil: with a nil err, Read returns (0, nil) once the limit
// is exceeded.
func ReadAtMost(reader io.Reader, count int64, err error) io.Reader {
	return &BoundedReader{inner: reader, fuel: count, err: err}
}

// Read implements io.Reader. It never returns more bytes than remain in the
// budget, and it distinguishes "stream ended exactly at the limit" from
// "stream is longer than the limit".
func (reader *BoundedReader) Read(dest []byte) (count int, err error) {
	if reader.fuel < 0 {
		// The limit was already exceeded (or the budget was negative from the start).
		return 0, reader.err
	}
	if reader.fuel == 0 {
		// The budget is used up, but that alone does not mean the stream is too
		// long: probe for one more byte so that a stream of exactly the permitted
		// length still terminates with the inner reader's own error.
		var probe [1]byte
		probed, probeErr := reader.inner.Read(probe[:])
		if probed > 0 {
			// Make the overflow sticky; the probed byte is intentionally dropped.
			reader.fuel = -1
			return 0, reader.err
		}
		return 0, probeErr
	}
	if int64(len(dest)) > reader.fuel {
		// Never let the inner reader hand out more than the remaining budget.
		dest = dest[:reader.fuel]
	}
	count, err = reader.inner.Read(dest)
	reader.fuel -= int64(count)
	return count, err
}