From bbdaae72802c96ea456301f4a1f035d56d92e6c4 Mon Sep 17 00:00:00 2001
From: miyuko
Date: Sat, 11 Apr 2026 12:00:20 +0000
Subject: [PATCH] Add a domain cache to quickly reject non-existent domains.

The cache is a Bloom filter populated with the domain of every enumerated
manifest. A negative answer short-circuits the request; a positive answer
only means that the domain might be served, so the regular lookup still runs.

---
 go.mod              |   2 +
 go.sum              |   5 ++
 gomod2nix.toml      |   6 ++
 src/backend.go      |   5 +-
 src/backend_fs.go   |   5 ++
 src/backend_s3.go   |  42 +++++++++++++-
 src/caddy.go        |  12 +++-
 src/domain_cache.go | 137 ++++++++++++++++++++++++++++++++++++++++++++
 src/main.go         |   5 ++
 src/observe.go      |   7 +++
 src/pages.go        |  13 ++++-
 src/update.go       |   1 +
 12 files changed, 235 insertions(+), 5 deletions(-)
 create mode 100644 src/domain_cache.go

diff --git a/go.mod b/go.mod
index a2829e3..f269d1e 100644
--- a/go.mod
+++ b/go.mod
@@ -31,6 +31,8 @@ require (
 	github.com/Microsoft/go-winio v0.6.2 // indirect
 	github.com/ProtonMail/go-crypto v1.4.1 // indirect
 	github.com/beorn7/perks v1.0.1 // indirect
+	github.com/bits-and-blooms/bitset v1.24.2 // indirect
+	github.com/bits-and-blooms/bloom/v3 v3.7.1 // indirect
 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
 	github.com/cloudflare/circl v1.6.3 // indirect
 	github.com/cyphar/filepath-securejoin v0.6.1 // indirect
diff --git a/go.sum b/go.sum
index c133707..23416c6 100644
--- a/go.sum
+++ b/go.sum
@@ -14,6 +14,10 @@ github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPd
 github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
 github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
 github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
+github.com/bits-and-blooms/bitset v1.24.2 h1:M7/NzVbsytmtfHbumG+K2bremQPMJuqv1JD3vOaFxp0=
+github.com/bits-and-blooms/bitset v1.24.2/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
+github.com/bits-and-blooms/bloom/v3 v3.7.1 h1:WXovk4TRKZttAMJfoQx6K2DM0zNIt8w+c67UqO+etV0=
+github.com/bits-and-blooms/bloom/v3 v3.7.1/go.mod h1:rZzYLLje2dfzXfAkJNxQQHsKurAyK55KUnL43Euk0hU=
 github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500 h1:6lhrsTEnloDPXyeZBvSYvQf8u86jbKehZPVDDlkgDl4=
 github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500/go.mod h1:S/7n9copUssQ56c7aAgHqftWO4LTf4xY6CGWt8Bc+3M=
 github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
@@ -135,6 +139,7 @@ github.com/tj/assert v0.0.3 h1:Df/BlaZ20mq6kuai7f5z2TvPFiwC3xaWJSDQNiIS3Rk=
 github.com/tj/assert v0.0.3/go.mod h1:Ne6X72Q+TB1AteidzQncjw9PabbMp4PBMZ1k+vd1Pvk=
 github.com/tj/go-redirects v0.0.0-20200911105812-fd1ba1020b37 h1:K11tjwz8zTTSZkz4TUjfLN+y8uJWP38BbyPqZ2yB/Yk=
 github.com/tj/go-redirects v0.0.0-20200911105812-fd1ba1020b37/go.mod h1:E0E2H2gQA+uoi27VCSU+a/BULPtadQA78q3cpTjZbZw=
+github.com/twmb/murmur3 v1.1.8/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ=
 github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
 github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
 github.com/valyala/fasttemplate v1.2.2 h1:lxLXG0uE3Qnshl9QyaK6XJxMXlQZELvChBOCmQD0Loo=
diff --git a/gomod2nix.toml b/gomod2nix.toml
index 727d9b3..8f1ae95 100644
--- a/gomod2nix.toml
+++ b/gomod2nix.toml
@@ -19,6 +19,12 @@ schema = 3
   [mod."github.com/beorn7/perks"]
     version = "v1.0.1"
     hash = "sha256-h75GUqfwJKngCJQVE5Ao5wnO3cfKD9lSIteoLp/3xJ4="
+  [mod."github.com/bits-and-blooms/bitset"]
+    version = "v1.24.2"
+    hash = "sha256-hT88EpdWmKnqdxApJhs/aIAptf33HmtSp2KXPI+Ym7o="
+  [mod."github.com/bits-and-blooms/bloom/v3"]
+    version = "v3.7.1"
+    hash = "sha256-KZduCu+k4+xqBcFRTfg8Yc/PEf5jfpjn0I1YoxfnVPo="
   [mod."github.com/c2h5oh/datasize"]
     version = "v0.0.0-20231215233829-aa82cc1e6500"
     hash = "sha256-8MqL7xCvE6fIjanz2jwkaLP1OE5kLu62TOcQx452DHQ="
diff --git a/src/backend.go b/src/backend.go
index afe8405..1df11b3 100644
--- a/src/backend.go
+++ b/src/backend.go
@@ -138,13 +138,16 @@ type Backend interface {
 	// Create a domain. This allows us to start serving content for the domain.
 	CreateDomain(ctx context.Context, domain string) error
 
-	// Freeze a domain. This allows a site to be administratively locked, e.g. if it
+	// Freeze a domain. This allows a site to be administratively locked, e.g. if it
 	// is discovered serving abusive content.
 	FreezeDomain(ctx context.Context, domain string) error
 
 	// Thaw a domain. This removes the previously placed administrative lock (if any).
 	UnfreezeDomain(ctx context.Context, domain string) error
 
+	// Check whether the set of domains we serve has changed since the time passed to this method.
+	HaveDomainsChanged(ctx context.Context, since time.Time) (changed bool, err error)
+
 	// Append a record to the audit log.
 	AppendAuditLog(ctx context.Context, id AuditID, record *AuditRecord) error
 
diff --git a/src/backend_fs.go b/src/backend_fs.go
index 612d587..d507946 100644
--- a/src/backend_fs.go
+++ b/src/backend_fs.go
@@ -11,6 +11,7 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
+	"time"
 )
 
 type FSBackend struct {
@@ -479,6 +480,10 @@ func (fs *FSBackend) UnfreezeDomain(ctx context.Context, domain string) error {
 	}
 }
 
+func (fs *FSBackend) HaveDomainsChanged(ctx context.Context, since time.Time) (bool, error) {
+	return true, nil // not implemented
+}
+
 func (fs *FSBackend) AppendAuditLog(ctx context.Context, id AuditID, record *AuditRecord) error {
 	if _, err := fs.auditRoot.Stat(id.String()); err == nil {
 		panic(fmt.Errorf("audit ID collision: %s", id))
diff --git a/src/backend_s3.go b/src/backend_s3.go
index 7c4471a..830025f 100644
--- a/src/backend_s3.go
+++ b/src/backend_s3.go
@@ -643,8 +643,11 @@ func (s3 *S3Backend) DeleteManifest(
 
 	err := s3.client.RemoveObject(ctx, s3.bucket, manifestObjectName(name),
 		minio.RemoveObjectOptions{})
+	if err != nil {
+		return err
+	}
 	s3.siteCache.Cache.Invalidate(name)
-	return err
+	return s3.bumpLastDomainUpdateTimestamp(ctx)
 }
 
 func (s3 *S3Backend) EnumerateManifests(ctx context.Context) iter.Seq2[*ManifestMetadata, error] {
@@ -764,8 +767,19 @@ func (s3 *S3Backend) CheckDomain(ctx context.Context, domain string) (exists boo
 func (s3 *S3Backend) CreateDomain(ctx context.Context, domain string) error {
 	logc.Printf(ctx, "s3: create domain %s\n", domain)
 
-	_, err := s3.client.PutObject(ctx, s3.bucket, domainCheckObjectName(domain),
+	exists, err := s3.CheckDomain(ctx, domain)
+	if err != nil {
+		return err
+	}
+
+	_, err = s3.client.PutObject(ctx, s3.bucket, domainCheckObjectName(domain),
 		&bytes.Reader{}, 0, minio.PutObjectOptions{})
+	if err != nil {
+		return err
+	}
+	if !exists {
+		err = s3.bumpLastDomainUpdateTimestamp(ctx)
+	}
 	return err
 }
 
@@ -790,6 +804,30 @@ func (s3 *S3Backend) UnfreezeDomain(ctx context.Context, domain string) error {
 	}
 }
 
+const lastDomainUpdateObjectName = "meta/last-domain-update"
+
+func (s3 *S3Backend) HaveDomainsChanged(ctx context.Context, since time.Time) (bool, error) {
+	info, err := s3.client.StatObject(ctx, s3.bucket, lastDomainUpdateObjectName,
+		minio.GetObjectOptions{})
+	if err != nil {
+		if minio.ToErrorResponse(err).Code == "NoSuchKey" {
+			// The marker has never been written, so there is nothing to compare `since` against;
+			// report a change so that the caller enumerates the domains instead of failing.
+			return true, nil
+		}
+		return false, err
+	}
+
+	return info.LastModified.After(since), nil
+}
+
+func (s3 *S3Backend) bumpLastDomainUpdateTimestamp(ctx context.Context) error {
+	logc.Print(ctx, "s3: bumping last domain update timestamp")
+	_, err := s3.client.PutObject(ctx, s3.bucket, lastDomainUpdateObjectName,
+		&bytes.Reader{}, 0, minio.PutObjectOptions{})
+	return err
+}
+
 func auditObjectName(id AuditID) string {
 	return fmt.Sprintf("audit/%s", id)
 }
diff --git a/src/caddy.go b/src/caddy.go
index 9f39544..2c486c3 100644
--- a/src/caddy.go
+++ b/src/caddy.go
@@ -26,7 +26,17 @@ func ServeCaddy(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	found, err := backend.CheckDomain(r.Context(), strings.ToLower(domain))
+	var err error
+	domain = strings.ToLower(domain)
+
+	// Run a cheap check as to whether we might be serving the domain.
+	var found = domainCache.CheckDomain(r.Context(), domain)
+
+	if found {
+		// A cache hit is not conclusive; run an expensive check as to whether we actually serve it.
+		found, err = backend.CheckDomain(r.Context(), domain)
+	}
+
 	if !found {
 		// If we don't serve the domain, but a fallback server does, then we should let our
 		// Caddy instance request a TLS certificate. Otherwise, we'll never have an opportunity
diff --git a/src/domain_cache.go b/src/domain_cache.go
new file mode 100644
index 0000000..c043942
--- /dev/null
+++ b/src/domain_cache.go
@@ -0,0 +1,137 @@
+package git_pages
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/bits-and-blooms/bloom/v3"
+)
+
+type DomainCache interface {
+	// Check if we might be serving the domain.
+	CheckDomain(ctx context.Context, domain string) (found bool)
+
+	// Add the domain to the cache.
+	AddDomain(ctx context.Context, domain string)
+}
+
+func CreateDomainCache(ctx context.Context) (DomainCache, error) {
+	if !config.Feature("domain-existence-cache") {
+		return &dummyDomainCache{}, nil
+	}
+	return createBloomDomainCache(ctx)
+}
+
+type bloomDomainCache struct {
+	filter   *bloom.BloomFilter
+	filterMu sync.Mutex
+
+	accessCh    chan struct{}
+	refreshMu   sync.Mutex
+	lastRefresh time.Time
+	maxAge      time.Duration
+}
+
+func createBloomDomainCache(ctx context.Context) (DomainCache, error) {
+	cache := bloomDomainCache{
+		accessCh: make(chan struct{}),
+	}
+
+	switch config.Storage.Type {
+	case "fs":
+		// the FS backend has no cache
+	case "s3":
+		cache.maxAge = time.Duration(config.Storage.S3.SiteCache.MaxAge)
+	default:
+		panic(fmt.Errorf("unknown backend: %s", config.Storage.Type))
+	}
+
+	if err := cache.refresh(ctx); err != nil {
+		return nil, err
+	}
+
+	go cache.handleFilterUpdates(ctx)
+
+	return &cache, nil
+}
+
+func (c *bloomDomainCache) handleFilterUpdates(ctx context.Context) {
+	for range c.accessCh {
+		if time.Since(c.lastRefresh) > c.maxAge {
+			logc.Print(ctx, "domain cache: refreshing")
+			if err := c.refresh(ctx); err != nil {
+				logc.Printf(ctx, "domain cache: refresh error: %v", err)
+			}
+		}
+	}
+}
+
+func (c *bloomDomainCache) refresh(ctx context.Context) error {
+	c.refreshMu.Lock()
+	defer c.refreshMu.Unlock()
+
+	// Sample the clock before querying the backend, so that a domain added while the manifests
+	// are being enumerated (and possibly missed) still counts as a change on the next refresh.
+	startedAt := time.Now()
+
+	if changed, err := backend.HaveDomainsChanged(ctx, c.lastRefresh); err != nil {
+		return err
+	} else if !changed {
+		logc.Print(ctx, "domain cache: unchanged")
+		c.lastRefresh = startedAt
+		return nil
+	}
+
+	// Create a 256 KiB Bloom filter that will fit ~150K entries with 0.1% false positive rate.
+	// Note that `bloom.New` takes the size of the filter in bits, not in bytes.
+	filter := bloom.New(256*1024*8, 10)
+	for metadata, err := range backend.EnumerateManifests(ctx) {
+		if err != nil {
+			return fmt.Errorf("enum manifests: %w", err)
+		}
+		domain, _, _ := strings.Cut(metadata.Name, "/")
+		filter.AddString(domain)
+	}
+
+	c.filterMu.Lock()
+	c.filter = filter
+	c.filterMu.Unlock()
+
+	logc.Printf(ctx, "domain cache: refreshed with approx. %d domains", filter.ApproximatedSize())
+	c.lastRefresh = startedAt
+	return nil
+}
+
+func (c *bloomDomainCache) CheckDomain(ctx context.Context, domain string) (found bool) {
+	select {
+	case c.accessCh <- struct{}{}:
+	default:
+	}
+
+	c.filterMu.Lock()
+	found = c.filter.TestString(domain)
+	c.filterMu.Unlock()
+
+	logc.Printf(ctx, "domain cache: bloom filter returns %v for %q", found, domain)
+	return
+}
+
+func (c *bloomDomainCache) AddDomain(ctx context.Context, domain string) {
+	c.refreshMu.Lock()
+	defer c.refreshMu.Unlock()
+
+	c.filterMu.Lock()
+	c.filter.AddString(domain)
+	c.filterMu.Unlock()
+
+	logc.Printf(ctx, "domain cache: added %q", domain)
+}
+
+type dummyDomainCache struct{}
+
+func (d dummyDomainCache) CheckDomain(context.Context, string) bool { return true }
+
+func (d dummyDomainCache) AddDomain(context.Context, string) {}
diff --git a/src/main.go b/src/main.go
index 085d179..facb66c 100644
--- a/src/main.go
+++ b/src/main.go
@@ -33,6 +33,7 @@ var config *Config
 var wildcards []*WildcardPattern
 var fallback http.Handler
 var backend Backend
+var domainCache DomainCache = dummyDomainCache{} // no-op until Main configures a real cache
 
 func configureFeatures(ctx context.Context) (err error) {
 	if len(config.Features) > 0 {
@@ -639,6 +640,10 @@ func Main(versionInfo string) {
 		}
 		backend = NewObservedBackend(backend)
 
+		if domainCache, err = CreateDomainCache(ctx); err != nil {
+			logc.Fatalln(ctx, err)
+		}
+
 		middleware := chainHTTPMiddleware(
 			panicHandler,
 			remoteAddrMiddleware,
diff --git a/src/observe.go b/src/observe.go
index a21edca..074c89b 100644
--- a/src/observe.go
+++ b/src/observe.go
@@ -346,6 +346,13 @@ func (backend *observedBackend) UnfreezeDomain(ctx context.Context, domain strin
 	return
 }
 
+func (backend *observedBackend) HaveDomainsChanged(ctx context.Context, since time.Time) (changed bool, err error) {
+	span, ctx := ObserveFunction(ctx, "HaveDomainsChanged", "since", since)
+	changed, err = backend.inner.HaveDomainsChanged(ctx, since)
+	span.Finish()
+	return
+}
+
 func (backend *observedBackend) AppendAuditLog(ctx context.Context, id AuditID, record *AuditRecord) (err error) {
 	span, ctx := ObserveFunction(ctx, "AppendAuditLog", "audit.id", id)
 	err = backend.inner.AppendAuditLog(ctx, id, record)
diff --git a/src/pages.go b/src/pages.go
index 0c21a75..5010247 100644
--- a/src/pages.go
+++ b/src/pages.go
@@ -65,8 +65,12 @@ func observeSiteUpdate(via string, result *UpdateResult) {
 	}
 }
 
+func normalizeHost(host string) string {
+	return strings.ToLower(host)
+}
+
 func makeWebRoot(host string, projectName string) string {
-	return path.Join(strings.ToLower(host), projectName)
+	return path.Join(normalizeHost(host), projectName)
 }
 
 func getWebRoot(r *http.Request) (string, error) {
@@ -115,6 +119,13 @@ func getPage(w http.ResponseWriter, r *http.Request) error {
 		return err
 	}
 
+	host = normalizeHost(host)
+	if !domainCache.CheckDomain(r.Context(), host) {
+		w.WriteHeader(http.StatusNotFound)
+		fmt.Fprintf(w, "site not found\n")
+		return nil
+	}
+
 	type indexManifestResult struct {
 		manifest *Manifest
 		metadata ManifestMetadata
diff --git a/src/update.go b/src/update.go
index e688130..0a93ed9 100644
--- a/src/update.go
+++ b/src/update.go
@@ -59,6 +59,7 @@ func Update(
 	if err == nil {
 		domain, _, _ := strings.Cut(webRoot, "/")
 		err = backend.CreateDomain(ctx, domain)
+		domainCache.AddDomain(ctx, domain)
 	}
 	if err == nil {
 		if oldManifest == nil {