Add a domain cache to quickly reject non-existent domains.

This commit is contained in:
miyuko
2026-04-11 12:00:20 +00:00
parent f400f8d246
commit bbdaae7280
12 changed files with 225 additions and 5 deletions

2
go.mod
View File

@@ -31,6 +31,8 @@ require (
github.com/Microsoft/go-winio v0.6.2 // indirect
github.com/ProtonMail/go-crypto v1.4.1 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/bits-and-blooms/bitset v1.24.2 // indirect
github.com/bits-and-blooms/bloom/v3 v3.7.1
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cloudflare/circl v1.6.3 // indirect
github.com/cyphar/filepath-securejoin v0.6.1 // indirect

5
go.sum
View File

@@ -14,6 +14,10 @@ github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPd
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bits-and-blooms/bitset v1.24.2 h1:M7/NzVbsytmtfHbumG+K2bremQPMJuqv1JD3vOaFxp0=
github.com/bits-and-blooms/bitset v1.24.2/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/bits-and-blooms/bloom/v3 v3.7.1 h1:WXovk4TRKZttAMJfoQx6K2DM0zNIt8w+c67UqO+etV0=
github.com/bits-and-blooms/bloom/v3 v3.7.1/go.mod h1:rZzYLLje2dfzXfAkJNxQQHsKurAyK55KUnL43Euk0hU=
github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500 h1:6lhrsTEnloDPXyeZBvSYvQf8u86jbKehZPVDDlkgDl4=
github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500/go.mod h1:S/7n9copUssQ56c7aAgHqftWO4LTf4xY6CGWt8Bc+3M=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
@@ -135,6 +139,7 @@ github.com/tj/assert v0.0.3 h1:Df/BlaZ20mq6kuai7f5z2TvPFiwC3xaWJSDQNiIS3Rk=
github.com/tj/assert v0.0.3/go.mod h1:Ne6X72Q+TB1AteidzQncjw9PabbMp4PBMZ1k+vd1Pvk=
github.com/tj/go-redirects v0.0.0-20200911105812-fd1ba1020b37 h1:K11tjwz8zTTSZkz4TUjfLN+y8uJWP38BbyPqZ2yB/Yk=
github.com/tj/go-redirects v0.0.0-20200911105812-fd1ba1020b37/go.mod h1:E0E2H2gQA+uoi27VCSU+a/BULPtadQA78q3cpTjZbZw=
github.com/twmb/murmur3 v1.1.8/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasttemplate v1.2.2 h1:lxLXG0uE3Qnshl9QyaK6XJxMXlQZELvChBOCmQD0Loo=

View File

@@ -19,6 +19,12 @@ schema = 3
[mod."github.com/beorn7/perks"]
version = "v1.0.1"
hash = "sha256-h75GUqfwJKngCJQVE5Ao5wnO3cfKD9lSIteoLp/3xJ4="
[mod."github.com/bits-and-blooms/bitset"]
version = "v1.24.2"
hash = "sha256-hT88EpdWmKnqdxApJhs/aIAptf33HmtSp2KXPI+Ym7o="
[mod."github.com/bits-and-blooms/bloom/v3"]
version = "v3.7.1"
hash = "sha256-KZduCu+k4+xqBcFRTfg8Yc/PEf5jfpjn0I1YoxfnVPo="
[mod."github.com/c2h5oh/datasize"]
version = "v0.0.0-20231215233829-aa82cc1e6500"
hash = "sha256-8MqL7xCvE6fIjanz2jwkaLP1OE5kLu62TOcQx452DHQ="

View File

@@ -138,13 +138,16 @@ type Backend interface {
// Create a domain. This allows us to start serving content for the domain.
CreateDomain(ctx context.Context, domain string) error
// Freeze a domain. This allows a site to be administratively locked, e.g. if it
// is discovered serving abusive content.
FreezeDomain(ctx context.Context, domain string) error
// Thaw a domain. This removes the previously placed administrative lock (if any).
UnfreezeDomain(ctx context.Context, domain string) error
// Check whether the set of domains we serve has changed since the time passed to this method.
HaveDomainsChanged(ctx context.Context, since time.Time) (changed bool, err error)
// Append a record to the audit log.
AppendAuditLog(ctx context.Context, id AuditID, record *AuditRecord) error

View File

@@ -11,6 +11,7 @@ import (
"os"
"path/filepath"
"strings"
"time"
)
type FSBackend struct {
@@ -479,6 +480,10 @@ func (fs *FSBackend) UnfreezeDomain(ctx context.Context, domain string) error {
}
}
// HaveDomainsChanged always reports that the set of domains has changed.
//
// Change tracking is not implemented for the filesystem backend: both arguments
// are ignored and the returned error is always nil.
func (fs *FSBackend) HaveDomainsChanged(ctx context.Context, since time.Time) (bool, error) {
	// Not implemented; unconditionally answer "changed".
	const changed = true
	return changed, nil
}
func (fs *FSBackend) AppendAuditLog(ctx context.Context, id AuditID, record *AuditRecord) error {
if _, err := fs.auditRoot.Stat(id.String()); err == nil {
panic(fmt.Errorf("audit ID collision: %s", id))

View File

@@ -643,8 +643,11 @@ func (s3 *S3Backend) DeleteManifest(
err := s3.client.RemoveObject(ctx, s3.bucket, manifestObjectName(name),
minio.RemoveObjectOptions{})
if err != nil {
return err
}
s3.siteCache.Cache.Invalidate(name)
return err
return s3.bumpLastDomainUpdateTimestamp(ctx)
}
func (s3 *S3Backend) EnumerateManifests(ctx context.Context) iter.Seq2[*ManifestMetadata, error] {
@@ -764,8 +767,19 @@ func (s3 *S3Backend) CheckDomain(ctx context.Context, domain string) (exists boo
func (s3 *S3Backend) CreateDomain(ctx context.Context, domain string) error {
logc.Printf(ctx, "s3: create domain %s\n", domain)
_, err := s3.client.PutObject(ctx, s3.bucket, domainCheckObjectName(domain),
exists, err := s3.CheckDomain(ctx, domain)
if err != nil {
return err
}
_, err = s3.client.PutObject(ctx, s3.bucket, domainCheckObjectName(domain),
&bytes.Reader{}, 0, minio.PutObjectOptions{})
if err != nil {
return err
}
if !exists {
err = s3.bumpLastDomainUpdateTimestamp(ctx)
}
return err
}
@@ -790,6 +804,25 @@ func (s3 *S3Backend) UnfreezeDomain(ctx context.Context, domain string) error {
}
}
// lastDomainUpdateObjectName is the name of the empty marker object whose
// modification time is compared against `since` by HaveDomainsChanged.
const lastDomainUpdateObjectName = "meta/last-domain-update"

// HaveDomainsChanged reports whether the last-domain-update marker object has been
// modified at or after `since` (compared at one-second granularity).
//
// If the marker object does not exist, no modification time is available, so the
// method conservatively reports `true` instead of failing. Any other error from
// the object store is returned with `changed` set to false.
func (s3 *S3Backend) HaveDomainsChanged(ctx context.Context, since time.Time) (bool, error) {
	info, err := s3.client.StatObject(ctx, s3.bucket, lastDomainUpdateObjectName,
		minio.GetObjectOptions{})
	if err != nil {
		if minio.ToErrorResponse(err).Code == "NoSuchKey" {
			// The marker has never been written (e.g. no domain has been created or
			// deleted since this mechanism was introduced); assume a change.
			return true, nil
		}
		return false, err
	}

	// The modification time reported for the object comes from an HTTP Last-Modified
	// header, which carries whole seconds only. A strict "after" comparison against a
	// sub-second `since` would therefore miss an update made later in the same second,
	// so compare inclusively against `since` truncated to the second. The cost is at
	// most one redundant "changed" report per update.
	return !info.LastModified.Before(since.Truncate(time.Second)), nil
}
// bumpLastDomainUpdateTimestamp (re)writes the zero-length marker object named by
// lastDomainUpdateObjectName. HaveDomainsChanged inspects that object's
// modification time, so rewriting it signals that the set of domains has changed.
func (s3 *S3Backend) bumpLastDomainUpdateTimestamp(ctx context.Context) error {
	logc.Print(ctx, "s3: bumping last domain update timestamp")

	// The object's content is irrelevant; upload an empty body.
	var emptyBody bytes.Reader
	if _, err := s3.client.PutObject(ctx, s3.bucket, lastDomainUpdateObjectName,
		&emptyBody, 0, minio.PutObjectOptions{}); err != nil {
		return err
	}
	return nil
}
// auditObjectName returns the object name under which the audit record with the
// given ID is addressed: the ID, formatted with `%s`, under the "audit/" prefix.
func auditObjectName(id AuditID) string {
return fmt.Sprintf("audit/%s", id)
}

View File

@@ -26,7 +26,17 @@ func ServeCaddy(w http.ResponseWriter, r *http.Request) {
return
}
found, err := backend.CheckDomain(r.Context(), strings.ToLower(domain))
var err error
domain = strings.ToLower(domain)
// Run a cheap check as to whether we might be serving the domain.
var found = domainCache.CheckDomain(r.Context(), domain)
if !found {
// Run an expensive check as to whether we are actually serving the domain.
found, err = backend.CheckDomain(r.Context(), domain)
}
if !found {
// If we don't serve the domain, but a fallback server does, then we should let our
// Caddy instance request a TLS certificate. Otherwise, we'll never have an opportunity

132
src/domain_cache.go Normal file
View File

@@ -0,0 +1,132 @@
package git_pages
import (
"context"
"fmt"
"strings"
"sync"
"time"
"github.com/bits-and-blooms/bloom/v3"
)
// DomainCache is a cheap, approximate membership test for the set of domains being
// served. Two implementations exist in this file: bloomDomainCache (Bloom filter
// built from the backend's manifests) and dummyDomainCache (always answers "found").
type DomainCache interface {
// Check if we might be serving the domain.
//
// A `true` result is not definitive: the Bloom filter implementation can return
// false positives, and the dummy implementation returns `true` for every domain.
CheckDomain(ctx context.Context, domain string) (found bool)
// Add the domain to the cache, so that subsequent CheckDomain calls for it
// return `true`.
AddDomain(ctx context.Context, domain string)
}
// CreateDomainCache builds the DomainCache selected by configuration.
//
// With the "domain-existence-cache" feature enabled, this is a Bloom filter cache
// (whose construction may fail, in which case the error is returned). Otherwise it
// is a dummy cache that reports every domain as possibly served.
func CreateDomainCache(ctx context.Context) (DomainCache, error) {
	if config.Feature("domain-existence-cache") {
		return createBloomDomainCache(ctx)
	}
	// Feature disabled: fall back to the no-op implementation.
	return &dummyDomainCache{}, nil
}
// bloomDomainCache is a DomainCache backed by a Bloom filter that `refresh` rebuilds
// from the manifests enumerated by the backend.
type bloomDomainCache struct {
// Current Bloom filter. Replaced wholesale by `refresh`; added to by `AddDomain`;
// queried by `CheckDomain`.
filter *bloom.BloomFilter
// Guards `filter` (every access in this file takes this lock).
filterMu sync.Mutex
// Unbuffered channel on which `CheckDomain` posts a non-blocking signal for each
// lookup; `handleFilterUpdates` receives from it to decide whether a refresh is due.
accessCh chan struct{}
// Held for the entire duration of `refresh` and of `AddDomain`, so the two never
// run concurrently.
refreshMu sync.Mutex
// Timestamp stored by `refresh` when it completes without error (zero until then).
// Passed to `Backend.HaveDomainsChanged` and compared against `maxAge`.
lastRefresh time.Time
// How old `lastRefresh` may get before `handleFilterUpdates` triggers a refresh.
// Taken from the S3 site cache configuration; left at zero for the "fs" backend.
maxAge time.Duration
}
// createBloomDomainCache constructs a Bloom filter backed domain cache, fills it with
// an initial synchronous refresh, and starts the goroutine that performs subsequent
// refreshes.
//
// The maximum age of the filter follows the configured storage backend: for "s3" it
// is the S3 site cache's maximum age; for "fs" it stays zero. Any other storage type
// causes a panic. An error from the initial refresh is returned and no goroutine is
// started in that case.
func createBloomDomainCache(ctx context.Context) (DomainCache, error) {
	cache := &bloomDomainCache{accessCh: make(chan struct{})}

	switch storageType := config.Storage.Type; storageType {
	case "s3":
		cache.maxAge = time.Duration(config.Storage.S3.SiteCache.MaxAge)
	case "fs":
		// the FS backend has no cache
	default:
		panic(fmt.Errorf("unknown backend: %s", storageType))
	}

	// Populate the filter before handing the cache out, so lookups never observe a
	// missing filter.
	err := cache.refresh(ctx)
	if err != nil {
		return nil, err
	}

	go cache.handleFilterUpdates(ctx)
	return cache, nil
}
// handleFilterUpdates is the body of the background goroutine that keeps the filter
// fresh. Each signal received on `accessCh` (posted by CheckDomain) prompts a check of
// the filter's age; when it exceeds `maxAge`, the filter is refreshed. Refresh errors
// are logged and otherwise ignored, leaving the previous filter in place.
//
// The goroutine exits when `ctx` is cancelled or `accessCh` is closed.
func (c *bloomDomainCache) handleFilterUpdates(ctx context.Context) {
	for {
		select {
		case <-ctx.Done():
			// Stop refreshing once the owning context is done, instead of living for
			// the remainder of the process.
			return
		case _, ok := <-c.accessCh:
			if !ok {
				return
			}
		}

		if time.Since(c.lastRefresh) <= c.maxAge {
			continue
		}
		logc.Print(ctx, "domain cache: refreshing")
		if err := c.refresh(ctx); err != nil {
			logc.Printf(ctx, "domain cache: refresh error: %v", err)
		}
	}
}
// refresh brings the Bloom filter up to date with the backend.
//
// It first asks the backend whether the set of domains has changed since the previous
// refresh. If not, only the refresh timestamp is advanced. Otherwise a new filter is
// built from the domain part (everything before the first "/") of every manifest name
// and swapped in under `filterMu`.
//
// `refreshMu` is held throughout, which serializes refreshes with each other and with
// AddDomain. On error the current filter and timestamp are left untouched.
func (c *bloomDomainCache) refresh(ctx context.Context) error {
	c.refreshMu.Lock()
	defer c.refreshMu.Unlock()

	// Take the timestamp before looking at the backend. If it were taken after the
	// enumeration, a domain change made while the enumeration was in progress could
	// predate the timestamp and be missed by the next HaveDomainsChanged check.
	startedAt := time.Now()

	changed, err := backend.HaveDomainsChanged(ctx, c.lastRefresh)
	if err != nil {
		return err
	}
	if !changed {
		logc.Print(ctx, "domain cache: unchanged")
		c.lastRefresh = startedAt
		return nil
	}

	// Create a 256 KiB Bloom filter that will fit ~150K entries with 0.1% false positive rate.
	// bloom.New takes the filter size in bits, hence the factor of 8.
	filter := bloom.New(256*1024*8, 10)
	for metadata, err := range backend.EnumerateManifests(ctx) {
		if err != nil {
			return fmt.Errorf("enum manifests: %w", err)
		}
		domain, _, _ := strings.Cut(metadata.Name, "/")
		filter.AddString(domain)
	}

	c.filterMu.Lock()
	c.filter = filter
	c.filterMu.Unlock()

	logc.Printf(ctx, "domain cache: refreshed with approx. %d domains", filter.ApproximatedSize())
	c.lastRefresh = startedAt
	return nil
}
// CheckDomain reports whether `domain` is present in the Bloom filter, i.e. whether it
// might be served. The result is logged.
//
// Every call also posts a non-blocking signal on `accessCh`; if nobody is currently
// receiving, the signal is dropped so the lookup itself is never delayed by it.
func (c *bloomDomainCache) CheckDomain(ctx context.Context, domain string) (found bool) {
	// Nudge the updater goroutine, but never wait for it.
	select {
	case c.accessCh <- struct{}{}:
	default:
	}

	c.filterMu.Lock()
	mightServe := c.filter.TestString(domain)
	c.filterMu.Unlock()

	logc.Printf(ctx, "domain cache: bloom filter returns %v for %q", mightServe, domain)
	return mightServe
}
// AddDomain inserts `domain` into the current Bloom filter and logs the addition.
//
// `refreshMu` is held for the whole call, so an addition waits for any in-progress
// refresh to finish and is then applied to the filter that refresh installed
// (presumably so the addition is not lost when refresh swaps in a new filter).
func (c *bloomDomainCache) AddDomain(ctx context.Context, domain string) {
c.refreshMu.Lock()
defer c.refreshMu.Unlock()
// `filterMu` is taken second and released before logging; it only protects the
// filter mutation itself.
c.filterMu.Lock()
c.filter.AddString(domain)
c.filterMu.Unlock()
logc.Printf(ctx, "domain cache: added %q", domain)
}
// dummyDomainCache is a stateless DomainCache that never rejects a domain.
type dummyDomainCache struct{}

// CheckDomain reports every domain as possibly served, ignoring its arguments.
func (dummyDomainCache) CheckDomain(_ context.Context, _ string) bool {
	return true
}

// AddDomain does nothing; there is no state to update.
func (dummyDomainCache) AddDomain(_ context.Context, _ string) {
}

View File

@@ -33,6 +33,7 @@ var config *Config
var wildcards []*WildcardPattern
var fallback http.Handler
var backend Backend
var domainCache DomainCache
func configureFeatures(ctx context.Context) (err error) {
if len(config.Features) > 0 {
@@ -639,6 +640,10 @@ func Main(versionInfo string) {
}
backend = NewObservedBackend(backend)
if domainCache, err = CreateDomainCache(ctx); err != nil {
logc.Fatalln(ctx, err)
}
middleware := chainHTTPMiddleware(
panicHandler,
remoteAddrMiddleware,

View File

@@ -346,6 +346,13 @@ func (backend *observedBackend) UnfreezeDomain(ctx context.Context, domain strin
return
}
// HaveDomainsChanged forwards the call to the wrapped backend inside an observation
// span named "HaveDomainsChanged" that is annotated with the `since` argument. The
// span is finished once the wrapped call returns, and the wrapped call's results are
// passed through unchanged.
func (backend *observedBackend) HaveDomainsChanged(ctx context.Context, since time.Time) (changed bool, err error) {
	span, spanCtx := ObserveFunction(ctx, "HaveDomainsChanged", "since", since)
	changed, err = backend.inner.HaveDomainsChanged(spanCtx, since)
	span.Finish()
	return changed, err
}
func (backend *observedBackend) AppendAuditLog(ctx context.Context, id AuditID, record *AuditRecord) (err error) {
span, ctx := ObserveFunction(ctx, "AppendAuditLog", "audit.id", id)
err = backend.inner.AppendAuditLog(ctx, id, record)

View File

@@ -65,8 +65,12 @@ func observeSiteUpdate(via string, result *UpdateResult) {
}
}
// normalizeHost returns the canonical spelling of a host name, which is the host
// converted to lower case.
func normalizeHost(host string) string {
	lowered := strings.ToLower(host)
	return lowered
}
func makeWebRoot(host string, projectName string) string {
return path.Join(strings.ToLower(host), projectName)
return path.Join(normalizeHost(host), projectName)
}
func getWebRoot(r *http.Request) (string, error) {
@@ -115,6 +119,13 @@ func getPage(w http.ResponseWriter, r *http.Request) error {
return err
}
host = normalizeHost(host)
if !domainCache.CheckDomain(r.Context(), host) {
w.WriteHeader(http.StatusNotFound)
fmt.Fprintf(w, "site not found\n")
return nil
}
type indexManifestResult struct {
manifest *Manifest
metadata ManifestMetadata

View File

@@ -59,6 +59,7 @@ func Update(
if err == nil {
domain, _, _ := strings.Cut(webRoot, "/")
err = backend.CreateDomain(ctx, domain)
domainCache.AddDomain(ctx, domain)
}
if err == nil {
if oldManifest == nil {