From 0d24e1aa707a72de00535cdb9f31d2659b8870d1 Mon Sep 17 00:00:00 2001 From: Catherine Date: Tue, 19 May 2026 11:58:37 +0000 Subject: [PATCH] Rename "site existence cache" to "existence cache", tidy it up. This commit includes no behavioral changes, only cosmetic ones: * Renames the concept to "existence cache". * Makes log messages more concise. * Adds written rationale for the module. * Renames feature to `existence-cache` (the old `site-existence-cache` name is no longer checked, so configurations enabling it must switch to the new name). --- src/caddy.go | 2 +- src/{existence_cache.go => existence.go} | 66 +++++++++++++++--------- src/main.go | 6 +-- src/pages.go | 4 +- src/update.go | 2 +- 5 files changed, 48 insertions(+), 32 deletions(-) rename src/{existence_cache.go => existence.go} (54%) diff --git a/src/caddy.go b/src/caddy.go index ef3298f..888fab5 100644 --- a/src/caddy.go +++ b/src/caddy.go @@ -30,7 +30,7 @@ func ServeCaddy(w http.ResponseWriter, r *http.Request) { domain = strings.ToLower(domain) // Run a cheap check as to whether we might be serving the domain. - var found = siteExistenceCache.CheckDomain(r.Context(), domain) + var found = existenceCache.CheckDomain(r.Context(), domain) if found { // Run an expensive check as to whether we are actually serving the domain. diff --git a/src/existence_cache.go b/src/existence.go similarity index 54% rename from src/existence_cache.go rename to src/existence.go index 07ba0b6..b331808 100644 --- a/src/existence_cache.go +++ b/src/existence.go @@ -1,3 +1,9 @@ +// The existence cache allows fast rejection of requests for nonexistent domains or sites. +// This is principally important for floods of crawler requests which probe either random +// domains (with A/AAAA records not pointing to the git-pages host), or random sites +// (typically as a result of probing for vulnerable URLs). With the S3 backend this can +// result in severe congestion of the backend channel and high CPU use. 
+ package git_pages import ( @@ -10,7 +16,7 @@ import ( "github.com/bits-and-blooms/bloom/v3" ) -type SiteExistenceCache interface { +type ExistenceCache interface { // Check if we might be serving the site. CheckSite(ctx context.Context, site string) (found bool) @@ -21,14 +27,14 @@ type SiteExistenceCache interface { AddSite(ctx context.Context, site string) } -func CreateSiteExistenceCache(ctx context.Context) (SiteExistenceCache, error) { - if !config.Feature("site-existence-cache") { - return &dummySiteExistenceCache{}, nil +func CreateExistenceCache(ctx context.Context) (ExistenceCache, error) { + if !config.Feature("existence-cache") { + return &dummyExistenceCache{}, nil } - return createBloomSiteExistenceCache(ctx) + return createBloomExistenceCache(ctx) } -type bloomSiteExistenceCache struct { +type bloomExistenceCache struct { sites *bloom.BloomFilter domains *bloom.BloomFilter filterMu sync.Mutex @@ -39,8 +45,8 @@ type bloomSiteExistenceCache struct { maxAge time.Duration } -func createBloomSiteExistenceCache(ctx context.Context) (SiteExistenceCache, error) { - cache := bloomSiteExistenceCache{ +func createBloomExistenceCache(ctx context.Context) (ExistenceCache, error) { + cache := bloomExistenceCache{ accessCh: make(chan struct{}), } @@ -62,33 +68,35 @@ func createBloomSiteExistenceCache(ctx context.Context) (SiteExistenceCache, err return &cache, nil } -func (c *bloomSiteExistenceCache) handleFilterUpdates(ctx context.Context) { +func (c *bloomExistenceCache) handleFilterUpdates(ctx context.Context) { for range c.accessCh { if time.Since(c.lastRefresh) > c.maxAge { - logc.Print(ctx, "site existence cache: refreshing") + logc.Print(ctx, "existence: refreshing") if err := c.refresh(ctx); err != nil { - logc.Printf(ctx, "site existence cache: refresh error: %v", err) + logc.Printf(ctx, "existence: refresh error: %v", err) } } } } -func (c *bloomSiteExistenceCache) refresh(ctx context.Context) error { +func (c *bloomExistenceCache) refresh(ctx 
context.Context) error { c.refreshMu.Lock() defer c.refreshMu.Unlock() if changed, err := backend.HasSiteListChanged(ctx, c.lastRefresh); err != nil { return err } else if !changed { - logc.Print(ctx, "site existence cache: unchanged") + logc.Print(ctx, "existence: unchanged") c.lastRefresh = time.Now() return nil } - var siteCount int // Create two 256 KiB Bloom filters that will fit ~150K entries each with 0.1% false positive rate. sites := bloom.New(256*1024, 10) domains := bloom.New(256*1024, 10) + + logc.Printf(ctx, "existence: refreshing") + siteCount := 0 for metadata, err := range backend.EnumerateManifests(ctx) { if err != nil { return fmt.Errorf("enum manifests: %w", err) @@ -105,12 +113,12 @@ func (c *bloomSiteExistenceCache) refresh(ctx context.Context) error { c.domains = domains c.filterMu.Unlock() - logc.Printf(ctx, "site existence cache: refreshed with %d sites", siteCount) + logc.Printf(ctx, "existence: refreshed with %d sites", siteCount) c.lastRefresh = time.Now() return nil } -func (c *bloomSiteExistenceCache) CheckSite(ctx context.Context, site string) (found bool) { +func (c *bloomExistenceCache) CheckSite(ctx context.Context, site string) (found bool) { select { case c.accessCh <- struct{}{}: default: @@ -120,11 +128,15 @@ func (c *bloomSiteExistenceCache) CheckSite(ctx context.Context, site string) (f found = c.sites.TestString(site) c.filterMu.Unlock() - logc.Printf(ctx, "site existence cache: bloom filter returns %v for site %q", found, site) + result := "miss" + if found { + result = "hit" + } + logc.Printf(ctx, "existence: site %s: %s", site, result) return } -func (c *bloomSiteExistenceCache) CheckDomain(ctx context.Context, domain string) (found bool) { +func (c *bloomExistenceCache) CheckDomain(ctx context.Context, domain string) (found bool) { select { case c.accessCh <- struct{}{}: default: @@ -134,11 +146,15 @@ func (c *bloomSiteExistenceCache) CheckDomain(ctx context.Context, domain string found = c.domains.TestString(domain) 
c.filterMu.Unlock() - logc.Printf(ctx, "site existence cache: bloom filter returns %v for domain %q", found, domain) + result := "miss" + if found { + result = "hit" + } + logc.Printf(ctx, "existence: domain %s: %s", domain, result) return } -func (c *bloomSiteExistenceCache) AddSite(ctx context.Context, site string) { +func (c *bloomExistenceCache) AddSite(ctx context.Context, site string) { c.refreshMu.Lock() defer c.refreshMu.Unlock() @@ -149,13 +165,13 @@ func (c *bloomSiteExistenceCache) AddSite(ctx context.Context, site string) { c.domains.AddString(domain) c.filterMu.Unlock() - logc.Printf(ctx, "site existence cache: added site %q", site) + logc.Printf(ctx, "existence: added site %s", site) } -type dummySiteExistenceCache struct{} +type dummyExistenceCache struct{} -func (d dummySiteExistenceCache) CheckSite(context.Context, string) bool { return true } +func (d dummyExistenceCache) CheckSite(context.Context, string) bool { return true } -func (d dummySiteExistenceCache) CheckDomain(context.Context, string) bool { return true } +func (d dummyExistenceCache) CheckDomain(context.Context, string) bool { return true } -func (d dummySiteExistenceCache) AddSite(context.Context, string) {} +func (d dummyExistenceCache) AddSite(context.Context, string) {} diff --git a/src/main.go b/src/main.go index d5bed90..7b77c0f 100644 --- a/src/main.go +++ b/src/main.go @@ -34,7 +34,7 @@ var config *Config var wildcards []*WildcardPattern var fallback http.Handler var backend Backend -var siteExistenceCache SiteExistenceCache +var existenceCache ExistenceCache func configureFeatures(ctx context.Context) (err error) { if len(config.Features) > 0 { @@ -345,7 +345,7 @@ func Main(versionInfo string) { logc.Fatalln(ctx, err) } - if siteExistenceCache, err = CreateSiteExistenceCache(ctx); err != nil { + if existenceCache, err = CreateExistenceCache(ctx); err != nil { logc.Fatalln(ctx, err) } } @@ -742,7 +742,7 @@ func Main(versionInfo string) { } backend = NewObservedBackend(backend) 
- if siteExistenceCache, err = CreateSiteExistenceCache(ctx); err != nil { + if existenceCache, err = CreateExistenceCache(ctx); err != nil { logc.Fatalln(ctx, err) } diff --git a/src/pages.go b/src/pages.go index 232fac7..7f23490 100644 --- a/src/pages.go +++ b/src/pages.go @@ -139,7 +139,7 @@ func getPage(w http.ResponseWriter, r *http.Request) error { indexManifestCh := make(chan indexManifestResult, 1) go func() { webRoot := makeWebRoot(host, ".index") - if !siteExistenceCache.CheckSite(r.Context(), webRoot) { + if !existenceCache.CheckSite(r.Context(), webRoot) { close(indexManifestCh) return } @@ -154,7 +154,7 @@ func getPage(w http.ResponseWriter, r *http.Request) error { sitePath = strings.TrimPrefix(r.URL.Path, "/") if projectName, projectPath, hasProjectSlash := strings.Cut(sitePath, "/"); projectName != "" { webRoot := makeWebRoot(host, projectName) - if ValidateProjectName(projectName) == nil && siteExistenceCache.CheckSite(r.Context(), webRoot) { + if ValidateProjectName(projectName) == nil && existenceCache.CheckSite(r.Context(), webRoot) { var projectManifest *Manifest var projectMetadata ManifestMetadata projectManifest, projectMetadata, err = backend.GetManifest( diff --git a/src/update.go b/src/update.go index d2c0857..1b8b2cc 100644 --- a/src/update.go +++ b/src/update.go @@ -59,7 +59,7 @@ func Update( if err == nil { domain, _, _ := strings.Cut(webRoot, "/") err = backend.CreateDomain(ctx, domain) - siteExistenceCache.AddSite(ctx, webRoot) + existenceCache.AddSite(ctx, webRoot) } if err == nil { if oldManifest == nil {