Files
git-pages/src/domain_cache.go

133 lines
3.0 KiB
Go

package git_pages
import (
"context"
"fmt"
"strings"
"sync"
"time"
"github.com/bits-and-blooms/bloom/v3"
)
// DomainCache answers "could this domain be one of ours?" without consulting
// the storage backend on every request.
type DomainCache interface {
// CheckDomain reports whether we might be serving the domain. A true result
// is not a guarantee that the domain exists: the no-op implementation in
// this file always answers true.
CheckDomain(ctx context.Context, domain string) (found bool)
// AddDomain adds the domain to the cache.
AddDomain(ctx context.Context, domain string)
}
// CreateDomainCache returns the DomainCache implementation selected by the
// "domain-existence-cache" feature flag: the Bloom-filter-backed cache when
// the feature is enabled, and a no-op cache that accepts every domain when it
// is not.
//
// An error is only possible on the Bloom filter path, where the initial
// refresh from the backend can fail.
func CreateDomainCache(ctx context.Context) (DomainCache, error) {
	if config.Feature("domain-existence-cache") {
		return createBloomDomainCache(ctx)
	}
	return &dummyDomainCache{}, nil
}
// bloomDomainCache is a DomainCache backed by a Bloom filter that is rebuilt
// from the storage backend's manifest list once it is older than maxAge.
type bloomDomainCache struct {
// filter is the current Bloom filter of known domains. refresh replaces it
// wholesale; AddDomain adds to it in place.
filter *bloom.BloomFilter
// filterMu guards filter: it is held while the pointer is swapped and while
// the filter is tested or added to.
filterMu sync.Mutex
// accessCh is an unbuffered signal channel. CheckDomain performs a
// non-blocking send on every lookup; handleFilterUpdates receives from it and
// decides whether a refresh is due.
accessCh chan struct{}
// refreshMu is held for the whole of refresh and of AddDomain, which makes
// the two mutually exclusive.
refreshMu sync.Mutex
// lastRefresh is the time at which refresh last returned without error
// (whether or not the filter had to be rebuilt).
lastRefresh time.Time
// maxAge is how old lastRefresh may get before an access triggers a refresh.
// It is taken from the S3 site cache configuration and stays zero for the
// "fs" backend.
maxAge time.Duration
}
// createBloomDomainCache builds a Bloom-filter-backed DomainCache, fills it
// with an initial synchronous refresh, and starts the background goroutine
// that refreshes it again when lookups find it stale.
//
// It returns the error of the initial refresh, if any, and panics when the
// configured storage backend type is neither "fs" nor "s3".
func createBloomDomainCache(ctx context.Context) (DomainCache, error) {
	cache := &bloomDomainCache{accessCh: make(chan struct{})}

	switch backendType := config.Storage.Type; backendType {
	case "s3":
		cache.maxAge = time.Duration(config.Storage.S3.SiteCache.MaxAge)
	case "fs":
		// the FS backend has no cache, so maxAge stays zero and every access
		// is considered stale by handleFilterUpdates.
	default:
		panic(fmt.Errorf("unknown backend: %s", backendType))
	}

	// Populate the filter before handing the cache out, so the first lookup
	// already sees real data.
	if err := cache.refresh(ctx); err != nil {
		return nil, err
	}

	go cache.handleFilterUpdates(ctx)
	return cache, nil
}
// handleFilterUpdates is the background loop that keeps the filter fresh. It
// wakes up whenever CheckDomain signals an access on accessCh and triggers a
// refresh if the last one is older than maxAge. Refresh errors are logged and
// otherwise ignored; the previous filter stays in use.
//
// The loop returns when ctx is cancelled (or if accessCh is ever closed), so
// the goroutine does not outlive its context. CheckDomain signals with a
// non-blocking send, so lookups keep working after the loop has exited.
func (c *bloomDomainCache) handleFilterUpdates(ctx context.Context) {
	for {
		select {
		case <-ctx.Done():
			return
		case _, ok := <-c.accessCh:
			if !ok {
				return
			}
		}

		// lastRefresh is read without refreshMu. In the code visible here it is
		// only written by refresh, which runs on this goroutine once the loop
		// has started.
		if time.Since(c.lastRefresh) <= c.maxAge {
			continue
		}

		logc.Print(ctx, "domain cache: refreshing")
		if err := c.refresh(ctx); err != nil {
			logc.Printf(ctx, "domain cache: refresh error: %v", err)
		}
	}
}
// refresh rebuilds the Bloom filter from the backend's manifest list when the
// backend reports that the set of domains has changed since lastRefresh, and
// then advances lastRefresh to the current time.
//
// refreshMu is held for the whole call, which excludes concurrent refreshes
// and AddDomain. filterMu is only taken for the final pointer swap, so lookups
// keep being served from the old filter while the new one is being built.
//
// On error the existing filter and lastRefresh are left untouched. An error
// from HaveDomainsChanged is returned as is; an enumeration error is wrapped
// with "enum manifests".
func (c *bloomDomainCache) refresh(ctx context.Context) error {
	c.refreshMu.Lock()
	defer c.refreshMu.Unlock()

	changed, err := backend.HaveDomainsChanged(ctx, c.lastRefresh)
	if err != nil {
		return err
	}
	// Only skip the rebuild when there is already a filter to keep using. If
	// the backend reported "unchanged" before any filter was built, returning
	// here would leave c.filter nil for CheckDomain/AddDomain to dereference.
	// Reading c.filter without filterMu is fine: the pointer is only ever
	// written below, under refreshMu.
	if !changed && c.filter != nil {
		logc.Print(ctx, "domain cache: unchanged")
		c.lastRefresh = time.Now()
		return nil
	}

	// Create a 256 KiB Bloom filter that will fit ~150K entries with ~0.1%
	// false positive rate. bloom.New takes the filter size in bits, not bytes,
	// hence the factor of 8.
	const (
		filterBits   = 256 * 1024 * 8
		filterHashes = 10
	)
	filter := bloom.New(filterBits, filterHashes)
	for metadata, enumErr := range backend.EnumerateManifests(ctx) {
		if enumErr != nil {
			return fmt.Errorf("enum manifests: %w", enumErr)
		}
		// Manifest names begin with the domain, followed by "/" and the rest
		// of the name; a name without "/" is taken as the domain in full.
		domain, _, _ := strings.Cut(metadata.Name, "/")
		filter.AddString(domain)
	}

	c.filterMu.Lock()
	c.filter = filter
	c.filterMu.Unlock()

	logc.Printf(ctx, "domain cache: refreshed with approx. %d domains", filter.ApproximatedSize())
	c.lastRefresh = time.Now()
	return nil
}
// CheckDomain reports whether the Bloom filter contains domain and logs the
// outcome.
//
// Every call also nudges the background refresh loop through accessCh. The
// send is non-blocking: if the loop is not currently waiting to receive, the
// signal is simply dropped and the lookup proceeds.
func (c *bloomDomainCache) CheckDomain(ctx context.Context, domain string) (found bool) {
	select {
	case c.accessCh <- struct{}{}:
		// The refresh loop was waiting and has been woken up.
	default:
		// Nobody is receiving right now; do not hold up the lookup.
	}

	c.filterMu.Lock()
	present := c.filter.TestString(domain)
	c.filterMu.Unlock()

	logc.Printf(ctx, "domain cache: bloom filter returns %v for %q", present, domain)
	return present
}
// AddDomain inserts domain into the current Bloom filter and logs the
// addition.
//
// Lock order is refreshMu, then filterMu. refreshMu is held until the method
// returns (including the log call), so an insertion never overlaps with
// refresh; filterMu is held only around the insertion itself.
func (c *bloomDomainCache) AddDomain(ctx context.Context, domain string) {
	// Keep refresh out for the duration of the call.
	c.refreshMu.Lock()
	defer c.refreshMu.Unlock()

	// Mutate the filter under the same lock that CheckDomain reads it with.
	c.filterMu.Lock()
	c.filter.AddString(domain)
	c.filterMu.Unlock()

	logc.Printf(ctx, "domain cache: added %q", domain)
}
// dummyDomainCache is a DomainCache that remembers nothing: every domain is
// reported as possibly served, and additions are discarded.
type dummyDomainCache struct{}

// CheckDomain always reports true, regardless of the domain.
func (dummyDomainCache) CheckDomain(_ context.Context, _ string) bool {
	return true
}

// AddDomain does nothing.
func (dummyDomainCache) AddDomain(_ context.Context, _ string) {
}