mirror of
https://tangled.org/evan.jarrett.net/at-container-registry
synced 2026-04-20 16:40:29 +00:00
208 lines
6.0 KiB
Go
// Package scan implements the vulnerability scanning pipeline:
// extract layers → generate SBOM → scan vulnerabilities → send result.
package scan
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"os"
|
|
"runtime"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
scanner "atcr.io/scanner"
|
|
"atcr.io/scanner/internal/client"
|
|
"atcr.io/scanner/internal/config"
|
|
"atcr.io/scanner/internal/queue"
|
|
)
|
|
|
|
// WorkerPool manages a pool of scan workers that consume jobs from the
// shared queue and report results (or errors) back through the hold client.
type WorkerPool struct {
	cfg    *config.Config     // scanner configuration (worker count, vuln settings, tmp dir)
	queue  *queue.JobQueue    // job source; Dequeue returning nil signals shutdown
	client *client.HoldClient // reports scan results/errors keyed by job sequence number
	wg     sync.WaitGroup     // tracks worker goroutines; no-copy field, so always use *WorkerPool
}
|
|
|
|
// NewWorkerPool creates a new worker pool
|
|
func NewWorkerPool(cfg *config.Config, q *queue.JobQueue, c *client.HoldClient) *WorkerPool {
|
|
return &WorkerPool{
|
|
cfg: cfg,
|
|
queue: q,
|
|
client: c,
|
|
}
|
|
}
|
|
|
|
// Start launches worker goroutines
|
|
func (wp *WorkerPool) Start(ctx context.Context) {
|
|
// Point TMPDIR at the configured tmp dir so Grype's DB download
|
|
// (go-getter zstd decompression can be 1 GB+) and stereoscope's layer
|
|
// extraction both land on the same partition as the scanner volume —
|
|
// NOT on /tmp, which is typically tmpfs with ~400 MB and would silently
|
|
// fail mid-extract. This must be set before any scanner/grype goroutine
|
|
// starts and must never be restored to a smaller default mid-process.
|
|
if wp.cfg.Vuln.TmpDir != "" {
|
|
if err := os.MkdirAll(wp.cfg.Vuln.TmpDir, 0o755); err != nil {
|
|
slog.Warn("Failed to create scanner tmp dir", "path", wp.cfg.Vuln.TmpDir, "error", err)
|
|
}
|
|
os.Setenv("TMPDIR", wp.cfg.Vuln.TmpDir)
|
|
}
|
|
|
|
// Initialize vuln database on startup if enabled
|
|
if wp.cfg.Vuln.Enabled {
|
|
go func() {
|
|
if err := initializeVulnDatabase(wp.cfg.Vuln.DBPath); err != nil {
|
|
slog.Error("Failed to initialize vulnerability database", "error", err)
|
|
slog.Warn("Vulnerability scanning will be disabled until database is available")
|
|
}
|
|
}()
|
|
}
|
|
|
|
for i := 0; i < wp.cfg.Scanner.Workers; i++ {
|
|
wp.wg.Add(1)
|
|
go wp.worker(ctx, i)
|
|
}
|
|
|
|
slog.Info("Scanner worker pool started", "workers", wp.cfg.Scanner.Workers)
|
|
}
|
|
|
|
// Wait blocks until every worker goroutine launched by Start has returned
// (workers exit when the queue shuts down or the context is cancelled).
func (wp *WorkerPool) Wait() {
	wp.wg.Wait()
}
|
|
|
|
func (wp *WorkerPool) worker(ctx context.Context, id int) {
|
|
defer wp.wg.Done()
|
|
|
|
slog.Info("Scanner worker started", "worker_id", id)
|
|
|
|
for {
|
|
job := wp.queue.Dequeue()
|
|
if job == nil {
|
|
slog.Info("Scanner worker shutting down", "worker_id", id)
|
|
return
|
|
}
|
|
|
|
slog.Info("Processing scan job",
|
|
"worker_id", id,
|
|
"repository", job.Repository,
|
|
"tag", job.Tag,
|
|
"digest", job.ManifestDigest,
|
|
"tier", job.Tier)
|
|
|
|
result, err := wp.processJob(ctx, job)
|
|
if err != nil {
|
|
logLevel := slog.LevelError
|
|
if strings.HasPrefix(err.Error(), "skipped:") {
|
|
logLevel = slog.LevelInfo
|
|
}
|
|
slog.Log(ctx, logLevel, "Scan job failed",
|
|
"worker_id", id,
|
|
"repository", job.Repository,
|
|
"error", err)
|
|
wp.client.SendError(job.Seq, err.Error())
|
|
} else {
|
|
wp.client.SendResult(job.Seq, result)
|
|
|
|
slog.Info("Scan job completed",
|
|
"worker_id", id,
|
|
"repository", job.Repository,
|
|
"vulnerabilities", result.Summary.Total)
|
|
}
|
|
|
|
// Free large scan artifacts and trigger GC before the cooldown
|
|
// so memory is reclaimed between jobs. Syft/Grype allocate heavily
|
|
// and Go's GC needs idle time to catch up under sustained load.
|
|
result = nil
|
|
runtime.GC()
|
|
|
|
// Cooldown between scans to reduce sustained memory pressure
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-time.After(10 * time.Second):
|
|
}
|
|
}
|
|
}
|
|
|
|
// unscannable config media types — these are OCI artifacts that aren't
// container images so Syft/Grype can't analyze their layers. processJob
// consults this set to short-circuit with a "skipped:" error before any
// blobs are downloaded.
var unscannableConfigTypes = map[string]bool{
	"application/vnd.cncf.helm.config.v1+json": true, // Helm charts
	"application/vnd.in-toto+json":             true, // In-toto attestations
	"application/vnd.dsse.envelope.v1+json":    true, // DSSE envelopes (SLSA)
}
|
|
|
|
func (wp *WorkerPool) processJob(ctx context.Context, job *scanner.ScanJob) (*scanner.ScanResult, error) {
|
|
startTime := time.Now()
|
|
|
|
// Skip non-container OCI artifacts (Helm charts, WASM modules, etc.)
|
|
if unscannableConfigTypes[job.Config.MediaType] {
|
|
return nil, fmt.Errorf("skipped: unscannable artifact type %s", job.Config.MediaType)
|
|
}
|
|
|
|
// Ensure tmp dir exists
|
|
if err := ensureDir(wp.cfg.Vuln.TmpDir); err != nil {
|
|
return nil, fmt.Errorf("failed to create tmp dir: %w", err)
|
|
}
|
|
|
|
// Check total compressed image size before downloading
|
|
if wp.cfg.Vuln.MaxImageSize > 0 {
|
|
var totalSize int64
|
|
for _, layer := range job.Layers {
|
|
totalSize += layer.Size
|
|
}
|
|
totalSize += job.Config.Size
|
|
if totalSize > wp.cfg.Vuln.MaxImageSize {
|
|
return nil, fmt.Errorf("image too large: %d bytes compressed (limit %d bytes)", totalSize, wp.cfg.Vuln.MaxImageSize)
|
|
}
|
|
}
|
|
|
|
// Step 1: Build OCI image layout from hold via presigned URLs
|
|
slog.Info("Building OCI layout", "repository", job.Repository)
|
|
ociLayoutDir, cleanup, err := buildOCILayout(job, wp.cfg.Vuln.TmpDir, wp.cfg.Hold.Secret)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to build OCI layout: %w", err)
|
|
}
|
|
defer cleanup()
|
|
|
|
// Step 2: Generate SBOM with Syft
|
|
slog.Info("Generating SBOM", "repository", job.Repository)
|
|
sbomResult, sbomJSON, sbomDigest, err := generateSBOM(ctx, ociLayoutDir)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to generate SBOM: %w", err)
|
|
}
|
|
|
|
result := &scanner.ScanResult{
|
|
ManifestDigest: job.ManifestDigest,
|
|
SBOM: sbomJSON,
|
|
SBOMDigest: sbomDigest,
|
|
}
|
|
|
|
// Step 3: Scan SBOM with Grype (if enabled)
|
|
if wp.cfg.Vuln.Enabled {
|
|
slog.Info("Scanning for vulnerabilities", "repository", job.Repository, "handle", job.UserHandle)
|
|
vulnJSON, vulnDigest, summary, err := scanVulnerabilities(ctx, sbomResult, wp.cfg.Vuln.DBPath)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to scan vulnerabilities: %w", err)
|
|
}
|
|
result.VulnReport = vulnJSON
|
|
result.VulnDigest = vulnDigest
|
|
result.Summary = &summary
|
|
}
|
|
sbomResult = nil // release SBOM catalog for GC
|
|
|
|
duration := time.Since(startTime)
|
|
slog.Info("Scan pipeline completed",
|
|
"repository", job.Repository,
|
|
"duration", duration)
|
|
|
|
return result, nil
|
|
}
|
|
|
|
func ensureDir(path string) error {
|
|
return os.MkdirAll(path, 0755)
|
|
}
|