Files
at-container-registry/scanner/internal/scan/extractor.go
2026-02-18 20:26:49 -06:00

197 lines
5.9 KiB
Go

package scan
import (
"crypto/sha256"
"encoding/json"
"fmt"
"log/slog"
"os"
"path/filepath"
"strings"
scanner "atcr.io/scanner"
"atcr.io/scanner/internal/client"
)
// OCI image layout types. They are marshalled to JSON when the layout is
// written to disk; the struct tags define the exact key names emitted.
type (
	// ociDescriptor references a blob by media type, digest and size.
	ociDescriptor struct {
		MediaType string `json:"mediaType"`
		Digest    string `json:"digest"`
		Size      int64  `json:"size"`
	}

	// ociManifest describes one image: its config blob and its layer blobs.
	// MediaType is omitted from the JSON output when empty.
	ociManifest struct {
		SchemaVersion int             `json:"schemaVersion"`
		MediaType     string          `json:"mediaType,omitempty"`
		Config        ociDescriptor   `json:"config"`
		Layers        []ociDescriptor `json:"layers"`
	}

	// ociIndex is the shape of index.json: a list of manifest descriptors.
	ociIndex struct {
		SchemaVersion int             `json:"schemaVersion"`
		Manifests     []ociDescriptor `json:"manifests"`
	}
)
// buildOCILayout downloads the image blobs described by job and assembles an
// OCI image layout directory inside tmpDir.
//
// Instead of extracting layers to a rootfs (which requires decompression and
// causes permission/security issues), the compressed blobs are written as-is
// and Syft's stereoscope is left to handle layer processing internally.
//
// Layout structure:
//
//	scan-*/
//	├── oci-layout
//	├── index.json
//	└── blobs/sha256/
//	    ├── <manifest-hex>
//	    ├── <config-hex>
//	    └── <layer-hex>...
//
// Layers with an empty digest, or with a non-empty media type that does not
// contain "tar" (cosign signatures, in-toto attestations, etc.), are neither
// downloaded nor listed in the generated manifest. A digest that occurs more
// than once is downloaded only once but still listed once per occurrence.
//
// secret is passed through to downloadBlob unchanged.
//
// On success it returns the layout directory and a cleanup function that
// removes it. On failure the partially built directory is removed before
// returning, and the returned path and cleanup function are "" and nil.
func buildOCILayout(job *scanner.ScanJob, tmpDir, secret string) (string, func(), error) {
	scanDir, err := os.MkdirTemp(tmpDir, "scan-*")
	if err != nil {
		return "", nil, fmt.Errorf("failed to create temp directory: %w", err)
	}

	cleanup := func() {
		if err := os.RemoveAll(scanDir); err != nil {
			slog.Warn("Failed to clean up temp directory", "dir", scanDir, "error", err)
		}
	}

	// Single failure guard: every error return below removes the partial
	// layout without each path having to remember to call cleanup itself.
	built := false
	defer func() {
		if !built {
			cleanup()
		}
	}()

	blobsDir := filepath.Join(scanDir, "blobs", "sha256")
	if err := os.MkdirAll(blobsDir, 0755); err != nil {
		return "", nil, fmt.Errorf("failed to create blobs directory: %w", err)
	}

	// Download config blob
	if job.Config.Digest == "" {
		return "", nil, fmt.Errorf("config blob has empty digest, cannot download")
	}
	slog.Info("Downloading config blob", "digest", job.Config.Digest)
	if err := downloadBlob(job, job.Config.Digest, blobsDir, secret); err != nil {
		return "", nil, fmt.Errorf("failed to download config blob: %w", err)
	}

	// Digests already present in blobsDir, so repeated digests are not
	// fetched again.
	fetched := map[string]struct{}{job.Config.Digest: {}}

	manifest := ociManifest{
		SchemaVersion: 2,
		MediaType:     "application/vnd.oci.image.manifest.v1+json",
		Config: ociDescriptor{
			MediaType: defaultMediaType(job.Config.MediaType, "application/vnd.oci.image.config.v1+json"),
			Digest:    job.Config.Digest,
			Size:      job.Config.Size,
		},
		Layers: make([]ociDescriptor, 0, len(job.Layers)),
	}

	// Download layer blobs (no extraction — kept compressed) and record each
	// accepted layer in the manifest in the same pass, so the set of
	// downloaded blobs and the set of listed layers cannot diverge.
	for i, layer := range job.Layers {
		if layer.Digest == "" {
			slog.Warn("Skipping layer with empty digest", "index", i)
			continue
		}
		// Skip non-tar layers (cosign signatures, in-toto attestations, etc.)
		if layer.MediaType != "" && !strings.Contains(layer.MediaType, "tar") {
			slog.Info("Skipping non-tar layer", "index", i, "digest", layer.Digest, "mediaType", layer.MediaType)
			continue
		}

		if _, done := fetched[layer.Digest]; done {
			slog.Debug("Layer blob already downloaded", "index", i, "digest", layer.Digest)
		} else {
			slog.Info("Downloading layer", "index", i, "digest", layer.Digest, "size", layer.Size, "mediaType", layer.MediaType)
			if err := downloadBlob(job, layer.Digest, blobsDir, secret); err != nil {
				return "", nil, fmt.Errorf("failed to download layer %d: %w", i, err)
			}
			fetched[layer.Digest] = struct{}{}
		}

		manifest.Layers = append(manifest.Layers, ociDescriptor{
			MediaType: defaultMediaType(layer.MediaType, "application/vnd.oci.image.layer.v1.tar+gzip"),
			Digest:    layer.Digest,
			Size:      layer.Size,
		})
	}

	// Write manifest blob. Its file name and digest are derived from the
	// SHA-256 of the exact bytes written.
	manifestJSON, err := json.Marshal(manifest)
	if err != nil {
		return "", nil, fmt.Errorf("failed to marshal manifest: %w", err)
	}
	manifestHash := sha256.Sum256(manifestJSON)
	manifestDigest := fmt.Sprintf("sha256:%x", manifestHash)
	manifestPath := filepath.Join(blobsDir, fmt.Sprintf("%x", manifestHash))
	if err := os.WriteFile(manifestPath, manifestJSON, 0644); err != nil {
		return "", nil, fmt.Errorf("failed to write manifest blob: %w", err)
	}

	// Write index.json pointing at the manifest blob written above.
	index := ociIndex{
		SchemaVersion: 2,
		Manifests: []ociDescriptor{
			{
				MediaType: "application/vnd.oci.image.manifest.v1+json",
				Digest:    manifestDigest,
				Size:      int64(len(manifestJSON)),
			},
		},
	}
	indexJSON, err := json.Marshal(index)
	if err != nil {
		return "", nil, fmt.Errorf("failed to marshal index: %w", err)
	}
	if err := os.WriteFile(filepath.Join(scanDir, "index.json"), indexJSON, 0644); err != nil {
		return "", nil, fmt.Errorf("failed to write index.json: %w", err)
	}

	// Write oci-layout file
	ociLayout := []byte(`{"imageLayoutVersion":"1.0.0"}`)
	if err := os.WriteFile(filepath.Join(scanDir, "oci-layout"), ociLayout, 0644); err != nil {
		return "", nil, fmt.Errorf("failed to write oci-layout: %w", err)
	}

	slog.Info("OCI layout built",
		"dir", scanDir,
		"layers", len(manifest.Layers),
		"manifestDigest", manifestDigest)

	built = true
	return scanDir, cleanup, nil
}
// downloadBlob downloads the blob identified by digest into blobsDir, naming
// the file after the encoded (post-colon) part of the digest. It first asks
// the hold at job.HoldEndpoint / job.HoldDID for a presigned URL, then
// fetches that URL to the destination path.
//
// The encoded part of the digest is validated before it is used as a file
// name: it must be non-empty and consist only of characters allowed by the
// OCI digest grammar ([a-zA-Z0-9=_-]). This rejects values containing path
// separators or "..", which would otherwise let a crafted digest write
// outside blobsDir.
//
// It returns an error if the digest is invalid, if the presigned URL cannot
// be obtained, or if the download fails.
func downloadBlob(job *scanner.ScanJob, digest, blobsDir, secret string) error {
	hex := digestHex(digest)
	if hex == "" {
		return fmt.Errorf("invalid blob digest %q: empty encoded part", digest)
	}
	for _, r := range hex {
		switch {
		case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9',
			r == '=', r == '_', r == '-':
			// Allowed by the OCI digest "encoded" grammar.
		default:
			return fmt.Errorf("invalid blob digest %q: unexpected character %q", digest, r)
		}
	}
	destPath := filepath.Join(blobsDir, hex)

	presignedURL, err := client.GetBlobPresignedURL(job.HoldEndpoint, job.HoldDID, digest, secret)
	if err != nil {
		return fmt.Errorf("failed to get presigned URL for %s: %w", digest, err)
	}
	return client.DownloadBlob(presignedURL, destPath)
}
// digestHex returns everything after the first ':' in digest
// (e.g., "sha256:abc123" → "abc123"). A digest without a ':' is returned
// unchanged.
func digestHex(digest string) string {
	sep := strings.IndexByte(digest, ':')
	if sep < 0 {
		return digest
	}
	return digest[sep+1:]
}
// defaultMediaType returns mediaType, or fallback when mediaType is empty.
func defaultMediaType(mediaType, fallback string) string {
	if mediaType != "" {
		return mediaType
	}
	return fallback
}