Resolve /git/blobs/ symlinks as blob references to the old manifest.

This will be used for incremental archive uploads.
This commit is contained in:
Catherine
2025-12-05 10:52:54 +00:00
parent eb6418b9b6
commit 50d28f3c8b
3 changed files with 64 additions and 21 deletions

View File

@@ -12,11 +12,14 @@ import (
"strings"
"github.com/c2h5oh/datasize"
"github.com/go-git/go-git/v6/plumbing"
"github.com/klauspost/compress/zstd"
)
var ErrArchiveTooLarge = errors.New("archive too large")
const BlobReferencePrefix = "/git/blobs/"
func boundArchiveStream(reader io.Reader) io.Reader {
return ReadAtMost(reader, int64(config.Limits.MaxSiteSize.Bytes()),
fmt.Errorf("%w: %s limit exceeded", ErrArchiveTooLarge, config.Limits.MaxSiteSize.HR()))
@@ -42,9 +45,39 @@ func ExtractZstd(reader io.Reader, next func(io.Reader) (*Manifest, error)) (*Ma
return next(boundArchiveStream(stream))
}
func ExtractTar(reader io.Reader) (*Manifest, error) {
// Builds a lookup table from git hash to manifest entry. Entries whose git hash is empty
// are left out. If `manifest` is nil, returns an empty map. Panics if a non-empty hash
// is rejected by `plumbing.FromHex`.
func indexManifestByGitHash(manifest *Manifest) map[string]*Entry {
	byHash := make(map[string]*Entry)
	for _, item := range manifest.GetContents() {
		gitHash := item.GetGitHash()
		if gitHash == "" {
			// Nothing to key this entry by.
			continue
		}
		if _, valid := plumbing.FromHex(gitHash); !valid {
			panic(fmt.Errorf("index: malformed hash: %s", gitHash))
		}
		byHash[gitHash] = item
	}
	return byHash
}
// Records the archive member `fileName`, whose link target is `target`, in `manifest`.
//
// A target that starts with `BlobReferencePrefix` is treated as a blob reference: the
// remainder of the target is looked up in `index`, and on a hit that same entry is stored
// under `fileName`. On a miss, a problem is recorded for `fileName` instead. Any other
// target is handed to `AddSymlink` unchanged.
func addSymlinkOrBlobReference(
	manifest *Manifest, fileName string, target string, index map[string]*Entry,
) {
	hash, isBlobRef := strings.CutPrefix(target, BlobReferencePrefix)
	if !isBlobRef {
		AddSymlink(manifest, fileName, target)
		return
	}

	referenced, known := index[hash]
	if !known {
		AddProblem(manifest, fileName, "unresolved reference: %s", target)
		return
	}
	manifest.Contents[fileName] = referenced
}
func ExtractTar(reader io.Reader, oldManifest *Manifest) (*Manifest, error) {
archive := tar.NewReader(reader)
index := indexManifestByGitHash(oldManifest)
manifest := NewManifest()
for {
header, err := archive.Next()
@@ -73,7 +106,7 @@ func ExtractTar(reader io.Reader) (*Manifest, error) {
}
AddFile(manifest, fileName, fileData)
case tar.TypeSymlink:
AddSymlink(manifest, fileName, header.Linkname)
addSymlinkOrBlobReference(manifest, fileName, header.Linkname, index)
case tar.TypeDir:
AddDirectory(manifest, fileName)
default:
@@ -84,7 +117,7 @@ func ExtractTar(reader io.Reader) (*Manifest, error) {
return manifest, nil
}
func ExtractZip(reader io.Reader) (*Manifest, error) {
func ExtractZip(reader io.Reader, oldManifest *Manifest) (*Manifest, error) {
data, err := io.ReadAll(reader)
if err != nil {
return nil, err
@@ -108,6 +141,7 @@ func ExtractZip(reader io.Reader) (*Manifest, error) {
)
}
index := indexManifestByGitHash(oldManifest)
manifest := NewManifest()
for _, file := range archive.File {
if strings.HasSuffix(file.Name, "/") {
@@ -125,7 +159,7 @@ func ExtractZip(reader io.Reader) (*Manifest, error) {
}
if file.Mode()&os.ModeSymlink != 0 {
AddSymlink(manifest, file.Name, string(fileData))
addSymlinkOrBlobReference(manifest, file.Name, string(fileData), index)
} else {
AddFile(manifest, file.Name, fileData)
}

View File

@@ -104,7 +104,7 @@ func NewManifestEntry(type_ Type, data []byte) *Entry {
return entry
}
func AddFile(manifest *Manifest, path string, data []byte) *Entry {
func AddFile(manifest *Manifest, fileName string, data []byte) *Entry {
// Fill in `git_hash` even for files not originating from git using the SHA256 algorithm;
// we use this primarily for incremental archive uploads, but when support for git SHA256
// repositories is complete, archive uploads and git checkouts will have cross-support for
@@ -113,30 +113,35 @@ func AddFile(manifest *Manifest, path string, data []byte) *Entry {
hasher.Write(data)
entry := NewManifestEntry(Type_InlineFile, data)
entry.GitHash = proto.String(hasher.Sum().String())
manifest.Contents[path] = entry
manifest.Contents[fileName] = entry
return entry
}
func AddSymlink(manifest *Manifest, path string, target string) *Entry {
entry := NewManifestEntry(Type_Symlink, []byte(target))
manifest.Contents[path] = entry
return entry
// AddSymlink stores a symlink entry pointing at `target` under `fileName` in `manifest`
// and returns it. Targets that `path.IsAbs` reports as absolute are rejected: a problem
// is recorded for `fileName`, nothing is added to the contents, and nil is returned.
func AddSymlink(manifest *Manifest, fileName string, target string) *Entry {
	if path.IsAbs(target) {
		AddProblem(manifest, fileName, "absolute symlink: %s", target)
		return nil
	}

	link := NewManifestEntry(Type_Symlink, []byte(target))
	manifest.Contents[fileName] = link
	return link
}
func AddDirectory(manifest *Manifest, path string) *Entry {
path = strings.TrimSuffix(path, "/")
func AddDirectory(manifest *Manifest, dirName string) *Entry {
dirName = strings.TrimSuffix(dirName, "/")
entry := NewManifestEntry(Type_Directory, nil)
manifest.Contents[path] = entry
manifest.Contents[dirName] = entry
return entry
}
func AddProblem(manifest *Manifest, path, format string, args ...any) error {
func AddProblem(manifest *Manifest, pathName, format string, args ...any) error {
cause := fmt.Sprintf(format, args...)
manifest.Problems = append(manifest.Problems, &Problem{
Path: proto.String(path),
Path: proto.String(pathName),
Cause: proto.String(cause),
})
return fmt.Errorf("%s: %s", path, cause)
return fmt.Errorf("%s: %s", pathName, cause)
}
func GetProblemReport(manifest *Manifest) []string {

View File

@@ -122,23 +122,27 @@ func UpdateFromArchive(
) (result UpdateResult) {
var err error
// Ignore errors; here the old manifest is used only to determine the update outcome.
// Ignore errors; worst case we have to re-fetch all of the blobs.
oldManifest, _, _ := backend.GetManifest(ctx, webRoot, GetManifestOptions{})
extractTar := func(reader io.Reader) (*Manifest, error) {
return ExtractTar(reader, oldManifest)
}
var newManifest *Manifest
switch contentType {
case "application/x-tar":
logc.Printf(ctx, "update %s: (tar)", webRoot)
newManifest, err = ExtractTar(reader) // yellow?
newManifest, err = extractTar(reader) // yellow?
case "application/x-tar+gzip":
logc.Printf(ctx, "update %s: (tar.gz)", webRoot)
newManifest, err = ExtractGzip(reader, ExtractTar) // definitely yellow.
newManifest, err = ExtractGzip(reader, extractTar) // definitely yellow.
case "application/x-tar+zstd":
logc.Printf(ctx, "update %s: (tar.zst)", webRoot)
newManifest, err = ExtractZstd(reader, ExtractTar)
newManifest, err = ExtractZstd(reader, extractTar)
case "application/zip":
logc.Printf(ctx, "update %s: (zip)", webRoot)
newManifest, err = ExtractZip(reader)
newManifest, err = ExtractZip(reader, oldManifest)
default:
err = errArchiveFormat
}