diff --git a/src/extract.go b/src/extract.go index e8d39cd..094f5bd 100644 --- a/src/extract.go +++ b/src/extract.go @@ -5,6 +5,7 @@ import ( "archive/zip" "bytes" "compress/gzip" + "context" "errors" "fmt" "io" @@ -25,24 +26,30 @@ func boundArchiveStream(reader io.Reader) io.Reader { fmt.Errorf("%w: %s limit exceeded", ErrArchiveTooLarge, config.Limits.MaxSiteSize.HR())) } -func ExtractGzip(reader io.Reader, next func(io.Reader) (*Manifest, error)) (*Manifest, error) { +func ExtractGzip( + ctx context.Context, reader io.Reader, + next func(context.Context, io.Reader) (*Manifest, error), +) (*Manifest, error) { stream, err := gzip.NewReader(reader) if err != nil { return nil, err } defer stream.Close() - return next(boundArchiveStream(stream)) + return next(ctx, boundArchiveStream(stream)) } -func ExtractZstd(reader io.Reader, next func(io.Reader) (*Manifest, error)) (*Manifest, error) { +func ExtractZstd( + ctx context.Context, reader io.Reader, + next func(context.Context, io.Reader) (*Manifest, error), +) (*Manifest, error) { stream, err := zstd.NewReader(reader) if err != nil { return nil, err } defer stream.Close() - return next(boundArchiveStream(stream)) + return next(ctx, boundArchiveStream(stream)) } // Returns a map of git hash to entry. If `manifest` is nil, returns an empty map. @@ -62,21 +69,26 @@ func indexManifestByGitHash(manifest *Manifest) map[string]*Entry { func addSymlinkOrBlobReference( manifest *Manifest, fileName string, target string, index map[string]*Entry, -) { +) *Entry { if hash, found := strings.CutPrefix(target, BlobReferencePrefix); found { if entry, found := index[hash]; found { manifest.Contents[fileName] = entry + return entry } else { AddProblem(manifest, fileName, "unresolved reference: %s", target) + return nil } } else { - AddSymlink(manifest, fileName, target) + return AddSymlink(manifest, fileName, target) } } -func ExtractTar(reader io.Reader, oldManifest *Manifest) (*Manifest, error) { +func ExtractTar(ctx context.Context, reader io.Reader, oldManifest *Manifest) (*Manifest, error) { archive := tar.NewReader(reader) + var dataBytesRecycled int64 + var dataBytesTransferred int64 + index := indexManifestByGitHash(oldManifest) manifest := NewManifest() for { @@ -105,8 +117,10 @@ func ExtractTar(reader io.Reader, oldManifest *Manifest) (*Manifest, error) { return nil, fmt.Errorf("tar: %s: %w", fileName, err) } AddFile(manifest, fileName, fileData) + dataBytesTransferred += int64(len(fileData)) case tar.TypeSymlink: - addSymlinkOrBlobReference(manifest, fileName, header.Linkname, index) + entry := addSymlinkOrBlobReference(manifest, fileName, header.Linkname, index) + dataBytesRecycled += entry.GetOriginalSize() case tar.TypeDir: AddDirectory(manifest, fileName) default: @@ -114,10 +128,17 @@ func ExtractTar(reader io.Reader, oldManifest *Manifest) (*Manifest, error) { continue } } + + logc.Printf(ctx, + "reuse: %s recycled, %s transferred\n", + datasize.ByteSize(dataBytesRecycled).HR(), + datasize.ByteSize(dataBytesTransferred).HR(), + ) + return manifest, nil } -func ExtractZip(reader io.Reader, oldManifest *Manifest) (*Manifest, error) { +func ExtractZip(ctx context.Context, reader io.Reader, oldManifest *Manifest) (*Manifest, error) { data, err := io.ReadAll(reader) if err != nil { return nil, err @@ -141,6 +162,9 @@ func ExtractZip(reader io.Reader, oldManifest *Manifest) (*Manifest, error) { ) } + var dataBytesRecycled int64 + var dataBytesTransferred int64 + index := indexManifestByGitHash(oldManifest) manifest := NewManifest() for _, file := range archive.File { @@ -159,11 +183,20 @@ func ExtractZip(reader io.Reader, oldManifest *Manifest) (*Manifest, error) { } if file.Mode()&os.ModeSymlink != 0 { - addSymlinkOrBlobReference(manifest, file.Name, string(fileData), index) + entry := addSymlinkOrBlobReference(manifest, file.Name, string(fileData), index) + dataBytesRecycled += entry.GetOriginalSize() } else { AddFile(manifest, file.Name, fileData) + dataBytesTransferred += int64(len(fileData)) } } } + + logc.Printf(ctx, + "reuse: %s recycled, %s transferred\n", + datasize.ByteSize(dataBytesRecycled).HR(), + datasize.ByteSize(dataBytesTransferred).HR(), + ) + return manifest, nil } diff --git a/src/fetch.go b/src/fetch.go index 30a7af8..be89920 100644 --- a/src/fetch.go +++ b/src/fetch.go @@ -131,9 +131,8 @@ func FetchRepository( } // Collect checkout statistics. - var dataBytesFromOldManifest int64 - var dataBytesFromGitCheckout int64 - var dataBytesFromGitTransport int64 + var dataBytesRecycled int64 + var dataBytesTransferred int64 // First, see if we can extract the blobs from the old manifest. This is the preferred option // because it avoids both network transfers and recompression. Note that we do not request @@ -143,7 +142,7 @@ func FetchRepository( if manifestEntry, found := blobsNeeded[hash]; found { manifestEntry.Reset() proto.Merge(manifestEntry, oldManifestEntry) - dataBytesFromOldManifest += oldManifestEntry.GetOriginalSize() + dataBytesRecycled += oldManifestEntry.GetOriginalSize() delete(blobsNeeded, hash) } } @@ -154,7 +153,7 @@ func FetchRepository( // clone despite asking for a partial clone. for hash, manifestEntry := range blobsNeeded { if err := readGitBlob(repo, hash, manifestEntry); err == nil { - dataBytesFromGitCheckout += manifestEntry.GetOriginalSize() + dataBytesTransferred += manifestEntry.GetOriginalSize() delete(blobsNeeded, hash) } } @@ -197,15 +196,15 @@ func FetchRepository( if err := readGitBlob(repo, hash, manifestEntry); err != nil { return nil, err } - dataBytesFromGitTransport += manifestEntry.GetOriginalSize() + dataBytesTransferred += manifestEntry.GetOriginalSize() delete(blobsNeeded, hash) } } logc.Printf(ctx, - "fetch: %s reused, %s received\n", - datasize.ByteSize(dataBytesFromOldManifest).HR(), - datasize.ByteSize(dataBytesFromGitCheckout+dataBytesFromGitTransport).HR(), + "reuse: %s recycled, %s transferred\n", + datasize.ByteSize(dataBytesRecycled).HR(), + datasize.ByteSize(dataBytesTransferred).HR(), ) return manifest, nil diff --git a/src/update.go b/src/update.go index 8d90a2b..8020d9d 100644 --- a/src/update.go +++ b/src/update.go @@ -125,24 +125,24 @@ func UpdateFromArchive( // Ignore errors; worst case we have to re-fetch all of the blobs. oldManifest, _, _ := backend.GetManifest(ctx, webRoot, GetManifestOptions{}) - extractTar := func(reader io.Reader) (*Manifest, error) { - return ExtractTar(reader, oldManifest) + extractTar := func(ctx context.Context, reader io.Reader) (*Manifest, error) { + return ExtractTar(ctx, reader, oldManifest) } var newManifest *Manifest switch contentType { case "application/x-tar": logc.Printf(ctx, "update %s: (tar)", webRoot) - newManifest, err = extractTar(reader) // yellow? + newManifest, err = extractTar(ctx, reader) // yellow? case "application/x-tar+gzip": logc.Printf(ctx, "update %s: (tar.gz)", webRoot) - newManifest, err = ExtractGzip(reader, extractTar) // definitely yellow. + newManifest, err = ExtractGzip(ctx, reader, extractTar) // definitely yellow. case "application/x-tar+zstd": logc.Printf(ctx, "update %s: (tar.zst)", webRoot) - newManifest, err = ExtractZstd(reader, extractTar) + newManifest, err = ExtractZstd(ctx, reader, extractTar) case "application/zip": logc.Printf(ctx, "update %s: (zip)", webRoot) - newManifest, err = ExtractZip(reader, oldManifest) + newManifest, err = ExtractZip(ctx, reader, oldManifest) default: err = errArchiveFormat } @@ -177,7 +177,7 @@ func PartialUpdateFromArchive( return UpdateResult{UpdateError, nil, err} } - applyTarPatch := func(reader io.Reader) (*Manifest, error) { + applyTarPatch := func(ctx context.Context, reader io.Reader) (*Manifest, error) { // Clone the manifest before starting to mutate it. `GetManifest` may return cached // `*Manifest` objects, which should never be mutated. newManifest := &Manifest{} @@ -193,13 +193,13 @@ func PartialUpdateFromArchive( switch contentType { case "application/x-tar": logc.Printf(ctx, "patch %s: (tar)", webRoot) - newManifest, err = applyTarPatch(reader) + newManifest, err = applyTarPatch(ctx, reader) case "application/x-tar+gzip": logc.Printf(ctx, "patch %s: (tar.gz)", webRoot) - newManifest, err = ExtractGzip(reader, applyTarPatch) + newManifest, err = ExtractGzip(ctx, reader, applyTarPatch) case "application/x-tar+zstd": logc.Printf(ctx, "patch %s: (tar.zst)", webRoot) - newManifest, err = ExtractZstd(reader, applyTarPatch) + newManifest, err = ExtractZstd(ctx, reader, applyTarPatch) default: err = errArchiveFormat }