mirror of
https://codeberg.org/git-pages/git-pages.git
synced 2026-05-14 11:11:35 +00:00
Collect statistics on blob reuse during archive upload.
This commit is contained in:
@@ -5,6 +5,7 @@ import (
|
||||
"archive/zip"
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
@@ -25,24 +26,30 @@ func boundArchiveStream(reader io.Reader) io.Reader {
|
||||
fmt.Errorf("%w: %s limit exceeded", ErrArchiveTooLarge, config.Limits.MaxSiteSize.HR()))
|
||||
}
|
||||
|
||||
func ExtractGzip(reader io.Reader, next func(io.Reader) (*Manifest, error)) (*Manifest, error) {
|
||||
func ExtractGzip(
|
||||
ctx context.Context, reader io.Reader,
|
||||
next func(context.Context, io.Reader) (*Manifest, error),
|
||||
) (*Manifest, error) {
|
||||
stream, err := gzip.NewReader(reader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer stream.Close()
|
||||
|
||||
return next(boundArchiveStream(stream))
|
||||
return next(ctx, boundArchiveStream(stream))
|
||||
}
|
||||
|
||||
func ExtractZstd(reader io.Reader, next func(io.Reader) (*Manifest, error)) (*Manifest, error) {
|
||||
func ExtractZstd(
|
||||
ctx context.Context, reader io.Reader,
|
||||
next func(context.Context, io.Reader) (*Manifest, error),
|
||||
) (*Manifest, error) {
|
||||
stream, err := zstd.NewReader(reader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer stream.Close()
|
||||
|
||||
return next(boundArchiveStream(stream))
|
||||
return next(ctx, boundArchiveStream(stream))
|
||||
}
|
||||
|
||||
// Returns a map of git hash to entry. If `manifest` is nil, returns an empty map.
|
||||
@@ -62,21 +69,26 @@ func indexManifestByGitHash(manifest *Manifest) map[string]*Entry {
|
||||
|
||||
func addSymlinkOrBlobReference(
|
||||
manifest *Manifest, fileName string, target string, index map[string]*Entry,
|
||||
) {
|
||||
) *Entry {
|
||||
if hash, found := strings.CutPrefix(target, BlobReferencePrefix); found {
|
||||
if entry, found := index[hash]; found {
|
||||
manifest.Contents[fileName] = entry
|
||||
return entry
|
||||
} else {
|
||||
AddProblem(manifest, fileName, "unresolved reference: %s", target)
|
||||
return nil
|
||||
}
|
||||
} else {
|
||||
AddSymlink(manifest, fileName, target)
|
||||
return AddSymlink(manifest, fileName, target)
|
||||
}
|
||||
}
|
||||
|
||||
func ExtractTar(reader io.Reader, oldManifest *Manifest) (*Manifest, error) {
|
||||
func ExtractTar(ctx context.Context, reader io.Reader, oldManifest *Manifest) (*Manifest, error) {
|
||||
archive := tar.NewReader(reader)
|
||||
|
||||
var dataBytesRecycled int64
|
||||
var dataBytesTransferred int64
|
||||
|
||||
index := indexManifestByGitHash(oldManifest)
|
||||
manifest := NewManifest()
|
||||
for {
|
||||
@@ -105,8 +117,10 @@ func ExtractTar(reader io.Reader, oldManifest *Manifest) (*Manifest, error) {
|
||||
return nil, fmt.Errorf("tar: %s: %w", fileName, err)
|
||||
}
|
||||
AddFile(manifest, fileName, fileData)
|
||||
dataBytesTransferred += int64(len(fileData))
|
||||
case tar.TypeSymlink:
|
||||
addSymlinkOrBlobReference(manifest, fileName, header.Linkname, index)
|
||||
entry := addSymlinkOrBlobReference(manifest, fileName, header.Linkname, index)
|
||||
dataBytesRecycled += entry.GetOriginalSize()
|
||||
case tar.TypeDir:
|
||||
AddDirectory(manifest, fileName)
|
||||
default:
|
||||
@@ -114,10 +128,17 @@ func ExtractTar(reader io.Reader, oldManifest *Manifest) (*Manifest, error) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
logc.Printf(ctx,
|
||||
"reuse: %s recycled, %s transferred\n",
|
||||
datasize.ByteSize(dataBytesRecycled).HR(),
|
||||
datasize.ByteSize(dataBytesTransferred).HR(),
|
||||
)
|
||||
|
||||
return manifest, nil
|
||||
}
|
||||
|
||||
func ExtractZip(reader io.Reader, oldManifest *Manifest) (*Manifest, error) {
|
||||
func ExtractZip(ctx context.Context, reader io.Reader, oldManifest *Manifest) (*Manifest, error) {
|
||||
data, err := io.ReadAll(reader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -141,6 +162,9 @@ func ExtractZip(reader io.Reader, oldManifest *Manifest) (*Manifest, error) {
|
||||
)
|
||||
}
|
||||
|
||||
var dataBytesRecycled int64
|
||||
var dataBytesTransferred int64
|
||||
|
||||
index := indexManifestByGitHash(oldManifest)
|
||||
manifest := NewManifest()
|
||||
for _, file := range archive.File {
|
||||
@@ -159,11 +183,20 @@ func ExtractZip(reader io.Reader, oldManifest *Manifest) (*Manifest, error) {
|
||||
}
|
||||
|
||||
if file.Mode()&os.ModeSymlink != 0 {
|
||||
addSymlinkOrBlobReference(manifest, file.Name, string(fileData), index)
|
||||
entry := addSymlinkOrBlobReference(manifest, file.Name, string(fileData), index)
|
||||
dataBytesRecycled += entry.GetOriginalSize()
|
||||
} else {
|
||||
AddFile(manifest, file.Name, fileData)
|
||||
dataBytesTransferred += int64(len(fileData))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
logc.Printf(ctx,
|
||||
"reuse: %s recycled, %s transferred\n",
|
||||
datasize.ByteSize(dataBytesRecycled).HR(),
|
||||
datasize.ByteSize(dataBytesTransferred).HR(),
|
||||
)
|
||||
|
||||
return manifest, nil
|
||||
}
|
||||
|
||||
17
src/fetch.go
17
src/fetch.go
@@ -131,9 +131,8 @@ func FetchRepository(
|
||||
}
|
||||
|
||||
// Collect checkout statistics.
|
||||
var dataBytesFromOldManifest int64
|
||||
var dataBytesFromGitCheckout int64
|
||||
var dataBytesFromGitTransport int64
|
||||
var dataBytesRecycled int64
|
||||
var dataBytesTransferred int64
|
||||
|
||||
// First, see if we can extract the blobs from the old manifest. This is the preferred option
|
||||
// because it avoids both network transfers and recompression. Note that we do not request
|
||||
@@ -143,7 +142,7 @@ func FetchRepository(
|
||||
if manifestEntry, found := blobsNeeded[hash]; found {
|
||||
manifestEntry.Reset()
|
||||
proto.Merge(manifestEntry, oldManifestEntry)
|
||||
dataBytesFromOldManifest += oldManifestEntry.GetOriginalSize()
|
||||
dataBytesRecycled += oldManifestEntry.GetOriginalSize()
|
||||
delete(blobsNeeded, hash)
|
||||
}
|
||||
}
|
||||
@@ -154,7 +153,7 @@ func FetchRepository(
|
||||
// clone despite asking for a partial clone.
|
||||
for hash, manifestEntry := range blobsNeeded {
|
||||
if err := readGitBlob(repo, hash, manifestEntry); err == nil {
|
||||
dataBytesFromGitCheckout += manifestEntry.GetOriginalSize()
|
||||
dataBytesTransferred += manifestEntry.GetOriginalSize()
|
||||
delete(blobsNeeded, hash)
|
||||
}
|
||||
}
|
||||
@@ -197,15 +196,15 @@ func FetchRepository(
|
||||
if err := readGitBlob(repo, hash, manifestEntry); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dataBytesFromGitTransport += manifestEntry.GetOriginalSize()
|
||||
dataBytesTransferred += manifestEntry.GetOriginalSize()
|
||||
delete(blobsNeeded, hash)
|
||||
}
|
||||
}
|
||||
|
||||
logc.Printf(ctx,
|
||||
"fetch: %s reused, %s received\n",
|
||||
datasize.ByteSize(dataBytesFromOldManifest).HR(),
|
||||
datasize.ByteSize(dataBytesFromGitCheckout+dataBytesFromGitTransport).HR(),
|
||||
"reuse: %s recycled, %s transferred\n",
|
||||
datasize.ByteSize(dataBytesRecycled).HR(),
|
||||
datasize.ByteSize(dataBytesTransferred).HR(),
|
||||
)
|
||||
|
||||
return manifest, nil
|
||||
|
||||
@@ -125,24 +125,24 @@ func UpdateFromArchive(
|
||||
// Ignore errors; worst case we have to re-fetch all of the blobs.
|
||||
oldManifest, _, _ := backend.GetManifest(ctx, webRoot, GetManifestOptions{})
|
||||
|
||||
extractTar := func(reader io.Reader) (*Manifest, error) {
|
||||
return ExtractTar(reader, oldManifest)
|
||||
extractTar := func(ctx context.Context, reader io.Reader) (*Manifest, error) {
|
||||
return ExtractTar(ctx, reader, oldManifest)
|
||||
}
|
||||
|
||||
var newManifest *Manifest
|
||||
switch contentType {
|
||||
case "application/x-tar":
|
||||
logc.Printf(ctx, "update %s: (tar)", webRoot)
|
||||
newManifest, err = extractTar(reader) // yellow?
|
||||
newManifest, err = extractTar(ctx, reader) // yellow?
|
||||
case "application/x-tar+gzip":
|
||||
logc.Printf(ctx, "update %s: (tar.gz)", webRoot)
|
||||
newManifest, err = ExtractGzip(reader, extractTar) // definitely yellow.
|
||||
newManifest, err = ExtractGzip(ctx, reader, extractTar) // definitely yellow.
|
||||
case "application/x-tar+zstd":
|
||||
logc.Printf(ctx, "update %s: (tar.zst)", webRoot)
|
||||
newManifest, err = ExtractZstd(reader, extractTar)
|
||||
newManifest, err = ExtractZstd(ctx, reader, extractTar)
|
||||
case "application/zip":
|
||||
logc.Printf(ctx, "update %s: (zip)", webRoot)
|
||||
newManifest, err = ExtractZip(reader, oldManifest)
|
||||
newManifest, err = ExtractZip(ctx, reader, oldManifest)
|
||||
default:
|
||||
err = errArchiveFormat
|
||||
}
|
||||
@@ -177,7 +177,7 @@ func PartialUpdateFromArchive(
|
||||
return UpdateResult{UpdateError, nil, err}
|
||||
}
|
||||
|
||||
applyTarPatch := func(reader io.Reader) (*Manifest, error) {
|
||||
applyTarPatch := func(ctx context.Context, reader io.Reader) (*Manifest, error) {
|
||||
// Clone the manifest before starting to mutate it. `GetManifest` may return cached
|
||||
// `*Manifest` objects, which should never be mutated.
|
||||
newManifest := &Manifest{}
|
||||
@@ -193,13 +193,13 @@ func PartialUpdateFromArchive(
|
||||
switch contentType {
|
||||
case "application/x-tar":
|
||||
logc.Printf(ctx, "patch %s: (tar)", webRoot)
|
||||
newManifest, err = applyTarPatch(reader)
|
||||
newManifest, err = applyTarPatch(ctx, reader)
|
||||
case "application/x-tar+gzip":
|
||||
logc.Printf(ctx, "patch %s: (tar.gz)", webRoot)
|
||||
newManifest, err = ExtractGzip(reader, applyTarPatch)
|
||||
newManifest, err = ExtractGzip(ctx, reader, applyTarPatch)
|
||||
case "application/x-tar+zstd":
|
||||
logc.Printf(ctx, "patch %s: (tar.zst)", webRoot)
|
||||
newManifest, err = ExtractZstd(reader, applyTarPatch)
|
||||
newManifest, err = ExtractZstd(ctx, reader, applyTarPatch)
|
||||
default:
|
||||
err = errArchiveFormat
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user