Replace s3GetObjectErrorsCount metric with *ResponseCount.

The former metric was misnamed: it only counted NoSuchKey errors.
Also, it was applied *after* the cache, meaning it was just a count
of every request that got a successful 404 from the S3 backend.
Also, it pooled blob and manifest requests together.

The new metric is 1-to-1 correspondent to S3 requests and distinguishes
between different kinds of errors. Also, it distinguishes kinds of
requests. Example output:

    git_pages_s3_get_object_responses_count{code="NoSuchKey",kind="manifest"} 1
    git_pages_s3_get_object_responses_count{code="OK",kind="blob"} 1
    git_pages_s3_get_object_responses_count{code="OK",kind="manifest"} 1
This commit is contained in:
Catherine
2025-11-28 23:56:20 +00:00
parent f9669e1c69
commit 0b82dcbc25

View File

@@ -35,7 +35,7 @@ var (
manifestCacheEvictionsCount prometheus.Counter
s3GetObjectDurationSeconds *prometheus.HistogramVec
s3GetObjectErrorsCount *prometheus.CounterVec
s3GetObjectResponseCount *prometheus.CounterVec
)
func initS3BackendMetrics() {
@@ -95,10 +95,10 @@ func initS3BackendMetrics() {
NativeHistogramMaxBucketNumber: 100,
NativeHistogramMinResetDuration: 10 * time.Minute,
}, []string{"kind"})
s3GetObjectErrorsCount = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "git_pages_s3_get_object_errors_count",
Help: "Count of s3:GetObject errors",
}, []string{"object_kind"})
s3GetObjectResponseCount = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "git_pages_s3_get_object_responses_count",
Help: "Count of s3:GetObject responses",
}, []string{"kind", "code"})
}
// Blobs can be safely cached indefinitely. They only need to be evicted to preserve memory.
@@ -296,11 +296,20 @@ func (s3 *S3Backend) GetBlob(
return &CachedBlob{data, stat.LastModified}, nil
}
observer := func(ctx context.Context, name string) (*CachedBlob, error) {
cached, err := loader(ctx, name)
var code = "OK"
if resp, ok := err.(minio.ErrorResponse); ok {
code = resp.Code
}
s3GetObjectResponseCount.With(prometheus.Labels{"kind": "blob", "code": code}).Inc()
return cached, err
}
var cached *CachedBlob
cached, err = s3.blobCache.Get(ctx, name, otter.LoaderFunc[string, *CachedBlob](loader))
cached, err = s3.blobCache.Get(ctx, name, otter.LoaderFunc[string, *CachedBlob](observer))
if err != nil {
if errResp := minio.ToErrorResponse(err); errResp.Code == "NoSuchKey" {
s3GetObjectErrorsCount.With(prometheus.Labels{"object_kind": "blob"}).Inc()
err = fmt.Errorf("%w: %s", ErrObjectNotFound, errResp.Key)
}
} else {
@@ -427,8 +436,18 @@ func (l s3ManifestLoader) load(ctx context.Context, name string, oldManifest *Ca
return &CachedManifest{manifest, uint32(len(data)), stat.LastModified, stat.ETag, nil}, nil
}
observer := func() (*CachedManifest, error) {
cached, err := loader()
var code = "OK"
if resp, ok := err.(minio.ErrorResponse); ok {
code = resp.Code
}
s3GetObjectResponseCount.With(prometheus.Labels{"kind": "manifest", "code": code}).Inc()
return cached, err
}
startTime := time.Now()
cached, err := loader()
cached, err := observer()
s3GetObjectDurationSeconds.
With(prometheus.Labels{"kind": "manifest"}).
Observe(time.Since(startTime).Seconds())
@@ -436,7 +455,6 @@ func (l s3ManifestLoader) load(ctx context.Context, name string, oldManifest *Ca
if err != nil {
errResp := minio.ToErrorResponse(err)
if errResp.Code == "NoSuchKey" {
s3GetObjectErrorsCount.With(prometheus.Labels{"object_kind": "manifest"}).Inc()
err = fmt.Errorf("%w: %s", ErrObjectNotFound, errResp.Key)
return &CachedManifest{nil, 1, time.Time{}, "", err}, nil
} else if errResp.StatusCode == http.StatusNotModified && oldManifest != nil {