versitygw/backend/walk_bench_test.go

// Copyright 2026 Versity Software
// This file is licensed under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

// # Walk benchmarks
//
// ## Quick start – run all small benchmarks
//
//	go test -bench='^BenchmarkWalk[^LH]' -benchtime=6x -count=6 ./backend/
//
// ## Run by tier
//
// Small  (5 k files):
//
//	go test -bench='^BenchmarkWalk[^LH]' -benchtime=6x -count=6 ./backend/
//
// Large (100 k files):
//
//	go test -bench='^BenchmarkWalkLarge' -benchtime=3x ./backend/
//
// Huge (500 k files):
//
//	go test -bench='^BenchmarkWalkHuge' -benchtime=3x ./backend/
//
// Note: Large and Huge benchmarks create fixture files under /tmp on first run.
// The fixtures are reused on subsequent runs as long as
// /tmp/versitygw_walk_bench/large is present. Remove that directory to force a
// full rebuild:
//
//	rm -rf /tmp/versitygw_walk_bench/large
//
// ## Comparing against main using benchstat
//
// Install benchstat if needed:
//
//	go install golang.org/x/perf/cmd/benchstat@latest
//
// 1. Record results on the current branch:
//
//	go test -bench='^BenchmarkWalk[^LH]' -benchtime=3x -count=6 ./backend/ > /tmp/new.txt
//
// 2. Create a worktree for main and record results there:
//
//	git worktree add /tmp/versitygw_main main
//	cp backend/walk_bench_test.go /tmp/versitygw_main/backend/
//	(cd /tmp/versitygw_main && go test -bench='^BenchmarkWalk[^LH]' -benchtime=3x -count=6 ./backend/ > /tmp/old.txt)
//	git worktree remove --force /tmp/versitygw_main
//
// 3. Compare:
//
//	benchstat /tmp/old.txt /tmp/new.txt
//
// Use -count=6 (minimum) for statistically meaningful p-values.
// Use -benchtime=3x (at least 3 iterations per count) to reduce per-run noise.
// For Large/Huge tiers use -benchtime=3x -count=1 (each op already takes seconds).
package backend_test

import (
	"context"
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
	"runtime"
	"sync"
	"testing"
	"time"

	"github.com/versity/versitygw/backend"
	"github.com/versity/versitygw/s3response"
)

// benchRoot is the /tmp directory shared across all benchmarks in the run.
// It is created once by setupBenchDir and once (lazily) by setupLargeBenchDir.
const benchRoot = "/tmp/versitygw_walk_bench"

// Tier sizes:
//
//	Small   –   5000 files per tree
//	Large   – 100000 files per tree
//	Huge    – 500000 files flat only
var (
	benchSetupOnce      sync.Once
	benchLargeSetupOnce sync.Once
)

// parallelMkfile creates a batch of file paths in parallel using a worker
// pool sized to GOMAXPROCS, so setup time scales with CPU count.
func parallelMkfile(b *testing.B, paths []string) {
	b.Helper()
	workers := runtime.GOMAXPROCS(0)
	ch := make(chan string, workers*4)
	var wg sync.WaitGroup
	var mu sync.Mutex
	var firstErr error
	for range workers {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for path := range ch {
				if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
					mu.Lock()
					if firstErr == nil {
						firstErr = err
					}
					mu.Unlock()
					continue
				}
				f, err := os.Create(path)
				if err != nil {
					mu.Lock()
					if firstErr == nil {
						firstErr = err
					}
					mu.Unlock()
					continue
				}
				_ = f.Close()
			}
		}()
	}
	for _, p := range paths {
		ch <- p
	}
	close(ch)
	wg.Wait()
	if firstErr != nil {
		b.Fatalf("bench setup: %v", firstErr)
	}
}

// Structure summary (files under benchRoot):
//
//	small/flat/       – 5000 files directly in the directory
//	small/deep/       – 100 top-level dirs × 50 files each = 5000 files (2 levels)
//	small/wide/       – 200 top-level dirs, 5 subdirs × 5 files = 5000 files (3 levels)
//	small/mixed/      – 10 prefix dirs × 100 files = 1000 files
//
//	large/flat/       – 100000 files flat
//	large/deep/       – 1000 dirs × 100 files = 100000 files
//	large/wide/       – 500 dirs × 10 subdirs × 20 files = 100000 files
//	large/mixed/      – 100 prefix dirs × 1 000 files = 100000 files
//	large/flat_huge/  – 500000 files flat
func setupBenchDir(b *testing.B) {
	b.Helper()
	benchSetupOnce.Do(func() {
		// Reuse fixtures from a previous run if they exist.
		sentinel := filepath.Join(benchRoot, "small", "flat", "file04999.txt")
		if _, err := os.Stat(sentinel); err == nil {
			return
		}
		_ = os.RemoveAll(filepath.Join(benchRoot, "small"))

		var paths []string

		// small/flat/ – 5000 files
		for i := range 5000 {
			paths = append(paths, filepath.Join(benchRoot, "small", "flat", fmt.Sprintf("file%05d.txt", i)))
		}

		// small/deep/ – 100 dirs × 50 files
		for i := range 100 {
			for j := range 50 {
				paths = append(paths, filepath.Join(benchRoot, "small", "deep",
					fmt.Sprintf("dir%03d", i),
					fmt.Sprintf("file%03d.txt", j)))
			}
		}

		// small/wide/ – 200 dirs × 5 subdirs × 5 files
		for i := range 200 {
			for j := range 5 {
				for k := range 5 {
					paths = append(paths, filepath.Join(benchRoot, "small", "wide",
						fmt.Sprintf("dir%03d", i),
						fmt.Sprintf("sub%02d", j),
						fmt.Sprintf("file%02d.txt", k)))
				}
			}
		}

		// small/mixed/ – 10 prefix dirs × 100 files
		for i := range 10 {
			for j := range 100 {
				paths = append(paths, filepath.Join(benchRoot, "small", "mixed",
					fmt.Sprintf("prefix%02d", i),
					fmt.Sprintf("file%04d.txt", j)))
			}
		}

		parallelMkfile(b, paths)
	})
}

func setupLargeBenchDir(b *testing.B) {
	b.Helper()
	setupBenchDir(b) // ensure small/ exists too
	benchLargeSetupOnce.Do(func() {
		// Reuse fixtures from a previous run if they exist.
		sentinel := filepath.Join(benchRoot, "large", "flat_huge", "file0499999.txt")
		if _, err := os.Stat(sentinel); err == nil {
			return
		}
		_ = os.RemoveAll(filepath.Join(benchRoot, "large"))
		var paths []string

		// large/flat/ – 100000 files flat
		for i := range 100_000 {
			paths = append(paths, filepath.Join(benchRoot, "large", "flat", fmt.Sprintf("file%06d.txt", i)))
		}

		// large/deep/ – 1000 dirs × 100 files = 100000 files
		for i := range 1000 {
			for j := range 100 {
				paths = append(paths, filepath.Join(benchRoot, "large", "deep",
					fmt.Sprintf("dir%04d", i),
					fmt.Sprintf("file%03d.txt", j)))
			}
		}

		// large/wide/ – 500 dirs × 10 subdirs × 20 files = 100000 files
		for i := range 500 {
			for j := range 10 {
				for k := range 20 {
					paths = append(paths, filepath.Join(benchRoot, "large", "wide",
						fmt.Sprintf("dir%03d", i),
						fmt.Sprintf("sub%02d", j),
						fmt.Sprintf("file%02d.txt", k)))
				}
			}
		}

		// large/mixed/ – 100 prefix dirs × 1000 files = 100000 files
		for i := range 100 {
			for j := range 1000 {
				paths = append(paths, filepath.Join(benchRoot, "large", "mixed",
					fmt.Sprintf("prefix%03d", i),
					fmt.Sprintf("file%05d.txt", j)))
			}
		}

		// large/flat_huge/ – 500000 files flat
		for i := range 500_000 {
			paths = append(paths, filepath.Join(benchRoot, "large", "flat_huge", fmt.Sprintf("file%07d.txt", i)))
		}

		b.Log("creating large bench fixtures (this runs once per /tmp clean)...")
		parallelMkfile(b, paths)
	})
}

// benchGetObj is a lightweight GetObjFunc that mimics what the POSIX backend
// does: stat the entry and populate the minimum Object fields, returning
// ErrSkipObj for bare directories (no stored etag = no explicit S3 PUT).
func benchGetObj(path string, d fs.DirEntry) (s3response.Object, error) {
	if d.IsDir() {
		return s3response.Object{}, backend.ErrSkipObj
	}
	fi, err := d.Info()
	if err != nil {
		return s3response.Object{}, err
	}
	sz := fi.Size()
	mt := fi.ModTime()
	etag := path // cheap stand-in; avoids md5 cost skewing the walk measurement
	return s3response.Object{
		Key:          &path,
		Size:         &sz,
		LastModified: &mt,
		ETag:         &etag,
	}, nil
}

// runWalk is a convenience wrapper so each benchmark loop is one line.
func runWalk(b *testing.B, fsys fs.FS, prefix, delim, marker string, max int32) {
	b.Helper()
	_, err := backend.Walk(context.Background(), fsys, prefix, delim, marker, max, benchGetObj, nil)
	if err != nil {
		b.Fatalf("Walk: %v", err)
	}
}

// ── Small (5 k files) ─────────────────────────────────────────────────────────

// BenchmarkWalkFlat lists all 5000 files in a single flat directory.
func BenchmarkWalkFlat(b *testing.B) {
	setupBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "small", "flat"))
	for b.Loop() {
		runWalk(b, fsys, "", "", "", 10000)
	}
}

// BenchmarkWalkFlatDelim lists with a "/" delimiter (all files collapse into
// the root; tests the common-prefix fast-path for a flat bucket).
func BenchmarkWalkFlatDelim(b *testing.B) {
	setupBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "small", "flat"))
	for b.Loop() {
		runWalk(b, fsys, "", "/", "", 10000)
	}
}

// BenchmarkWalkFlatPaged benchmarks paginated listing (1000 objects per page,
// walking through 5 pages of the flat directory).
func BenchmarkWalkFlatPaged(b *testing.B) {
	setupBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "small", "flat"))
	for b.Loop() {
		marker := ""
		for {
			res, err := backend.Walk(context.Background(),
				fsys, "", "", marker, 1000, benchGetObj, nil)
			if err != nil {
				b.Fatalf("Walk: %v", err)
			}
			if !res.Truncated {
				break
			}
			marker = res.NextMarker
		}
	}
}

// BenchmarkWalkDeep lists all files recursively through 2 levels of dirs.
func BenchmarkWalkDeep(b *testing.B) {
	setupBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "small", "deep"))
	for b.Loop() {
		runWalk(b, fsys, "", "", "", 10000)
	}
}

// BenchmarkWalkDeepDelim lists with "/" delimiter, returning 100 common
// prefixes instead of descending into every directory.
func BenchmarkWalkDeepDelim(b *testing.B) {
	setupBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "small", "deep"))
	for b.Loop() {
		runWalk(b, fsys, "", "/", "", 10000)
	}
}

// BenchmarkWalkWide lists all 5000 files across a wide tree (3 levels).
func BenchmarkWalkWide(b *testing.B) {
	setupBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "small", "wide"))
	for b.Loop() {
		runWalk(b, fsys, "", "", "", 10000)
	}
}

// BenchmarkWalkWideDelim collapses the wide tree to the top-level dirs.
func BenchmarkWalkWideDelim(b *testing.B) {
	setupBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "small", "wide"))
	for b.Loop() {
		runWalk(b, fsys, "", "/", "", 10000)
	}
}

// BenchmarkWalkWithPrefix exercises the prefix-optimisation.
func BenchmarkWalkWithPrefix(b *testing.B) {
	setupBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "small"))
	for b.Loop() {
		runWalk(b, fsys, "mixed/prefix05/", "", "", 1000)
	}
}

// BenchmarkWalkWithPrefixDelim lists with a prefix and delimiter.
func BenchmarkWalkWithPrefixDelim(b *testing.B) {
	setupBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "small"))
	for b.Loop() {
		runWalk(b, fsys, "mixed/", "/", "", 1000)
	}
}

// BenchmarkWalkWithMarker benchmarks resuming a listing mid-way through the
// mixed/ prefix (simulates the second page of a paginated request).
func BenchmarkWalkWithMarker(b *testing.B) {
	setupBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "small"))
	marker := "mixed/prefix05/file0050.txt"
	for b.Loop() {
		runWalk(b, fsys, "mixed/", "", marker, 1000)
	}
}

// ── Large (100 k files) ──────────────────────────────────────────
// Run with: go test -bench=WalkLarge -benchtime=10s ./backend/

// BenchmarkWalkLargeFlat lists 100000 files in a flat directory.
func BenchmarkWalkLargeFlat(b *testing.B) {
	setupLargeBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "large", "flat"))
	for b.Loop() {
		runWalk(b, fsys, "", "", "", 200000)
	}
}

// BenchmarkWalkLargeFlatDelim collapses 100000 flat files under a "/" delimiter.
func BenchmarkWalkLargeFlatDelim(b *testing.B) {
	setupLargeBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "large", "flat"))
	for b.Loop() {
		runWalk(b, fsys, "", "/", "", 200000)
	}
}

// BenchmarkWalkLargeFlatPaged pages through 100000 flat files, 1000 per page.
func BenchmarkWalkLargeFlatPaged(b *testing.B) {
	setupLargeBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "large", "flat"))
	for b.Loop() {
		marker := ""
		for {
			res, err := backend.Walk(context.Background(),
				fsys, "", "", marker, 1000, benchGetObj, nil)
			if err != nil {
				b.Fatalf("Walk: %v", err)
			}
			if !res.Truncated {
				break
			}
			marker = res.NextMarker
		}
	}
}

// BenchmarkWalkLargeDeep fully recurses through 1000 dirs × 100 files.
func BenchmarkWalkLargeDeep(b *testing.B) {
	setupLargeBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "large", "deep"))
	for b.Loop() {
		runWalk(b, fsys, "", "", "", 200000)
	}
}

// BenchmarkWalkLargeDeepDelim lists 1000 common-prefixes without recursing.
func BenchmarkWalkLargeDeepDelim(b *testing.B) {
	setupLargeBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "large", "deep"))
	for b.Loop() {
		runWalk(b, fsys, "", "/", "", 200000)
	}
}

// BenchmarkWalkLargeWide fully recurses a 3-level wide tree (100000 files).
func BenchmarkWalkLargeWide(b *testing.B) {
	setupLargeBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "large", "wide"))
	for b.Loop() {
		runWalk(b, fsys, "", "", "", 200000)
	}
}

// BenchmarkWalkLargeWideDelim collapses the wide tree to its 500 top-level dirs.
func BenchmarkWalkLargeWideDelim(b *testing.B) {
	setupLargeBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "large", "wide"))
	for b.Loop() {
		runWalk(b, fsys, "", "/", "", 200000)
	}
}

// BenchmarkWalkLargeWithPrefix jumps directly into a subdirectory via prefix.
func BenchmarkWalkLargeWithPrefix(b *testing.B) {
	setupLargeBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "large"))
	for b.Loop() {
		runWalk(b, fsys, "mixed/prefix050/", "", "", 5000)
	}
}

// BenchmarkWalkLargeWithMarker resumes a listing mid-way through a 100000-file tree.
func BenchmarkWalkLargeWithMarker(b *testing.B) {
	setupLargeBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "large"))
	marker := "mixed/prefix050/file00500.txt"
	for b.Loop() {
		runWalk(b, fsys, "mixed/", "", marker, 5000)
	}
}

// ── Huge (500 k files) ────────────────────────────────────────────
// Run with: go test -bench=WalkHuge -benchtime=3x ./backend/
// (use -benchtime=Nx to limit iterations given each op is >1 s)

// BenchmarkWalkHugeFlat fully lists 500000 files in a single directory.
func BenchmarkWalkHugeFlat(b *testing.B) {
	setupLargeBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "large", "flat_huge"))
	for b.Loop() {
		runWalk(b, fsys, "", "", "", 1_000_000)
	}
}

// BenchmarkWalkHugeFlatMarker resumes a listing mid-way through 500000 flat
// files returning the next 1000.
func BenchmarkWalkHugeFlatMarker(b *testing.B) {
	setupLargeBenchDir(b)
	fsys := os.DirFS(filepath.Join(benchRoot, "large", "flat_huge"))
	// Marker sits at roughly the 50% point of the flat listing.
	marker := fmt.Sprintf("file%07d.txt", 250_000)
	for b.Loop() {
		runWalk(b, fsys, "", "", marker, 1000)
	}
}

// ── Timing sanity check ───────────────────────────────────────────────────────

// BenchmarkWalkSetupTime measures just the one-time setup cost.
func BenchmarkWalkSetupTime(b *testing.B) {
	start := time.Now()
	setupBenchDir(b)
	b.ReportMetric(float64(time.Since(start).Milliseconds()), "setup_ms")
	fsys := os.DirFS(filepath.Join(benchRoot, "small", "flat"))
	for b.Loop() {
		runWalk(b, fsys, "", "", "", 1)
	}
}