Files
versitygw/backend/walk_bench_test.go
tonyipm 33917ad6f3 feat: implement quicker backend/posix walk algorithm
Rewrite the posix Walk implementation to avoid the extra ReadDir per
directory that was noted as a TODO in the old code.  The new algorithm
holds all traversal state in a walkState struct and uses processDir to
interleave sibling entries in correct S3 lexicographic order without a
second syscall.

Key changes:
prefix optimisation: jump directly into the deepest matching directory
rather than scanning from the root on every call

marker short-circuit: skip entire subtrees that are lexically before
the marker, making paginated listing faster

Co-authored-by: Ben McClelland <ben.mcclelland@versity.com>
2026-03-18 17:39:16 -07:00

534 lines
16 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Copyright 2026 Versity Software
// This file is licensed under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// # Walk benchmarks
//
// ## Quick start run all small benchmarks
//
// go test -bench='^BenchmarkWalk[^LH]' -benchtime=6x -count=6 ./backend/
//
// ## Run by tier
//
// Small (5 k files):
//
// go test -bench='^BenchmarkWalk[^LH]' -benchtime=6x -count=6 ./backend/
//
// Large (100 k files):
//
// go test -bench='^BenchmarkWalkLarge' -benchtime=3x ./backend/
//
// Huge (500 k files):
//
// go test -bench='^BenchmarkWalkHuge' -benchtime=3x ./backend/
//
// Note: Large and Huge benchmarks create fixture files under /tmp on first run.
// The fixtures are reused on subsequent runs as long as
// /tmp/versitygw_walk_bench/large is present. Remove that directory to force a
// full rebuild:
//
// rm -rf /tmp/versitygw_walk_bench/large
//
// ## Comparing against main using benchstat
//
// Install benchstat if needed:
//
// go install golang.org/x/perf/cmd/benchstat@latest
//
// 1. Record results on the current branch:
//
// go test -bench='^BenchmarkWalk[^LH]' -benchtime=3x -count=6 ./backend/ > /tmp/new.txt
//
// 2. Create a worktree for main and record results there:
//
// git worktree add /tmp/versitygw_main main
// cp backend/walk_bench_test.go /tmp/versitygw_main/backend/
// (cd /tmp/versitygw_main && go test -bench='^BenchmarkWalk[^LH]' -benchtime=3x -count=6 ./backend/ > /tmp/old.txt)
// git worktree remove --force /tmp/versitygw_main
//
// 3. Compare:
//
// benchstat /tmp/old.txt /tmp/new.txt
//
// Use -count=6 (minimum) for statistically meaningful p-values.
// Use -benchtime=3x (at least 3 iterations per count) to reduce per-run noise.
// For Large/Huge tiers use -benchtime=3x -count=1 (each op already takes seconds).
package backend_test
import (
"context"
"fmt"
"io/fs"
"os"
"path/filepath"
"runtime"
"sync"
"testing"
"time"
"github.com/versity/versitygw/backend"
"github.com/versity/versitygw/s3response"
)
// benchRoot is the /tmp directory shared across all benchmarks in the run.
// It is created once by setupBenchDir and once (lazily) by setupLargeBenchDir.
const benchRoot = "/tmp/versitygw_walk_bench"
// Tier sizes:
//
// Small 5000 files per tree
// Large 100000 files per tree
// Huge 500000 files flat only
var (
benchSetupOnce sync.Once
benchLargeSetupOnce sync.Once
)
// parallelMkfile creates a batch of file paths in parallel using a worker
// pool sized to GOMAXPROCS, so setup time scales with CPU count.
func parallelMkfile(b *testing.B, paths []string) {
b.Helper()
workers := runtime.GOMAXPROCS(0)
ch := make(chan string, workers*4)
var wg sync.WaitGroup
var mu sync.Mutex
var firstErr error
for range workers {
wg.Add(1)
go func() {
defer wg.Done()
for path := range ch {
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
mu.Lock()
if firstErr == nil {
firstErr = err
}
mu.Unlock()
continue
}
f, err := os.Create(path)
if err != nil {
mu.Lock()
if firstErr == nil {
firstErr = err
}
mu.Unlock()
continue
}
_ = f.Close()
}
}()
}
for _, p := range paths {
ch <- p
}
close(ch)
wg.Wait()
if firstErr != nil {
b.Fatalf("bench setup: %v", firstErr)
}
}
// Structure summary (files under benchRoot):
//
// small/flat/ 5000 files directly in the directory
// small/deep/ 100 top-level dirs × 50 files each = 5000 files (2 levels)
// small/wide/ 200 top-level dirs, 5 subdirs × 5 files = 5000 files (3 levels)
// small/mixed/ 10 prefix dirs × 100 files = 1000 files
//
// large/flat/ 100000 files flat
// large/deep/ 1000 dirs × 100 files = 100000 files
// large/wide/ 500 dirs × 10 subdirs × 20 files = 100000 files
// large/mixed/ 100 prefix dirs × 1 000 files = 100000 files
// large/flat_huge/ 500000 files flat
func setupBenchDir(b *testing.B) {
b.Helper()
benchSetupOnce.Do(func() {
// Reuse fixtures from a previous run if they exist.
sentinel := filepath.Join(benchRoot, "small", "flat", "file04999.txt")
if _, err := os.Stat(sentinel); err == nil {
return
}
_ = os.RemoveAll(filepath.Join(benchRoot, "small"))
var paths []string
// small/flat/ 5000 files
for i := range 5000 {
paths = append(paths, filepath.Join(benchRoot, "small", "flat", fmt.Sprintf("file%05d.txt", i)))
}
// small/deep/ 100 dirs × 50 files
for i := range 100 {
for j := range 50 {
paths = append(paths, filepath.Join(benchRoot, "small", "deep",
fmt.Sprintf("dir%03d", i),
fmt.Sprintf("file%03d.txt", j)))
}
}
// small/wide/ 200 dirs × 5 subdirs × 5 files
for i := range 200 {
for j := range 5 {
for k := range 5 {
paths = append(paths, filepath.Join(benchRoot, "small", "wide",
fmt.Sprintf("dir%03d", i),
fmt.Sprintf("sub%02d", j),
fmt.Sprintf("file%02d.txt", k)))
}
}
}
// small/mixed/ 10 prefix dirs × 100 files
for i := range 10 {
for j := range 100 {
paths = append(paths, filepath.Join(benchRoot, "small", "mixed",
fmt.Sprintf("prefix%02d", i),
fmt.Sprintf("file%04d.txt", j)))
}
}
parallelMkfile(b, paths)
})
}
func setupLargeBenchDir(b *testing.B) {
b.Helper()
setupBenchDir(b) // ensure small/ exists too
benchLargeSetupOnce.Do(func() {
// Reuse fixtures from a previous run if they exist.
sentinel := filepath.Join(benchRoot, "large", "flat_huge", "file0499999.txt")
if _, err := os.Stat(sentinel); err == nil {
return
}
_ = os.RemoveAll(filepath.Join(benchRoot, "large"))
var paths []string
// large/flat/ 100000 files flat
for i := range 100_000 {
paths = append(paths, filepath.Join(benchRoot, "large", "flat", fmt.Sprintf("file%06d.txt", i)))
}
// large/deep/ 1000 dirs × 100 files = 100000 files
for i := range 1000 {
for j := range 100 {
paths = append(paths, filepath.Join(benchRoot, "large", "deep",
fmt.Sprintf("dir%04d", i),
fmt.Sprintf("file%03d.txt", j)))
}
}
// large/wide/ 500 dirs × 10 subdirs × 20 files = 100000 files
for i := range 500 {
for j := range 10 {
for k := range 20 {
paths = append(paths, filepath.Join(benchRoot, "large", "wide",
fmt.Sprintf("dir%03d", i),
fmt.Sprintf("sub%02d", j),
fmt.Sprintf("file%02d.txt", k)))
}
}
}
// large/mixed/ 100 prefix dirs × 1000 files = 100000 files
for i := range 100 {
for j := range 1000 {
paths = append(paths, filepath.Join(benchRoot, "large", "mixed",
fmt.Sprintf("prefix%03d", i),
fmt.Sprintf("file%05d.txt", j)))
}
}
// large/flat_huge/ 500000 files flat
for i := range 500_000 {
paths = append(paths, filepath.Join(benchRoot, "large", "flat_huge", fmt.Sprintf("file%07d.txt", i)))
}
b.Log("creating large bench fixtures (this runs once per /tmp clean)...")
parallelMkfile(b, paths)
})
}
// benchGetObj is a lightweight GetObjFunc that mimics what the POSIX backend
// does: stat the entry and populate the minimum Object fields, returning
// ErrSkipObj for bare directories (no stored etag = no explicit S3 PUT).
func benchGetObj(path string, d fs.DirEntry) (s3response.Object, error) {
if d.IsDir() {
return s3response.Object{}, backend.ErrSkipObj
}
fi, err := d.Info()
if err != nil {
return s3response.Object{}, err
}
sz := fi.Size()
mt := fi.ModTime()
etag := path // cheap stand-in; avoids md5 cost skewing the walk measurement
return s3response.Object{
Key: &path,
Size: &sz,
LastModified: &mt,
ETag: &etag,
}, nil
}
// runWalk is a convenience wrapper so each benchmark loop is one line.
func runWalk(b *testing.B, fsys fs.FS, prefix, delim, marker string, max int32) {
b.Helper()
_, err := backend.Walk(context.Background(), fsys, prefix, delim, marker, max, benchGetObj, nil)
if err != nil {
b.Fatalf("Walk: %v", err)
}
}
// ── Small (5 k files) ─────────────────────────────────────────────────────────
// BenchmarkWalkFlat lists all 5000 files in a single flat directory.
func BenchmarkWalkFlat(b *testing.B) {
setupBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "small", "flat"))
for b.Loop() {
runWalk(b, fsys, "", "", "", 10000)
}
}
// BenchmarkWalkFlatDelim lists with a "/" delimiter (all files collapse into
// the root; tests the common-prefix fast-path for a flat bucket).
func BenchmarkWalkFlatDelim(b *testing.B) {
setupBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "small", "flat"))
for b.Loop() {
runWalk(b, fsys, "", "/", "", 10000)
}
}
// BenchmarkWalkFlatPaged benchmarks paginated listing (1000 objects per page,
// walking through 5 pages of the flat directory).
func BenchmarkWalkFlatPaged(b *testing.B) {
setupBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "small", "flat"))
for b.Loop() {
marker := ""
for {
res, err := backend.Walk(context.Background(),
fsys, "", "", marker, 1000, benchGetObj, nil)
if err != nil {
b.Fatalf("Walk: %v", err)
}
if !res.Truncated {
break
}
marker = res.NextMarker
}
}
}
// BenchmarkWalkDeep lists all files recursively through 2 levels of dirs.
func BenchmarkWalkDeep(b *testing.B) {
setupBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "small", "deep"))
for b.Loop() {
runWalk(b, fsys, "", "", "", 10000)
}
}
// BenchmarkWalkDeepDelim lists with "/" delimiter, returning 100 common
// prefixes instead of descending into every directory.
func BenchmarkWalkDeepDelim(b *testing.B) {
setupBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "small", "deep"))
for b.Loop() {
runWalk(b, fsys, "", "/", "", 10000)
}
}
// BenchmarkWalkWide lists all 5000 files across a wide tree (3 levels).
func BenchmarkWalkWide(b *testing.B) {
setupBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "small", "wide"))
for b.Loop() {
runWalk(b, fsys, "", "", "", 10000)
}
}
// BenchmarkWalkWideDelim collapses the wide tree to the top-level dirs.
func BenchmarkWalkWideDelim(b *testing.B) {
setupBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "small", "wide"))
for b.Loop() {
runWalk(b, fsys, "", "/", "", 10000)
}
}
// BenchmarkWalkWithPrefix exercises the prefix-optimisation.
func BenchmarkWalkWithPrefix(b *testing.B) {
setupBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "small"))
for b.Loop() {
runWalk(b, fsys, "mixed/prefix05/", "", "", 1000)
}
}
// BenchmarkWalkWithPrefixDelim lists with a prefix and delimiter.
func BenchmarkWalkWithPrefixDelim(b *testing.B) {
setupBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "small"))
for b.Loop() {
runWalk(b, fsys, "mixed/", "/", "", 1000)
}
}
// BenchmarkWalkWithMarker benchmarks resuming a listing mid-way through the
// mixed/ prefix (simulates the second page of a paginated request).
func BenchmarkWalkWithMarker(b *testing.B) {
setupBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "small"))
marker := "mixed/prefix05/file0050.txt"
for b.Loop() {
runWalk(b, fsys, "mixed/", "", marker, 1000)
}
}
// ── Large (100 k files) ──────────────────────────────────────────
// Run with: go test -bench=WalkLarge -benchtime=10s ./backend/
// BenchmarkWalkLargeFlat lists 100000 files in a flat directory.
func BenchmarkWalkLargeFlat(b *testing.B) {
setupLargeBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "large", "flat"))
for b.Loop() {
runWalk(b, fsys, "", "", "", 200000)
}
}
// BenchmarkWalkLargeFlatDelim collapses 100000 flat files under a "/" delimiter.
func BenchmarkWalkLargeFlatDelim(b *testing.B) {
setupLargeBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "large", "flat"))
for b.Loop() {
runWalk(b, fsys, "", "/", "", 200000)
}
}
// BenchmarkWalkLargeFlatPaged pages through 100000 flat files, 1000 per page.
func BenchmarkWalkLargeFlatPaged(b *testing.B) {
setupLargeBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "large", "flat"))
for b.Loop() {
marker := ""
for {
res, err := backend.Walk(context.Background(),
fsys, "", "", marker, 1000, benchGetObj, nil)
if err != nil {
b.Fatalf("Walk: %v", err)
}
if !res.Truncated {
break
}
marker = res.NextMarker
}
}
}
// BenchmarkWalkLargeDeep fully recurses through 1000 dirs × 100 files.
func BenchmarkWalkLargeDeep(b *testing.B) {
setupLargeBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "large", "deep"))
for b.Loop() {
runWalk(b, fsys, "", "", "", 200000)
}
}
// BenchmarkWalkLargeDeepDelim lists 1000 common-prefixes without recursing.
func BenchmarkWalkLargeDeepDelim(b *testing.B) {
setupLargeBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "large", "deep"))
for b.Loop() {
runWalk(b, fsys, "", "/", "", 200000)
}
}
// BenchmarkWalkLargeWide fully recurses a 3-level wide tree (100000 files).
func BenchmarkWalkLargeWide(b *testing.B) {
setupLargeBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "large", "wide"))
for b.Loop() {
runWalk(b, fsys, "", "", "", 200000)
}
}
// BenchmarkWalkLargeWideDelim collapses the wide tree to its 500 top-level dirs.
func BenchmarkWalkLargeWideDelim(b *testing.B) {
setupLargeBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "large", "wide"))
for b.Loop() {
runWalk(b, fsys, "", "/", "", 200000)
}
}
// BenchmarkWalkLargeWithPrefix jumps directly into a subdirectory via prefix.
func BenchmarkWalkLargeWithPrefix(b *testing.B) {
setupLargeBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "large"))
for b.Loop() {
runWalk(b, fsys, "mixed/prefix050/", "", "", 5000)
}
}
// BenchmarkWalkLargeWithMarker resumes a listing mid-way through a 100000-file tree.
func BenchmarkWalkLargeWithMarker(b *testing.B) {
setupLargeBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "large"))
marker := "mixed/prefix050/file00500.txt"
for b.Loop() {
runWalk(b, fsys, "mixed/", "", marker, 5000)
}
}
// ── Huge (500 k files) ────────────────────────────────────────────
// Run with: go test -bench=WalkHuge -benchtime=3x ./backend/
// (use -benchtime=Nx to limit iterations given each op is >1 s)
// BenchmarkWalkHugeFlat fully lists 500000 files in a single directory.
func BenchmarkWalkHugeFlat(b *testing.B) {
setupLargeBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "large", "flat_huge"))
for b.Loop() {
runWalk(b, fsys, "", "", "", 1_000_000)
}
}
// BenchmarkWalkHugeFlatMarker resumes a listing mid-way through 500000 flat
// files returning the next 1000.
func BenchmarkWalkHugeFlatMarker(b *testing.B) {
setupLargeBenchDir(b)
fsys := os.DirFS(filepath.Join(benchRoot, "large", "flat_huge"))
// Marker sits at roughly the 50% point of the flat listing.
marker := fmt.Sprintf("file%07d.txt", 250_000)
for b.Loop() {
runWalk(b, fsys, "", "", marker, 1000)
}
}
// ── Timing sanity check ───────────────────────────────────────────────────────
// BenchmarkWalkSetupTime measures just the one-time setup cost.
func BenchmarkWalkSetupTime(b *testing.B) {
start := time.Now()
setupBenchDir(b)
b.ReportMetric(float64(time.Since(start).Milliseconds()), "setup_ms")
fsys := os.DirFS(filepath.Join(benchRoot, "small", "flat"))
for b.Loop() {
runWalk(b, fsys, "", "", "", 1)
}
}