mirror of
https://github.com/versity/versitygw.git
synced 2026-03-27 18:05:00 +00:00
Rewrite the posix Walk implementation to avoid the extra ReadDir per directory that was noted as a TODO in the old code. The new algorithm holds all traversal state in a walkState struct and uses processDir to interleave sibling entries in correct S3 lexicographic order without a second syscall. Key changes: prefix optimisation: jump directly into the deepest matching directory rather than scanning from the root on every call marker short-circuit: skip entire subtrees that are lexically before the marker, making paginated listing faster Co-authored-by: Ben McClelland <ben.mcclelland@versity.com>
534 lines
16 KiB
Go
534 lines
16 KiB
Go
// Copyright 2026 Versity Software
|
||
// This file is licensed under the Apache License, Version 2.0
|
||
// (the "License"); you may not use this file except in compliance
|
||
// with the License. You may obtain a copy of the License at
|
||
//
|
||
// http://www.apache.org/licenses/LICENSE-2.0
|
||
//
|
||
// Unless required by applicable law or agreed to in writing,
|
||
// software distributed under the License is distributed on an
|
||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||
// KIND, either express or implied. See the License for the
|
||
// specific language governing permissions and limitations
|
||
// under the License.
|
||
|
||
// # Walk benchmarks
|
||
//
|
||
// ## Quick start – run all small benchmarks
|
||
//
|
||
// go test -bench='^BenchmarkWalk[^LH]' -benchtime=6x -count=6 ./backend/
|
||
//
|
||
// ## Run by tier
|
||
//
|
||
// Small (5 k files):
|
||
//
|
||
// go test -bench='^BenchmarkWalk[^LH]' -benchtime=6x -count=6 ./backend/
|
||
//
|
||
// Large (100 k files):
|
||
//
|
||
// go test -bench='^BenchmarkWalkLarge' -benchtime=3x ./backend/
|
||
//
|
||
// Huge (500 k files):
|
||
//
|
||
// go test -bench='^BenchmarkWalkHuge' -benchtime=3x ./backend/
|
||
//
|
||
// Note: Large and Huge benchmarks create fixture files under /tmp on first run.
|
||
// The fixtures are reused on subsequent runs as long as
|
||
// /tmp/versitygw_walk_bench/large is present. Remove that directory to force a
|
||
// full rebuild:
|
||
//
|
||
// rm -rf /tmp/versitygw_walk_bench/large
|
||
//
|
||
// ## Comparing against main using benchstat
|
||
//
|
||
// Install benchstat if needed:
|
||
//
|
||
// go install golang.org/x/perf/cmd/benchstat@latest
|
||
//
|
||
// 1. Record results on the current branch:
|
||
//
|
||
// go test -bench='^BenchmarkWalk[^LH]' -benchtime=3x -count=6 ./backend/ > /tmp/new.txt
|
||
//
|
||
// 2. Create a worktree for main and record results there:
|
||
//
|
||
// git worktree add /tmp/versitygw_main main
|
||
// cp backend/walk_bench_test.go /tmp/versitygw_main/backend/
|
||
// (cd /tmp/versitygw_main && go test -bench='^BenchmarkWalk[^LH]' -benchtime=3x -count=6 ./backend/ > /tmp/old.txt)
|
||
// git worktree remove --force /tmp/versitygw_main
|
||
//
|
||
// 3. Compare:
|
||
//
|
||
// benchstat /tmp/old.txt /tmp/new.txt
|
||
//
|
||
// Use -count=6 (minimum) for statistically meaningful p-values.
|
||
// Use -benchtime=3x (at least 3 iterations per count) to reduce per-run noise.
|
||
// For Large/Huge tiers use -benchtime=3x -count=1 (each op already takes seconds).
|
||
package backend_test
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"io/fs"
|
||
"os"
|
||
"path/filepath"
|
||
"runtime"
|
||
"sync"
|
||
"testing"
|
||
"time"
|
||
|
||
"github.com/versity/versitygw/backend"
|
||
"github.com/versity/versitygw/s3response"
|
||
)
|
||
|
||
// benchRoot is the /tmp directory shared across all benchmarks in the run.
|
||
// It is created once by setupBenchDir and once (lazily) by setupLargeBenchDir.
|
||
const benchRoot = "/tmp/versitygw_walk_bench"
|
||
|
||
// Tier sizes:
|
||
//
|
||
// Small – 5000 files per tree
|
||
// Large – 100000 files per tree
|
||
// Huge – 500000 files flat only
|
||
var (
|
||
benchSetupOnce sync.Once
|
||
benchLargeSetupOnce sync.Once
|
||
)
|
||
|
||
// parallelMkfile creates a batch of file paths in parallel using a worker
|
||
// pool sized to GOMAXPROCS, so setup time scales with CPU count.
|
||
func parallelMkfile(b *testing.B, paths []string) {
|
||
b.Helper()
|
||
workers := runtime.GOMAXPROCS(0)
|
||
ch := make(chan string, workers*4)
|
||
var wg sync.WaitGroup
|
||
var mu sync.Mutex
|
||
var firstErr error
|
||
for range workers {
|
||
wg.Add(1)
|
||
go func() {
|
||
defer wg.Done()
|
||
for path := range ch {
|
||
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
|
||
mu.Lock()
|
||
if firstErr == nil {
|
||
firstErr = err
|
||
}
|
||
mu.Unlock()
|
||
continue
|
||
}
|
||
f, err := os.Create(path)
|
||
if err != nil {
|
||
mu.Lock()
|
||
if firstErr == nil {
|
||
firstErr = err
|
||
}
|
||
mu.Unlock()
|
||
continue
|
||
}
|
||
_ = f.Close()
|
||
}
|
||
}()
|
||
}
|
||
for _, p := range paths {
|
||
ch <- p
|
||
}
|
||
close(ch)
|
||
wg.Wait()
|
||
if firstErr != nil {
|
||
b.Fatalf("bench setup: %v", firstErr)
|
||
}
|
||
}
|
||
|
||
// Structure summary (files under benchRoot):
|
||
//
|
||
// small/flat/ – 5000 files directly in the directory
|
||
// small/deep/ – 100 top-level dirs × 50 files each = 5000 files (2 levels)
|
||
// small/wide/ – 200 top-level dirs, 5 subdirs × 5 files = 5000 files (3 levels)
|
||
// small/mixed/ – 10 prefix dirs × 100 files = 1000 files
|
||
//
|
||
// large/flat/ – 100000 files flat
|
||
// large/deep/ – 1000 dirs × 100 files = 100000 files
|
||
// large/wide/ – 500 dirs × 10 subdirs × 20 files = 100000 files
|
||
// large/mixed/ – 100 prefix dirs × 1 000 files = 100000 files
|
||
// large/flat_huge/ – 500000 files flat
|
||
func setupBenchDir(b *testing.B) {
|
||
b.Helper()
|
||
benchSetupOnce.Do(func() {
|
||
// Reuse fixtures from a previous run if they exist.
|
||
sentinel := filepath.Join(benchRoot, "small", "flat", "file04999.txt")
|
||
if _, err := os.Stat(sentinel); err == nil {
|
||
return
|
||
}
|
||
_ = os.RemoveAll(filepath.Join(benchRoot, "small"))
|
||
|
||
var paths []string
|
||
|
||
// small/flat/ – 5000 files
|
||
for i := range 5000 {
|
||
paths = append(paths, filepath.Join(benchRoot, "small", "flat", fmt.Sprintf("file%05d.txt", i)))
|
||
}
|
||
|
||
// small/deep/ – 100 dirs × 50 files
|
||
for i := range 100 {
|
||
for j := range 50 {
|
||
paths = append(paths, filepath.Join(benchRoot, "small", "deep",
|
||
fmt.Sprintf("dir%03d", i),
|
||
fmt.Sprintf("file%03d.txt", j)))
|
||
}
|
||
}
|
||
|
||
// small/wide/ – 200 dirs × 5 subdirs × 5 files
|
||
for i := range 200 {
|
||
for j := range 5 {
|
||
for k := range 5 {
|
||
paths = append(paths, filepath.Join(benchRoot, "small", "wide",
|
||
fmt.Sprintf("dir%03d", i),
|
||
fmt.Sprintf("sub%02d", j),
|
||
fmt.Sprintf("file%02d.txt", k)))
|
||
}
|
||
}
|
||
}
|
||
|
||
// small/mixed/ – 10 prefix dirs × 100 files
|
||
for i := range 10 {
|
||
for j := range 100 {
|
||
paths = append(paths, filepath.Join(benchRoot, "small", "mixed",
|
||
fmt.Sprintf("prefix%02d", i),
|
||
fmt.Sprintf("file%04d.txt", j)))
|
||
}
|
||
}
|
||
|
||
parallelMkfile(b, paths)
|
||
})
|
||
}
|
||
|
||
func setupLargeBenchDir(b *testing.B) {
|
||
b.Helper()
|
||
setupBenchDir(b) // ensure small/ exists too
|
||
benchLargeSetupOnce.Do(func() {
|
||
// Reuse fixtures from a previous run if they exist.
|
||
sentinel := filepath.Join(benchRoot, "large", "flat_huge", "file0499999.txt")
|
||
if _, err := os.Stat(sentinel); err == nil {
|
||
return
|
||
}
|
||
_ = os.RemoveAll(filepath.Join(benchRoot, "large"))
|
||
var paths []string
|
||
|
||
// large/flat/ – 100000 files flat
|
||
for i := range 100_000 {
|
||
paths = append(paths, filepath.Join(benchRoot, "large", "flat", fmt.Sprintf("file%06d.txt", i)))
|
||
}
|
||
|
||
// large/deep/ – 1000 dirs × 100 files = 100000 files
|
||
for i := range 1000 {
|
||
for j := range 100 {
|
||
paths = append(paths, filepath.Join(benchRoot, "large", "deep",
|
||
fmt.Sprintf("dir%04d", i),
|
||
fmt.Sprintf("file%03d.txt", j)))
|
||
}
|
||
}
|
||
|
||
// large/wide/ – 500 dirs × 10 subdirs × 20 files = 100000 files
|
||
for i := range 500 {
|
||
for j := range 10 {
|
||
for k := range 20 {
|
||
paths = append(paths, filepath.Join(benchRoot, "large", "wide",
|
||
fmt.Sprintf("dir%03d", i),
|
||
fmt.Sprintf("sub%02d", j),
|
||
fmt.Sprintf("file%02d.txt", k)))
|
||
}
|
||
}
|
||
}
|
||
|
||
// large/mixed/ – 100 prefix dirs × 1000 files = 100000 files
|
||
for i := range 100 {
|
||
for j := range 1000 {
|
||
paths = append(paths, filepath.Join(benchRoot, "large", "mixed",
|
||
fmt.Sprintf("prefix%03d", i),
|
||
fmt.Sprintf("file%05d.txt", j)))
|
||
}
|
||
}
|
||
|
||
// large/flat_huge/ – 500000 files flat
|
||
for i := range 500_000 {
|
||
paths = append(paths, filepath.Join(benchRoot, "large", "flat_huge", fmt.Sprintf("file%07d.txt", i)))
|
||
}
|
||
|
||
b.Log("creating large bench fixtures (this runs once per /tmp clean)...")
|
||
parallelMkfile(b, paths)
|
||
})
|
||
}
|
||
|
||
// benchGetObj is a lightweight GetObjFunc that mimics what the POSIX backend
|
||
// does: stat the entry and populate the minimum Object fields, returning
|
||
// ErrSkipObj for bare directories (no stored etag = no explicit S3 PUT).
|
||
func benchGetObj(path string, d fs.DirEntry) (s3response.Object, error) {
|
||
if d.IsDir() {
|
||
return s3response.Object{}, backend.ErrSkipObj
|
||
}
|
||
fi, err := d.Info()
|
||
if err != nil {
|
||
return s3response.Object{}, err
|
||
}
|
||
sz := fi.Size()
|
||
mt := fi.ModTime()
|
||
etag := path // cheap stand-in; avoids md5 cost skewing the walk measurement
|
||
return s3response.Object{
|
||
Key: &path,
|
||
Size: &sz,
|
||
LastModified: &mt,
|
||
ETag: &etag,
|
||
}, nil
|
||
}
|
||
|
||
// runWalk is a convenience wrapper so each benchmark loop is one line.
|
||
func runWalk(b *testing.B, fsys fs.FS, prefix, delim, marker string, max int32) {
|
||
b.Helper()
|
||
_, err := backend.Walk(context.Background(), fsys, prefix, delim, marker, max, benchGetObj, nil)
|
||
if err != nil {
|
||
b.Fatalf("Walk: %v", err)
|
||
}
|
||
}
|
||
|
||
// ── Small (5 k files) ─────────────────────────────────────────────────────────
|
||
|
||
// BenchmarkWalkFlat lists all 5000 files in a single flat directory.
|
||
func BenchmarkWalkFlat(b *testing.B) {
|
||
setupBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "small", "flat"))
|
||
for b.Loop() {
|
||
runWalk(b, fsys, "", "", "", 10000)
|
||
}
|
||
}
|
||
|
||
// BenchmarkWalkFlatDelim lists with a "/" delimiter (all files collapse into
|
||
// the root; tests the common-prefix fast-path for a flat bucket).
|
||
func BenchmarkWalkFlatDelim(b *testing.B) {
|
||
setupBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "small", "flat"))
|
||
for b.Loop() {
|
||
runWalk(b, fsys, "", "/", "", 10000)
|
||
}
|
||
}
|
||
|
||
// BenchmarkWalkFlatPaged benchmarks paginated listing (1000 objects per page,
|
||
// walking through 5 pages of the flat directory).
|
||
func BenchmarkWalkFlatPaged(b *testing.B) {
|
||
setupBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "small", "flat"))
|
||
for b.Loop() {
|
||
marker := ""
|
||
for {
|
||
res, err := backend.Walk(context.Background(),
|
||
fsys, "", "", marker, 1000, benchGetObj, nil)
|
||
if err != nil {
|
||
b.Fatalf("Walk: %v", err)
|
||
}
|
||
if !res.Truncated {
|
||
break
|
||
}
|
||
marker = res.NextMarker
|
||
}
|
||
}
|
||
}
|
||
|
||
// BenchmarkWalkDeep lists all files recursively through 2 levels of dirs.
|
||
func BenchmarkWalkDeep(b *testing.B) {
|
||
setupBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "small", "deep"))
|
||
for b.Loop() {
|
||
runWalk(b, fsys, "", "", "", 10000)
|
||
}
|
||
}
|
||
|
||
// BenchmarkWalkDeepDelim lists with "/" delimiter, returning 100 common
|
||
// prefixes instead of descending into every directory.
|
||
func BenchmarkWalkDeepDelim(b *testing.B) {
|
||
setupBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "small", "deep"))
|
||
for b.Loop() {
|
||
runWalk(b, fsys, "", "/", "", 10000)
|
||
}
|
||
}
|
||
|
||
// BenchmarkWalkWide lists all 5000 files across a wide tree (3 levels).
|
||
func BenchmarkWalkWide(b *testing.B) {
|
||
setupBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "small", "wide"))
|
||
for b.Loop() {
|
||
runWalk(b, fsys, "", "", "", 10000)
|
||
}
|
||
}
|
||
|
||
// BenchmarkWalkWideDelim collapses the wide tree to the top-level dirs.
|
||
func BenchmarkWalkWideDelim(b *testing.B) {
|
||
setupBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "small", "wide"))
|
||
for b.Loop() {
|
||
runWalk(b, fsys, "", "/", "", 10000)
|
||
}
|
||
}
|
||
|
||
// BenchmarkWalkWithPrefix exercises the prefix-optimisation.
|
||
func BenchmarkWalkWithPrefix(b *testing.B) {
|
||
setupBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "small"))
|
||
for b.Loop() {
|
||
runWalk(b, fsys, "mixed/prefix05/", "", "", 1000)
|
||
}
|
||
}
|
||
|
||
// BenchmarkWalkWithPrefixDelim lists with a prefix and delimiter.
|
||
func BenchmarkWalkWithPrefixDelim(b *testing.B) {
|
||
setupBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "small"))
|
||
for b.Loop() {
|
||
runWalk(b, fsys, "mixed/", "/", "", 1000)
|
||
}
|
||
}
|
||
|
||
// BenchmarkWalkWithMarker benchmarks resuming a listing mid-way through the
|
||
// mixed/ prefix (simulates the second page of a paginated request).
|
||
func BenchmarkWalkWithMarker(b *testing.B) {
|
||
setupBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "small"))
|
||
marker := "mixed/prefix05/file0050.txt"
|
||
for b.Loop() {
|
||
runWalk(b, fsys, "mixed/", "", marker, 1000)
|
||
}
|
||
}
|
||
|
||
// ── Large (100 k files) ──────────────────────────────────────────
|
||
// Run with: go test -bench=WalkLarge -benchtime=10s ./backend/
|
||
|
||
// BenchmarkWalkLargeFlat lists 100000 files in a flat directory.
|
||
func BenchmarkWalkLargeFlat(b *testing.B) {
|
||
setupLargeBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "large", "flat"))
|
||
for b.Loop() {
|
||
runWalk(b, fsys, "", "", "", 200000)
|
||
}
|
||
}
|
||
|
||
// BenchmarkWalkLargeFlatDelim collapses 100000 flat files under a "/" delimiter.
|
||
func BenchmarkWalkLargeFlatDelim(b *testing.B) {
|
||
setupLargeBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "large", "flat"))
|
||
for b.Loop() {
|
||
runWalk(b, fsys, "", "/", "", 200000)
|
||
}
|
||
}
|
||
|
||
// BenchmarkWalkLargeFlatPaged pages through 100000 flat files, 1000 per page.
|
||
func BenchmarkWalkLargeFlatPaged(b *testing.B) {
|
||
setupLargeBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "large", "flat"))
|
||
for b.Loop() {
|
||
marker := ""
|
||
for {
|
||
res, err := backend.Walk(context.Background(),
|
||
fsys, "", "", marker, 1000, benchGetObj, nil)
|
||
if err != nil {
|
||
b.Fatalf("Walk: %v", err)
|
||
}
|
||
if !res.Truncated {
|
||
break
|
||
}
|
||
marker = res.NextMarker
|
||
}
|
||
}
|
||
}
|
||
|
||
// BenchmarkWalkLargeDeep fully recurses through 1000 dirs × 100 files.
|
||
func BenchmarkWalkLargeDeep(b *testing.B) {
|
||
setupLargeBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "large", "deep"))
|
||
for b.Loop() {
|
||
runWalk(b, fsys, "", "", "", 200000)
|
||
}
|
||
}
|
||
|
||
// BenchmarkWalkLargeDeepDelim lists 1000 common-prefixes without recursing.
|
||
func BenchmarkWalkLargeDeepDelim(b *testing.B) {
|
||
setupLargeBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "large", "deep"))
|
||
for b.Loop() {
|
||
runWalk(b, fsys, "", "/", "", 200000)
|
||
}
|
||
}
|
||
|
||
// BenchmarkWalkLargeWide fully recurses a 3-level wide tree (100000 files).
|
||
func BenchmarkWalkLargeWide(b *testing.B) {
|
||
setupLargeBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "large", "wide"))
|
||
for b.Loop() {
|
||
runWalk(b, fsys, "", "", "", 200000)
|
||
}
|
||
}
|
||
|
||
// BenchmarkWalkLargeWideDelim collapses the wide tree to its 500 top-level dirs.
|
||
func BenchmarkWalkLargeWideDelim(b *testing.B) {
|
||
setupLargeBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "large", "wide"))
|
||
for b.Loop() {
|
||
runWalk(b, fsys, "", "/", "", 200000)
|
||
}
|
||
}
|
||
|
||
// BenchmarkWalkLargeWithPrefix jumps directly into a subdirectory via prefix.
|
||
func BenchmarkWalkLargeWithPrefix(b *testing.B) {
|
||
setupLargeBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "large"))
|
||
for b.Loop() {
|
||
runWalk(b, fsys, "mixed/prefix050/", "", "", 5000)
|
||
}
|
||
}
|
||
|
||
// BenchmarkWalkLargeWithMarker resumes a listing mid-way through a 100000-file tree.
|
||
func BenchmarkWalkLargeWithMarker(b *testing.B) {
|
||
setupLargeBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "large"))
|
||
marker := "mixed/prefix050/file00500.txt"
|
||
for b.Loop() {
|
||
runWalk(b, fsys, "mixed/", "", marker, 5000)
|
||
}
|
||
}
|
||
|
||
// ── Huge (500 k files) ────────────────────────────────────────────
|
||
// Run with: go test -bench=WalkHuge -benchtime=3x ./backend/
|
||
// (use -benchtime=Nx to limit iterations given each op is >1 s)
|
||
|
||
// BenchmarkWalkHugeFlat fully lists 500000 files in a single directory.
|
||
func BenchmarkWalkHugeFlat(b *testing.B) {
|
||
setupLargeBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "large", "flat_huge"))
|
||
for b.Loop() {
|
||
runWalk(b, fsys, "", "", "", 1_000_000)
|
||
}
|
||
}
|
||
|
||
// BenchmarkWalkHugeFlatMarker resumes a listing mid-way through 500000 flat
|
||
// files returning the next 1000.
|
||
func BenchmarkWalkHugeFlatMarker(b *testing.B) {
|
||
setupLargeBenchDir(b)
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "large", "flat_huge"))
|
||
// Marker sits at roughly the 50% point of the flat listing.
|
||
marker := fmt.Sprintf("file%07d.txt", 250_000)
|
||
for b.Loop() {
|
||
runWalk(b, fsys, "", "", marker, 1000)
|
||
}
|
||
}
|
||
|
||
// ── Timing sanity check ───────────────────────────────────────────────────────
|
||
|
||
// BenchmarkWalkSetupTime measures just the one-time setup cost.
|
||
func BenchmarkWalkSetupTime(b *testing.B) {
|
||
start := time.Now()
|
||
setupBenchDir(b)
|
||
b.ReportMetric(float64(time.Since(start).Milliseconds()), "setup_ms")
|
||
fsys := os.DirFS(filepath.Join(benchRoot, "small", "flat"))
|
||
for b.Loop() {
|
||
runWalk(b, fsys, "", "", "", 1)
|
||
}
|
||
}
|