From faa8c3963bd5acff67b63154ed4e56d89ea1bfbb Mon Sep 17 00:00:00 2001 From: AlexALei Date: Tue, 23 Jun 2026 16:49:35 +0800 Subject: [PATCH] fix(chunk_cache): close data/index files on initialization error (#10057) * fix(chunk_cache): close data/index files on initialization error * chunk_cache: assign outer err on the .dat open path The error-path defer keys off the function-level err, but the .dat OpenFile used := and shadowed it, so that path relied on nothing being open yet rather than the cleanup invariant. Assign the outer err so every error return is uniform. * chunk_cache: verify descriptor closure on POSIX, not just Windows os.Remove succeeds on open files on Linux/macOS, so the removal check only proved closure on Windows. Compare the open-fd count before and after the failed load; gate the removal check to Windows. --------- Co-authored-by: Contributor Co-authored-by: Chris Lu --- weed/util/chunk_cache/chunk_cache_on_disk.go | 23 +++++-- .../chunk_cache/chunk_cache_on_disk_test.go | 68 +++++++++++++++++++ 2 files changed, 85 insertions(+), 6 deletions(-) diff --git a/weed/util/chunk_cache/chunk_cache_on_disk.go b/weed/util/chunk_cache/chunk_cache_on_disk.go index 1f9254711..0b947239d 100644 --- a/weed/util/chunk_cache/chunk_cache_on_disk.go +++ b/weed/util/chunk_cache/chunk_cache_on_disk.go @@ -37,6 +37,18 @@ func LoadOrCreateChunkCacheVolume(fileName string, preallocate int64) (*ChunkCac } var err error + var indexFile *os.File + + defer func() { + if err != nil { + if v.DataBackend != nil { + v.DataBackend.Close() + } + if indexFile != nil { + indexFile.Close() + } + } + }() if exists, canRead, canWrite, modTime, fileSize := util.CheckFile(v.fileName + ".dat"); exists { if !canRead { @@ -45,13 +57,13 @@ func LoadOrCreateChunkCacheVolume(fileName string, preallocate int64) (*ChunkCac if !canWrite { return nil, fmt.Errorf("cannot write cache file %s.dat", v.fileName) } - if dataFile, err := os.OpenFile(v.fileName+".dat", os.O_RDWR|os.O_CREATE, 0644); err != nil { + var dataFile *os.File + if dataFile, err = os.OpenFile(v.fileName+".dat", os.O_RDWR|os.O_CREATE, 0644); err != nil { return nil, fmt.Errorf("cannot create cache file %s.dat: %v", v.fileName, err) - } else { - v.DataBackend = backend.NewDiskFile(dataFile) - v.lastModTime = modTime - v.fileSize = fileSize } + v.DataBackend = backend.NewDiskFile(dataFile) + v.lastModTime = modTime + v.fileSize = fileSize } else { if v.DataBackend, err = backend.CreateVolumeFile(v.fileName+".dat", preallocate, 0); err != nil { return nil, fmt.Errorf("cannot create cache file %s.dat: %v", v.fileName, err) @@ -59,7 +71,6 @@ func LoadOrCreateChunkCacheVolume(fileName string, preallocate int64) (*ChunkCac v.lastModTime = time.Now() } - var indexFile *os.File if indexFile, err = os.OpenFile(v.fileName+".idx", os.O_RDWR|os.O_CREATE, 0644); err != nil { return nil, fmt.Errorf("cannot write cache index %s.idx: %v", v.fileName, err) } diff --git a/weed/util/chunk_cache/chunk_cache_on_disk_test.go b/weed/util/chunk_cache/chunk_cache_on_disk_test.go index 04e6bc669..531f01089 100644 --- a/weed/util/chunk_cache/chunk_cache_on_disk_test.go +++ b/weed/util/chunk_cache/chunk_cache_on_disk_test.go @@ -4,6 +4,9 @@ import ( "bytes" "fmt" "math/rand" + "os" + "path/filepath" + "runtime" "testing" "github.com/seaweedfs/seaweedfs/weed/util/mem" @@ -131,3 +134,68 @@ func TestOnDisk(t *testing.T) { cache.Shutdown() } + +func TestLoadOrCreateChunkCacheVolumeClosesFilesOnError(t *testing.T) { + tmpDir := t.TempDir() + fileName := filepath.Join(tmpDir, "cache") + + // Pre-create .dat so LoadOrCreateChunkCacheVolume opens it. + if err := os.WriteFile(fileName+".dat", []byte("data"), 0644); err != nil { + t.Fatal(err) + } + + // Create .ldb as a regular file so NewLevelDbNeedleMap fails. + // leveldb.OpenFile expects a directory. + if err := os.WriteFile(fileName+".ldb", []byte("not a directory"), 0644); err != nil { + t.Fatal(err) + } + + before := openFDCount() + + _, err := LoadOrCreateChunkCacheVolume(fileName, 1024) + if err == nil { + t.Fatal("expected error") + } + + // On POSIX a leaked descriptor keeps the count elevated; the error path + // should close both files, returning the count to its baseline. + if before >= 0 { + if after := openFDCount(); after > before { + t.Errorf("descriptors leaked on error: before=%d after=%d", before, after) + } + } + + // On Windows open files cannot be removed, so successful removal also + // confirms the descriptors were closed. + if runtime.GOOS == "windows" { + for _, ext := range []string{".dat", ".idx"} { + path := fileName + ext + if _, statErr := os.Stat(path); os.IsNotExist(statErr) { + continue + } + if rmErr := os.Remove(path); rmErr != nil { + t.Errorf("failed to remove %s: %v", path, rmErr) + } + } + } +} + +// openFDCount returns the number of open file descriptors for the current +// process, or -1 where it cannot be determined (e.g. Windows). +// Readdirnames is used instead of os.ReadDir because the latter stats every +// entry, which fails on macOS /dev/fd ("bad file descriptor"). +func openFDCount() int { + for _, dir := range []string{"/proc/self/fd", "/dev/fd"} { + f, err := os.Open(dir) + if err != nil { + continue + } + names, err := f.Readdirnames(-1) + f.Close() + if err != nil { + continue + } + return len(names) + } + return -1 +}