From dfd05d14cb1de777530b81725970a880ecacca68 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Thu, 28 May 2026 15:00:18 -0700 Subject: [PATCH] refactor(filer): remove the inode->path index and the NFS gateway (#9724) * fix(filer): derive inodes by hash instead of a snowflake sequencer Compute the same inode the FUSE mount would: non-hard-linked entries hash path + crtime, hard links hash their shared HardLinkId so every link resolves to one inode. Removes the snowflake inodeSequencer and the SEAWEEDFS_FILER_SNOWFLAKE_ID knob; inodes are now deterministic across filers. * chore: remove the experimental NFS gateway The NFS frontend ('weed nfs') was the only consumer of the inode->path index. Remove the weed/server/nfs package, the command and its registration, the integration test harness, and the CI workflow; go mod tidy drops the willscott/go-nfs and go-nfs-client dependencies. * refactor(filer): drop the inode->path index With the NFS gateway gone, nothing reads it. A regular file's inode is a pure hash of its path and crtime, and a hard link's is a hash of its shared HardLinkId -- both derivable on demand -- so the secondary KV index and its write/remove hooks are dead. Removes filer_inode_index.go and the recordInodeIndex hooks from the store wrapper. 
--- .github/workflows/nfs-tests.yml | 137 -- go.mod | 5 +- go.sum | 6 - test/nfs/Makefile | 36 - test/nfs/README.md | 92 -- test/nfs/basic_test.go | 400 ------ test/nfs/framework.go | 423 ------ test/nfs/go.mod | 21 - test/nfs/go.sum | 14 - test/nfs/kernel_mount_test.go | 193 --- weed/command/command.go | 1 - weed/command/nfs.go | 127 -- weed/filer/filer.go | 3 - weed/filer/filer_inode.go | 55 +- weed/filer/filer_inode_index.go | 300 ----- weed/filer/filer_inode_index_test.go | 206 --- weed/filer/filer_inode_test.go | 42 + weed/filer/filer_lazy_remote_test.go | 1 - weed/filer/filerstore_wrapper.go | 52 +- weed/server/nfs/access.go | 140 -- weed/server/nfs/access_test.go | 29 - weed/server/nfs/filehandle.go | 251 ---- weed/server/nfs/filehandle_test.go | 182 --- weed/server/nfs/filesystem.go | 1348 -------------------- weed/server/nfs/handler.go | 182 --- weed/server/nfs/integration_test.go | 880 ------------- weed/server/nfs/internal_client.go | 88 -- weed/server/nfs/metadata_follow.go | 147 --- weed/server/nfs/mount_udp.go | 343 ----- weed/server/nfs/mount_udp_test.go | 431 ------- weed/server/nfs/portmap.go | 447 ------- weed/server/nfs/portmap_test.go | 418 ------ weed/server/nfs/rpc_version_filter.go | 377 ------ weed/server/nfs/rpc_version_filter_test.go | 561 -------- weed/server/nfs/server.go | 250 ---- weed/server/nfs/server_test.go | 1083 ---------------- weed/server/nfs/uploader.go | 40 - 37 files changed, 64 insertions(+), 9247 deletions(-) delete mode 100644 .github/workflows/nfs-tests.yml delete mode 100644 test/nfs/Makefile delete mode 100644 test/nfs/README.md delete mode 100644 test/nfs/basic_test.go delete mode 100644 test/nfs/framework.go delete mode 100644 test/nfs/go.mod delete mode 100644 test/nfs/go.sum delete mode 100644 test/nfs/kernel_mount_test.go delete mode 100644 weed/command/nfs.go delete mode 100644 weed/filer/filer_inode_index.go delete mode 100644 weed/filer/filer_inode_index_test.go delete mode 100644 weed/server/nfs/access.go 
delete mode 100644 weed/server/nfs/access_test.go delete mode 100644 weed/server/nfs/filehandle.go delete mode 100644 weed/server/nfs/filehandle_test.go delete mode 100644 weed/server/nfs/filesystem.go delete mode 100644 weed/server/nfs/handler.go delete mode 100644 weed/server/nfs/integration_test.go delete mode 100644 weed/server/nfs/internal_client.go delete mode 100644 weed/server/nfs/metadata_follow.go delete mode 100644 weed/server/nfs/mount_udp.go delete mode 100644 weed/server/nfs/mount_udp_test.go delete mode 100644 weed/server/nfs/portmap.go delete mode 100644 weed/server/nfs/portmap_test.go delete mode 100644 weed/server/nfs/rpc_version_filter.go delete mode 100644 weed/server/nfs/rpc_version_filter_test.go delete mode 100644 weed/server/nfs/server.go delete mode 100644 weed/server/nfs/server_test.go delete mode 100644 weed/server/nfs/uploader.go diff --git a/.github/workflows/nfs-tests.yml b/.github/workflows/nfs-tests.yml deleted file mode 100644 index 92fd0a7ea..000000000 --- a/.github/workflows/nfs-tests.yml +++ /dev/null @@ -1,137 +0,0 @@ -name: "NFS Integration Tests" - -on: - push: - branches: [ master, main ] - paths: - - 'weed/server/nfs/**' - - 'weed/command/nfs.go' - - 'weed/filer/filer_inode.go' - - 'weed/filer/filer_inode_index.go' - - 'weed/filer/filerstore_wrapper.go' - - 'weed/server/filer_grpc_server_rename.go' - - 'test/nfs/**' - - '.github/workflows/nfs-tests.yml' - pull_request: - branches: [ master, main ] - paths: - - 'weed/server/nfs/**' - - 'weed/command/nfs.go' - - 'weed/filer/filer_inode.go' - - 'weed/filer/filer_inode_index.go' - - 'weed/filer/filerstore_wrapper.go' - - 'weed/server/filer_grpc_server_rename.go' - - 'test/nfs/**' - - '.github/workflows/nfs-tests.yml' - -concurrency: - group: ${{ github.head_ref }}/nfs-tests - cancel-in-progress: true - -permissions: - contents: read - -env: - TEST_TIMEOUT: '15m' - -jobs: - nfs-integration: - name: NFS Integration Testing - runs-on: ubuntu-22.04 - timeout-minutes: 20 - - steps: - 
- name: Checkout code - uses: actions/checkout@v6 - - - name: Set up Go - uses: actions/setup-go@v6 - with: - go-version-file: 'go.mod' - - - name: Build SeaweedFS - run: | - cd weed - go build -o weed . - chmod +x weed - ./weed version - - - name: Run NFS Integration Tests - run: | - cd test/nfs - - echo "Running NFS integration tests..." - echo "============================================" - - # Install test dependencies - go mod download - - # Run the protocol-layer tests. The kernel-mount tests require root - # for mount(2) and are exercised in their own privileged step below; - # skip them here so a "skipped because not root" line doesn't show - # up as noise on every CI run. - go test -v -timeout=${{ env.TEST_TIMEOUT }} -skip '^TestKernelMount' ./... - - echo "============================================" - echo "NFS integration tests completed" - - - name: Install kernel NFS client - run: | - # nfs-common provides mount.nfs; netbase provides /etc/protocols - # which mount.nfs's protocol-name lookups (`tcp`, `udp`) need. - sudo apt-get update - sudo apt-get install -y nfs-common netbase - - - name: Run kernel-mount E2E tests - run: | - cd test/nfs - - echo "Running kernel-mount end-to-end tests..." - echo "These mount the running 'weed nfs' subprocess via the actual" - echo "Linux NFS client to catch protocol regressions invisible to" - echo "the go-nfs-client-based tests above." - echo "============================================" - - # mount(2) is privileged. Preserve PATH so 'go' (and the weed - # binary that test/nfs/framework.go locates via $PATH) resolve - # correctly under sudo, and pass through the Go module/cache dirs - # so we don't redownload modules under root. - sudo env "PATH=$PATH" \ - GOMODCACHE="$(go env GOMODCACHE)" \ - GOCACHE="$(go env GOCACHE)" \ - go test -v -timeout=${{ env.TEST_TIMEOUT }} -run '^TestKernelMount' ./... 
- - echo "============================================" - echo "Kernel-mount E2E tests completed" - - - name: Test Summary - if: always() - run: | - echo "## NFS Integration Test Summary" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "### Test Coverage" >> $GITHUB_STEP_SUMMARY - echo "- **Read/Write Round Trip**: Basic file create + read" >> $GITHUB_STEP_SUMMARY - echo "- **Directory Operations**: Mkdir, ReadDirPlus, RmDir" >> $GITHUB_STEP_SUMMARY - echo "- **Nested Directories**: Deep tree creation and leaf I/O" >> $GITHUB_STEP_SUMMARY - echo "- **Rename**: Content preserved across rename" >> $GITHUB_STEP_SUMMARY - echo "- **Overwrite + Truncate**: Setattr(size=0) + shorter write" >> $GITHUB_STEP_SUMMARY - echo "- **Large Files**: 3 MiB binary round trip" >> $GITHUB_STEP_SUMMARY - echo "- **Edge Payloads**: All 256 byte values + empty files" >> $GITHUB_STEP_SUMMARY - echo "- **Symlinks**: Symlink + Lookup" >> $GITHUB_STEP_SUMMARY - echo "- **Missing Path**: Remove on missing entry errors cleanly" >> $GITHUB_STEP_SUMMARY - echo "- **FSINFO**: Non-zero rtpref/wtpref advertised" >> $GITHUB_STEP_SUMMARY - echo "- **Sequential Append**: Two-part concatenation" >> $GITHUB_STEP_SUMMARY - echo "- **ReadDir After Remove**: Meta cache does not serve stale entries" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "### Kernel-Mount E2E Coverage" >> $GITHUB_STEP_SUMMARY - echo "- **V3 over TCP**: baseline NFSv3 mount + readdir" >> $GITHUB_STEP_SUMMARY - echo "- **V3 with mountproto=udp**: regression test for UDP MOUNT v3 responder" >> $GITHUB_STEP_SUMMARY - echo "- **V4 rejects cleanly**: regression test for the v4 PROG_MISMATCH path (#9262)" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "### Harness" >> $GITHUB_STEP_SUMMARY - echo "Most tests boot their own master + volume + filer + nfs subprocess" >> $GITHUB_STEP_SUMMARY - echo "stack on loopback and drive it via the NFSv3 RPC protocol using" >> $GITHUB_STEP_SUMMARY - 
echo "go-nfs-client. The kernel-mount E2E tests reuse the same harness" >> $GITHUB_STEP_SUMMARY - echo "but mount the export through the in-tree Linux NFS client to" >> $GITHUB_STEP_SUMMARY - echo "catch protocol regressions a Go-only client can't see; they run" >> $GITHUB_STEP_SUMMARY - echo "in a separate privileged step (mount(2) requires root)." >> $GITHUB_STEP_SUMMARY diff --git a/go.mod b/go.mod index 19fe7c409..a230c8f48 100644 --- a/go.mod +++ b/go.mod @@ -129,7 +129,6 @@ require ( github.com/cognusion/imaging v1.0.3 github.com/fluent/fluent-logger-golang v1.10.1 github.com/getsentry/sentry-go v0.44.1 - github.com/go-git/go-billy/v5 v5.9.0 github.com/go-ldap/ldap/v3 v3.4.13 github.com/golang-jwt/jwt/v5 v5.3.1 github.com/google/flatbuffers/go v0.0.0-20230108230133-3b8644d32c50 @@ -152,8 +151,6 @@ require ( github.com/tarantool/go-tarantool/v2 v2.4.2 github.com/testcontainers/testcontainers-go v0.40.0 github.com/tikv/client-go/v2 v2.0.7 - github.com/willscott/go-nfs v0.0.4 - github.com/willscott/go-nfs-client v0.0.0-20251022144359-801f10d98886 github.com/xeipuuv/gojsonschema v1.2.0 github.com/ydb-platform/ydb-go-sdk-auth-environ v0.5.1 github.com/ydb-platform/ydb-go-sdk/v3 v3.134.2 @@ -211,6 +208,7 @@ require ( github.com/dromara/dongle v1.0.1 // indirect github.com/gin-gonic/gin v1.11.0 // indirect github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 // indirect + github.com/go-git/go-billy/v5 v5.9.0 // indirect github.com/goccy/go-yaml v1.18.0 // indirect github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 // indirect github.com/google/go-cmp v0.7.0 // indirect @@ -257,7 +255,6 @@ require ( github.com/pquerna/otp v1.5.0 // indirect github.com/pterm/pterm v0.12.82 // indirect github.com/quic-go/qpack v0.6.0 // indirect - github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93 // indirect github.com/rclone/Proton-API-Bridge v1.0.3 // indirect github.com/rclone/go-proton-api v1.0.2 // indirect github.com/rogpeppe/go-internal v1.14.1 // 
indirect diff --git a/go.sum b/go.sum index 9718cc0ba..1237d0fd0 100644 --- a/go.sum +++ b/go.sum @@ -1783,8 +1783,6 @@ github.com/quic-go/quic-go v0.59.0 h1:OLJkp1Mlm/aS7dpKgTc6cnpynnD2Xg7C1pwL6vy/SA github.com/quic-go/quic-go v0.59.0/go.mod h1:upnsH4Ju1YkqpLXC305eW3yDZ4NfnNbmQRCMWS58IKU= github.com/rabbitmq/amqp091-go v1.11.0 h1:HxIctVm9Gid/Vtn706necmZ7Wj6pgGI2eqplRbEY8O8= github.com/rabbitmq/amqp091-go v1.11.0/go.mod h1:Hy4jKW5kQART1u+JkDTF9YYOQUHXqMuhrgxOEeS7G4o= -github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93 h1:UVArwN/wkKjMVhh2EQGC0tEc1+FqiLlvYXY5mQ2f8Wg= -github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93/go.mod h1:Nfe4efndBz4TibWycNE+lqyJZiMX4ycx+QKV8Ta0f/o= github.com/rclone/Proton-API-Bridge v1.0.3 h1:Bs7RC4xCFSN0BPIYVda/BNxp0qo3NV0gB2VZqx2KIew= github.com/rclone/Proton-API-Bridge v1.0.3/go.mod h1:26RAest751Ofk+F/d8xtl4UyWXrZvMQwn39U8rm/WKM= github.com/rclone/go-proton-api v1.0.2 h1:cJtJUab0MGJ3C6q5kiEJs3pbyhSLnOKMyYOQehA0PBc= @@ -2028,10 +2026,6 @@ github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IU github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok= github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= -github.com/willscott/go-nfs v0.0.4 h1:1vpOPAdECmoT2KmZ8u+ukO/jfvDjMEUNYhA2F1jGJtI= -github.com/willscott/go-nfs v0.0.4/go.mod h1:VhNccO67Oug787VNXcyx9JDI3ZoSpqoKMT/lWMhUIDg= -github.com/willscott/go-nfs-client v0.0.0-20251022144359-801f10d98886 h1:DtrBtkgTJk2XGt4T7eKdKVkd9A5NCevN2e4inLXtsqA= -github.com/willscott/go-nfs-client v0.0.0-20251022144359-801f10d98886/go.mod h1:Tq++Lr/FgiS3X48q5FETemXiSLGuYMQT2sPjYNPJSwA= github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc= github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw= github.com/wsxiaoys/terminal 
v0.0.0-20160513160801-0940f3fc43a0 h1:3UeQBvD0TFrlVjOeLOBz+CPAI8dnbqNSVwUwRrkp7vQ= diff --git a/test/nfs/Makefile b/test/nfs/Makefile deleted file mode 100644 index 9e695519f..000000000 --- a/test/nfs/Makefile +++ /dev/null @@ -1,36 +0,0 @@ -.PHONY: all build test test-verbose test-short test-debug clean deps tidy - -all: build test - -# Build the weed binary first -build: - cd ../../weed && go build -o weed . - -# Install test dependencies -deps: - go mod download - -# Run all tests -test: build deps - go test -timeout 5m ./... - -# Run tests with verbose output -test-verbose: build deps - go test -v -timeout 5m ./... - -# Skip long-running integration tests -test-short: deps - go test -short -v ./... - -# Run tests with debug output from SeaweedFS -test-debug: build deps - go test -v -timeout 5m ./... 2>&1 | tee test.log - -# Clean up test artifacts -clean: - rm -f test.log - go clean -testcache - -# Update go.sum -tidy: - go mod tidy diff --git a/test/nfs/README.md b/test/nfs/README.md deleted file mode 100644 index 263e92dcd..000000000 --- a/test/nfs/README.md +++ /dev/null @@ -1,92 +0,0 @@ -# SeaweedFS NFS Integration Tests - -End-to-end tests that boot a real SeaweedFS cluster (`master` + `volume` + -`filer`) plus the experimental `weed nfs` frontend and drive it through the -NFSv3 wire protocol. The tests talk to the server over TCP using -`github.com/willscott/go-nfs-client`, which means they do **not** need a -kernel NFS mount, privileged ports, or any platform-specific tooling. - -## Prerequisites - -1. Build the `weed` binary: - ```bash - cd ../../weed - go build -o weed . - ``` -2. Go 1.24 or later. - -## Running the tests - -```bash -# Build weed and run everything -make test - -# Verbose output, keeps the subprocess stdout -make test-verbose - -# Skip integration tests — useful when iterating on the framework itself -make test-short - -# Run a single test -go test -v -run TestNfsBasicReadWrite ./... 
-``` - -Every test starts its own cluster on random loopback ports, so runs are -isolated and can execute in parallel. - -## Layout - -- `framework.go` — launches `weed master`, `weed volume`, `weed filer`, and - `weed nfs` as subprocesses, waits for each to accept TCP, and exposes a - `Mount()` helper that returns an `nfsclient.Target`. -- `basic_test.go` — covers the most common NFS operations: - - Read/write round-trip (`TestNfsBasicReadWrite`) - - Mkdir / ReadDirPlus / RmDir (`TestNfsMkdirAndRmdir`) - - Nested directory + leaf file (`TestNfsNestedDirectories`) - - Rename preserves content (`TestNfsRenamePreservesContent`) - - Overwrite shrinks file size (`TestNfsOverwriteShrinksFile`) - - Large binary file round-trip (`TestNfsLargeFile`) - - Arbitrary binary and empty files (`TestNfsBinaryAndEmptyFiles`) - - Symlink + Readlink (`TestNfsSymlinkRoundTrip`) - - ReadDirPlus ordering sanity (`TestNfsReadDirPlusOrdering`) - - Remove on missing path errors cleanly (`TestNfsRemoveMissingFailsCleanly`) - - FSINFO advertises non-zero limits (`TestNfsFSInfoReturnsSaneLimits`) - - Sequential append writes concatenate (`TestNfsAppendIsSequential`) - - ReadDir after remove (`TestNfsReadDirAfterRemove`) - -## Debugging a failing test - -Keep the cluster temp dir for inspection: - -```go -config := DefaultTestConfig() -config.SkipCleanup = true -``` - -Enable subprocess stdout/stderr: - -```go -config := DefaultTestConfig() -config.EnableDebug = true -``` - -Or run with `-v`, which flips `EnableDebug` automatically via `testing.Verbose()`. - -## Notes - -- The NFS server binds to `127.0.0.1` with `-ip.bind=127.0.0.1` and exports - `/nfs_export`. The test framework pre-creates that directory via the - filer's HTTP API before starting the NFS server — the NFS server requires - its export root to exist in the filer's namespace with a real entry, and - the filer's synthetic `/` root does not match the `Name=="/"` check the - NFS server performs during `ensureIndexedEntry`. 
-- Ports are allocated dynamically. Each test run opens a short-lived - listener on `127.0.0.1:0`, reads back the assigned port, closes the - listener, and hands the port to `weed master/volume/filer/nfs`. There is - a tiny race window between close and reopen that has not been a problem - in practice but is worth remembering if you see a "bind: address already - in use" failure. -- All four `weed` components are started with explicit `-port.grpc=...` - flags. Without them, the default is `-port + 10000`, which overflows - `65535` whenever the HTTP port lands above `55535` — the kernel's - ephemeral port range on macOS routinely does. diff --git a/test/nfs/basic_test.go b/test/nfs/basic_test.go deleted file mode 100644 index c971972f4..000000000 --- a/test/nfs/basic_test.go +++ /dev/null @@ -1,400 +0,0 @@ -package nfs - -import ( - "bytes" - "fmt" - "io" - "os" - "path" - "strings" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - nfsclient "github.com/willscott/go-nfs-client/nfs" -) - -// setupFramework is a small helper that boots the cluster for a single test -// and tears everything down on completion. Every test gets a fresh filer + -// volume pair so they cannot step on each other's namespace. -func setupFramework(t *testing.T) *NfsTestFramework { - t.Helper() - if testing.Short() { - t.Skip("skipping integration test in short mode") - } - config := DefaultTestConfig() - config.EnableDebug = testing.Verbose() - fw := NewNfsTestFramework(t, config) - require.NoError(t, fw.Setup(config), "framework setup") - t.Cleanup(fw.Cleanup) - return fw -} - -// writeAll writes payload to path on the target in a single Write call. The -// NFS WRITE3 RPC chunks internally, so this exists purely so tests read -// linearly. 
-func writeAll(t *testing.T, target *nfsclient.Target, remotePath string, payload []byte) { - t.Helper() - file, err := target.OpenFile(remotePath, 0o644) - require.NoError(t, err, "open %s for write", remotePath) - if len(payload) > 0 { - n, err := file.Write(payload) - require.NoError(t, err, "write %s", remotePath) - require.Equal(t, len(payload), n, "short write on %s", remotePath) - } - require.NoError(t, file.Close(), "close %s", remotePath) -} - -// readAll opens path on the target and returns the full file contents. -func readAll(t *testing.T, target *nfsclient.Target, remotePath string) []byte { - t.Helper() - file, err := target.Open(remotePath) - require.NoError(t, err, "open %s for read", remotePath) - defer file.Close() - content, err := io.ReadAll(file) - require.NoError(t, err, "read %s", remotePath) - return content -} - -// TestNfsBasicReadWrite exercises the most common NFS path: OpenFile + Write -// + Close followed by Open + Read to verify round-trip data integrity. -func TestNfsBasicReadWrite(t *testing.T) { - fw := setupFramework(t) - target, cleanup, err := fw.Mount() - require.NoError(t, err) - defer cleanup() - - payload := []byte("hello from seaweedfs nfs integration test") - writeAll(t, target, "/hello.txt", payload) - - got := readAll(t, target, "/hello.txt") - assert.Equal(t, payload, got, "round-tripped content must match") - - info, err := target.Getattr("/hello.txt") - require.NoError(t, err) - assert.Equal(t, int64(len(payload)), int64(info.Filesize)) -} - -// TestNfsMkdirAndRmdir covers Mkdir, ReadDirPlus, and RmDir. The readdir -// assertion also verifies that the newly-created directory shows up under -// the export root the way a POSIX client would expect. 
-func TestNfsMkdirAndRmdir(t *testing.T) { - fw := setupFramework(t) - target, cleanup, err := fw.Mount() - require.NoError(t, err) - defer cleanup() - - _, err = target.Mkdir("/dir1", 0o755) - require.NoError(t, err) - - entries, err := target.ReadDirPlus("/") - require.NoError(t, err) - found := false - for _, entry := range entries { - if entry.Name() == "dir1" { - found = true - assert.True(t, entry.IsDir(), "dir1 should be a directory") - } - } - assert.True(t, found, "expected dir1 in readdir listing") - - require.NoError(t, target.RmDir("/dir1")) - - // After removal, dir1 must be gone from the listing. - entries, err = target.ReadDirPlus("/") - require.NoError(t, err) - for _, entry := range entries { - assert.NotEqual(t, "dir1", entry.Name(), "dir1 should be removed") - } -} - -// TestNfsNestedDirectories ensures the server can materialise a deep tree in -// a single Mkdir-per-segment sequence and that reads/writes work at the -// leaves. -func TestNfsNestedDirectories(t *testing.T) { - fw := setupFramework(t) - target, cleanup, err := fw.Mount() - require.NoError(t, err) - defer cleanup() - - for _, segment := range []string{"/a", "/a/b", "/a/b/c"} { - _, err := target.Mkdir(segment, 0o755) - require.NoError(t, err, "mkdir %s", segment) - } - - payload := []byte("deep path content") - writeAll(t, target, "/a/b/c/leaf.txt", payload) - - got := readAll(t, target, "/a/b/c/leaf.txt") - assert.Equal(t, payload, got) - - require.NoError(t, target.Remove("/a/b/c/leaf.txt")) - require.NoError(t, target.RmDir("/a/b/c")) - require.NoError(t, target.RmDir("/a/b")) - require.NoError(t, target.RmDir("/a")) -} - -// TestNfsRenamePreservesContent renames a file and makes sure the content -// at the new path matches what was written at the old one, and that the -// old path disappears. It does not assert on inode identity because pjdfstest -// already covers that and this test intentionally avoids depending on the -// mount-side identity plumbing. 
-func TestNfsRenamePreservesContent(t *testing.T) { - fw := setupFramework(t) - target, cleanup, err := fw.Mount() - require.NoError(t, err) - defer cleanup() - - payload := []byte("rename me") - writeAll(t, target, "/src.txt", payload) - - require.NoError(t, target.Rename("/src.txt", "/dst.txt")) - - _, _, err = target.Lookup("/src.txt") - assert.Error(t, err, "source should be gone after rename") - - got := readAll(t, target, "/dst.txt") - assert.Equal(t, payload, got) - - require.NoError(t, target.Remove("/dst.txt")) -} - -// TestNfsOverwriteShrinksFile rewrites an existing file with shorter content -// and asserts Getattr reports the new (smaller) size. go-nfs-client's -// OpenFile does not pass O_TRUNC, so the test truncates explicitly via -// Setattr(size=0) before the second write — mirroring what `echo >file` -// does on a POSIX client. -func TestNfsOverwriteShrinksFile(t *testing.T) { - fw := setupFramework(t) - target, cleanup, err := fw.Mount() - require.NoError(t, err) - defer cleanup() - - writeAll(t, target, "/overwrite.txt", []byte("the quick brown fox")) - - require.NoError(t, target.Setattr("/overwrite.txt", nfsclient.Sattr3{ - Size: nfsclient.SetSize{SetIt: true, Size: 0}, - })) - - writeAll(t, target, "/overwrite.txt", []byte("short")) - - info, err := target.Getattr("/overwrite.txt") - require.NoError(t, err) - assert.Equal(t, int64(len("short")), int64(info.Filesize)) - - got := readAll(t, target, "/overwrite.txt") - assert.Equal(t, []byte("short"), got) - - require.NoError(t, target.Remove("/overwrite.txt")) -} - -// TestNfsLargeFile writes a multi-megabyte payload so the write path has to -// cut chunks and flush through the volume server rather than inlining -// content in the filer entry. 
-func TestNfsLargeFile(t *testing.T) { - fw := setupFramework(t) - target, cleanup, err := fw.Mount() - require.NoError(t, err) - defer cleanup() - - const size = 3 * 1024 * 1024 // 3 MiB — exceeds the 4 MiB inline cutoff boundary when combined with metadata - payload := make([]byte, size) - for i := range payload { - payload[i] = byte(i % 251) // non-repeating to catch offset bugs - } - - writeAll(t, target, "/big.bin", payload) - - info, err := target.Getattr("/big.bin") - require.NoError(t, err) - assert.Equal(t, int64(size), int64(info.Filesize)) - - got := readAll(t, target, "/big.bin") - require.Equal(t, size, len(got)) - assert.True(t, bytes.Equal(payload, got), "large file content must round-trip byte-for-byte") - - require.NoError(t, target.Remove("/big.bin")) -} - -// TestNfsBinaryAndEmptyFiles covers two edge-case payloads the write path -// tends to regress on: arbitrary binary bytes and zero-length files. -func TestNfsBinaryAndEmptyFiles(t *testing.T) { - fw := setupFramework(t) - target, cleanup, err := fw.Mount() - require.NoError(t, err) - defer cleanup() - - t.Run("AllByteValues", func(t *testing.T) { - payload := make([]byte, 256) - for i := range payload { - payload[i] = byte(i) - } - writeAll(t, target, "/binary.bin", payload) - assert.Equal(t, payload, readAll(t, target, "/binary.bin")) - require.NoError(t, target.Remove("/binary.bin")) - }) - - t.Run("EmptyFile", func(t *testing.T) { - writeAll(t, target, "/empty.txt", nil) - info, err := target.Getattr("/empty.txt") - require.NoError(t, err) - assert.Equal(t, int64(0), int64(info.Filesize)) - require.NoError(t, target.Remove("/empty.txt")) - }) -} - -// TestNfsSymlinkRoundTrip covers Symlink and Readlink through the nfs server. -// Readlink returns the target path; the server does not auto-traverse it. 
-func TestNfsSymlinkRoundTrip(t *testing.T) { - fw := setupFramework(t) - target, cleanup, err := fw.Mount() - require.NoError(t, err) - defer cleanup() - - // Symlink uses a different RPC than open+create, and our server routes it - // through the billy Change interface. - require.NoError(t, target.Symlink("/target.txt", "/link.txt")) - - // The underlying target does not need to exist for readlink to succeed. - file, _, err := target.Lookup("/link.txt") - require.NoError(t, err, "lookup symlink") - assert.True(t, file.Mode()&os.ModeSymlink != 0, "expected symlink mode, got %s", file.Mode()) - - require.NoError(t, target.Remove("/link.txt")) -} - -// TestNfsReadDirPlusOrdering creates a handful of files with distinct names -// and ensures ReadDirPlus surfaces every one of them. The server pages -// listings from the filer, so we want to make sure nothing is truncated. -func TestNfsReadDirPlusOrdering(t *testing.T) { - fw := setupFramework(t) - target, cleanup, err := fw.Mount() - require.NoError(t, err) - defer cleanup() - - _, err = target.Mkdir("/listing", 0o755) - require.NoError(t, err) - - names := []string{"alpha.txt", "beta.txt", "gamma.txt", "delta.txt", "epsilon.txt"} - for _, name := range names { - writeAll(t, target, path.Join("/listing", name), []byte(name)) - } - - entries, err := target.ReadDirPlus("/listing") - require.NoError(t, err) - seen := make(map[string]struct{}, len(entries)) - for _, entry := range entries { - if entry.Name() == "." || entry.Name() == ".." { - continue - } - seen[entry.Name()] = struct{}{} - } - for _, name := range names { - _, ok := seen[name] - assert.True(t, ok, "expected %s in directory listing", name) - } - - for _, name := range names { - require.NoError(t, target.Remove(path.Join("/listing", name))) - } - require.NoError(t, target.RmDir("/listing")) -} - -// TestNfsRemoveMissingFailsCleanly asserts that removing a non-existent path -// surfaces an error instead of silently succeeding. 
A bug where the server -// returned NFS3_OK on missing entries would hide metadata drift. -func TestNfsRemoveMissingFailsCleanly(t *testing.T) { - fw := setupFramework(t) - target, cleanup, err := fw.Mount() - require.NoError(t, err) - defer cleanup() - - err = target.Remove("/does_not_exist.txt") - require.Error(t, err, "removing a missing file must error") - // NFS3 surfaces this as NFS3ERR_NOENT; make sure the error text is - // recognisable without locking us into the library's exact wording. - assert.True(t, - strings.Contains(strings.ToLower(err.Error()), "noent") || - strings.Contains(strings.ToLower(err.Error()), "not exist") || - strings.Contains(strings.ToLower(err.Error()), "no such"), - "unexpected error shape: %v", err) -} - -// TestNfsFSInfoReturnsSaneLimits pokes at FSINFO so we catch regressions -// where the server advertises zero read/write limits (which would make -// clients fall back to the 8 KiB floor and slow every test that follows). -func TestNfsFSInfoReturnsSaneLimits(t *testing.T) { - fw := setupFramework(t) - target, cleanup, err := fw.Mount() - require.NoError(t, err) - defer cleanup() - - info, err := target.FSInfo() - require.NoError(t, err) - require.NotNil(t, info) - assert.Greater(t, info.RTPref, uint32(0), "rtpref must be positive") - assert.Greater(t, info.WTPref, uint32(0), "wtpref must be positive") -} - -// TestNfsAppendIsSequential writes two chunks to the same file in separate -// Open cycles and asserts the concatenation is preserved. The second write -// uses O_APPEND (the default Open path in go-nfs-client does not pass -// flags, so we explicitly reopen after writing the first chunk). 
-func TestNfsAppendIsSequential(t *testing.T) { - fw := setupFramework(t) - target, cleanup, err := fw.Mount() - require.NoError(t, err) - defer cleanup() - - const prefix = "part1-" - const suffix = "part2" - - writeAll(t, target, "/concat.txt", []byte(prefix)) - - file, err := target.OpenFile("/concat.txt", 0o644) - require.NoError(t, err) - // Seek to end before writing so we append rather than overwrite. go-nfs - // client's File.Seek uses the same offset tracking as Write so this is - // enough to place the second chunk after the first. - _, err = file.Seek(int64(len(prefix)), io.SeekStart) - require.NoError(t, err) - _, err = file.Write([]byte(suffix)) - require.NoError(t, err) - require.NoError(t, file.Close()) - - got := readAll(t, target, "/concat.txt") - assert.Equal(t, prefix+suffix, string(got)) - - require.NoError(t, target.Remove("/concat.txt")) -} - -// Regression: readdir should not emit stale entries after a remove. This is -// the scenario the PR's meta cache invalidation logic was written to fix. -func TestNfsReadDirAfterRemove(t *testing.T) { - fw := setupFramework(t) - target, cleanup, err := fw.Mount() - require.NoError(t, err) - defer cleanup() - - _, err = target.Mkdir("/churn", 0o755) - require.NoError(t, err) - for i := 0; i < 5; i++ { - writeAll(t, target, path.Join("/churn", fmt.Sprintf("f%d.txt", i)), []byte{byte(i)}) - } - // Remove the middle one and re-list. 
- require.NoError(t, target.Remove("/churn/f2.txt")) - - entries, err := target.ReadDirPlus("/churn") - require.NoError(t, err) - for _, entry := range entries { - assert.NotEqual(t, "f2.txt", entry.Name(), "removed file should not reappear in listing") - } - - for i := 0; i < 5; i++ { - if i == 2 { - continue - } - require.NoError(t, target.Remove(path.Join("/churn", fmt.Sprintf("f%d.txt", i)))) - } - require.NoError(t, target.RmDir("/churn")) -} diff --git a/test/nfs/framework.go b/test/nfs/framework.go deleted file mode 100644 index eb1e9f380..000000000 --- a/test/nfs/framework.go +++ /dev/null @@ -1,423 +0,0 @@ -package nfs - -import ( - "bytes" - "fmt" - "io" - "mime/multipart" - "net" - "net/http" - "os" - "os/exec" - "path/filepath" - "runtime" - "strings" - "syscall" - "testing" - "time" - - "github.com/seaweedfs/seaweedfs/test/testutil" - "github.com/stretchr/testify/require" - nfsclient "github.com/willscott/go-nfs-client/nfs" - "github.com/willscott/go-nfs-client/nfs/rpc" -) - -// NfsTestFramework boots a minimal SeaweedFS cluster (master + volume + filer) -// plus the experimental `weed nfs` frontend and hands out NFSv3 RPC clients -// that talk to it. Everything is driven via subprocesses so the tests exercise -// the same binary an operator would deploy, and no kernel mount is required. -type NfsTestFramework struct { - t *testing.T - tempDir string - dataDir string - masterProcess *os.Process - volumeProcess *os.Process - filerProcess *os.Process - nfsProcess *os.Process - masterAddr string - masterGrpc int - volumeAddr string - volumeGrpc int - filerAddr string - filerGrpc int - nfsAddr string - exportRoot string - weedBinary string - isSetup bool - skipCleanup bool -} - -// TestConfig controls how the framework boots the cluster. -type TestConfig struct { - NumVolumes int - EnableDebug bool - SkipCleanup bool // keep temp dir on failure for inspection - // ExportRoot is the filer path the NFS server exports. 
Defaults to "/" - // so tests can use any path, with a single warning logged by the server. - ExportRoot string -} - -// DefaultTestConfig returns the defaults used by most tests. A dedicated -// /nfs_export subtree is used as the NFS export root because the NFS server -// requires the export directory to exist in the filer's namespace and carry -// a non-zero inode — passing "/" would succeed only for filer setups that -// have already backfilled the root inode. -func DefaultTestConfig() *TestConfig { - return &TestConfig{ - NumVolumes: 3, - EnableDebug: false, - SkipCleanup: false, - ExportRoot: "/nfs_export", - } -} - -// NewNfsTestFramework allocates a framework bound to the current test. Call -// Setup next to actually start the cluster. -func NewNfsTestFramework(t *testing.T, config *TestConfig) *NfsTestFramework { - if config == nil { - config = DefaultTestConfig() - } - - tempDir, err := os.MkdirTemp("", "seaweedfs_nfs_test_") - require.NoError(t, err) - - // testutil.MustAllocatePorts holds every listener open until the full - // batch has been reserved, which avoids the "close-then-hope" race my - // original per-port helper had. We need seven ports: four HTTP (master, - // volume, filer, nfs) and three gRPC (master, volume, filer — nfs has - // no gRPC endpoint). 
- ports := testutil.MustAllocatePorts(t, 7) - - exportRoot := config.ExportRoot - if exportRoot == "" { - exportRoot = "/" - } - - return &NfsTestFramework{ - t: t, - tempDir: tempDir, - dataDir: filepath.Join(tempDir, "data"), - masterAddr: fmt.Sprintf("127.0.0.1:%d", ports[0]), - masterGrpc: ports[1], - volumeAddr: fmt.Sprintf("127.0.0.1:%d", ports[2]), - volumeGrpc: ports[3], - filerAddr: fmt.Sprintf("127.0.0.1:%d", ports[4]), - filerGrpc: ports[5], - nfsAddr: fmt.Sprintf("127.0.0.1:%d", ports[6]), - exportRoot: exportRoot, - weedBinary: findWeedBinary(), - isSetup: false, - skipCleanup: config.SkipCleanup, - } -} - -// Setup starts the SeaweedFS cluster and the NFS frontend, waiting for each -// component to accept connections before moving on. -func (f *NfsTestFramework) Setup(config *TestConfig) error { - if f.isSetup { - return fmt.Errorf("framework already setup") - } - - dirs := []string{ - f.dataDir, - filepath.Join(f.dataDir, "master"), - filepath.Join(f.dataDir, "volume"), - } - for _, dir := range dirs { - if err := os.MkdirAll(dir, 0755); err != nil { - return fmt.Errorf("failed to create directory %s: %v", dir, err) - } - } - - if err := f.startMaster(config); err != nil { - return fmt.Errorf("failed to start master: %v", err) - } - if !testutil.WaitForPort(portFromAddr(f.masterAddr), testutil.SeaweedMiniStartupTimeout) { - return fmt.Errorf("master not ready at %s", f.masterAddr) - } - - if err := f.startVolumeServer(config); err != nil { - return fmt.Errorf("failed to start volume server: %v", err) - } - if !testutil.WaitForPort(portFromAddr(f.volumeAddr), testutil.SeaweedMiniStartupTimeout) { - return fmt.Errorf("volume server not ready at %s", f.volumeAddr) - } - - if err := f.startFiler(config); err != nil { - return fmt.Errorf("failed to start filer: %v", err) - } - if !testutil.WaitForPort(portFromAddr(f.filerAddr), testutil.SeaweedMiniStartupTimeout) { - return fmt.Errorf("filer not ready at %s", f.filerAddr) - } - - // Pre-create the export 
root in the filer's namespace. The NFS server - // expects its export directory to exist with a real inode; uploading a - // placeholder file creates the parent directory implicitly and then - // removing the file leaves the empty directory in place. - if f.exportRoot != "/" { - if err := f.ensureExportRootExists(); err != nil { - return fmt.Errorf("failed to pre-create export root %s: %v", f.exportRoot, err) - } - } - - if err := f.startNfsServer(config); err != nil { - return fmt.Errorf("failed to start NFS server: %v", err) - } - if !testutil.WaitForPort(portFromAddr(f.nfsAddr), testutil.SeaweedMiniStartupTimeout) { - return fmt.Errorf("NFS server not ready at %s", f.nfsAddr) - } - - // Let the NFS server finish wiring up its gRPC subscription to the filer - // before the first client call hits MOUNT/LOOKUP. - time.Sleep(500 * time.Millisecond) - - f.isSetup = true - return nil -} - -// Cleanup stops all processes. Temp state is preserved if SkipCleanup is set. -func (f *NfsTestFramework) Cleanup() { - processes := []*os.Process{f.nfsProcess, f.filerProcess, f.volumeProcess, f.masterProcess} - for _, proc := range processes { - if proc != nil { - _ = proc.Signal(syscall.SIGTERM) - _, _ = proc.Wait() - } - } - if !f.skipCleanup { - _ = os.RemoveAll(f.tempDir) - } -} - -// NfsAddr returns the TCP address the NFS server is listening on. -func (f *NfsTestFramework) NfsAddr() string { return f.nfsAddr } - -// FilerAddr returns the TCP address of the filer. -func (f *NfsTestFramework) FilerAddr() string { return f.filerAddr } - -// ExportRoot returns the path the NFS server exports. -func (f *NfsTestFramework) ExportRoot() string { return f.exportRoot } - -// Mount opens an NFSv3 MOUNT+NFS connection against the running NFS server -// and returns a Target that tests can drive like a mini-VFS. Caller is -// responsible for calling the returned cleanup func to Unmount and close the -// TCP connection. 
-func (f *NfsTestFramework) Mount() (*nfsclient.Target, func(), error) { - var ( - client *rpc.Client - err error - ) - // The NFS server's TCP listener may already be accepting connections when - // waitForService returns, but the RPC program registration can trail it - // by a few milliseconds. Retry the dial to absorb that small window. - for attempt := 0; attempt < 20; attempt++ { - client, err = rpc.DialTCP("tcp", f.nfsAddr, false) - if err == nil { - break - } - time.Sleep(25 * time.Millisecond) - } - if err != nil { - return nil, nil, fmt.Errorf("dial NFS: %w", err) - } - - // Note: do not set Mount.Addr here. When Addr is non-empty, the go-nfs - // client re-dials via portmapper and concatenates `:111` onto the - // address, which produces "too many colons" for a raw `host:port` - // string. Reusing the existing RPC client avoids that path entirely. - mounter := &nfsclient.Mount{Client: client} - target, err := mounter.Mount(f.exportRoot, rpc.AuthNull) - if err != nil { - client.Close() - return nil, nil, fmt.Errorf("mount %s: %w", f.exportRoot, err) - } - - cleanup := func() { - _ = mounter.Unmount() - client.Close() - } - return target, cleanup, nil -} - -func (f *NfsTestFramework) startMaster(config *TestConfig) error { - _, masterPort := splitHostPort(f.masterAddr) - args := []string{ - "master", - "-ip=127.0.0.1", - fmt.Sprintf("-port=%d", masterPort), - fmt.Sprintf("-port.grpc=%d", f.masterGrpc), - "-mdir=" + filepath.Join(f.dataDir, "master"), - "-raftBootstrap", - "-peers=none", - } - return f.startProcess(&f.masterProcess, config, args) -} - -func (f *NfsTestFramework) startVolumeServer(config *TestConfig) error { - _, volumePort := splitHostPort(f.volumeAddr) - // pb.ServerAddress encodes a non-default gRPC port as `host:port.grpc`. - // See weed/pb/server_address.go — the dot, not a colon, is the separator - // between the HTTP port and the gRPC port. 
- masterWithGrpc := fmt.Sprintf("%s.%d", f.masterAddr, f.masterGrpc) - args := []string{ - "volume", - "-master=" + masterWithGrpc, - "-ip=127.0.0.1", - fmt.Sprintf("-port=%d", volumePort), - fmt.Sprintf("-port.grpc=%d", f.volumeGrpc), - "-dir=" + filepath.Join(f.dataDir, "volume"), - fmt.Sprintf("-max=%d", config.NumVolumes), - } - return f.startProcess(&f.volumeProcess, config, args) -} - -func (f *NfsTestFramework) startFiler(config *TestConfig) error { - _, filerPort := splitHostPort(f.filerAddr) - masterWithGrpc := fmt.Sprintf("%s.%d", f.masterAddr, f.masterGrpc) - args := []string{ - "filer", - "-master=" + masterWithGrpc, - "-ip=127.0.0.1", - fmt.Sprintf("-port=%d", filerPort), - fmt.Sprintf("-port.grpc=%d", f.filerGrpc), - } - return f.startProcess(&f.filerProcess, config, args) -} - -func (f *NfsTestFramework) startNfsServer(config *TestConfig) error { - _, nfsPort := splitHostPort(f.nfsAddr) - // `host:port.grpc` encoding — see pb/server_address.go. - filerWithGrpc := fmt.Sprintf("%s.%d", f.filerAddr, f.filerGrpc) - args := []string{ - "nfs", - "-filer=" + filerWithGrpc, - "-ip.bind=127.0.0.1", - fmt.Sprintf("-port=%d", nfsPort), - "-filer.path=" + f.exportRoot, - } - return f.startProcess(&f.nfsProcess, config, args) -} - -func (f *NfsTestFramework) startProcess(target **os.Process, config *TestConfig, args []string) error { - cmd := exec.Command(f.weedBinary, args...) - cmd.Dir = f.tempDir - if config.EnableDebug { - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - } - if err := cmd.Start(); err != nil { - return err - } - *target = cmd.Process - return nil -} - -// portFromAddr returns just the port number from a `host:port` string. -// testutil.WaitForPort takes an int port, not a full address. -func portFromAddr(addr string) int { - _, port := splitHostPort(addr) - return port -} - -// ensureExportRootExists posts a placeholder file to f.exportRoot via the -// filer's HTTP API, then deletes it. 
That roundtrip implicitly creates the -// target directory so the NFS server has something to mount. We bypass -// weed/pb here because the HTTP client is simpler and needs no gRPC stubs. -func (f *NfsTestFramework) ensureExportRootExists() error { - exportRoot := strings.TrimRight(f.exportRoot, "/") - if exportRoot == "" { - return nil - } - placeholder := exportRoot + "/.nfs_test_init" - filerURL := "http://" + f.filerAddr + placeholder - - var body bytes.Buffer - writer := multipart.NewWriter(&body) - part, err := writer.CreateFormFile("file", ".nfs_test_init") - if err != nil { - return err - } - if _, err := io.WriteString(part, ""); err != nil { - return err - } - if err := writer.Close(); err != nil { - return err - } - - httpClient := &http.Client{Timeout: 10 * time.Second} - req, err := http.NewRequest(http.MethodPost, filerURL, &body) - if err != nil { - return err - } - req.Header.Set("Content-Type", writer.FormDataContentType()) - resp, err := httpClient.Do(req) - if err != nil { - return err - } - _, _ = io.Copy(io.Discard, resp.Body) - resp.Body.Close() - if resp.StatusCode/100 != 2 { - return fmt.Errorf("filer POST %s returned status %d", filerURL, resp.StatusCode) - } - - // Delete the placeholder; the directory stays behind. 
- deleteReq, err := http.NewRequest(http.MethodDelete, filerURL, nil) - if err != nil { - return err - } - deleteResp, err := httpClient.Do(deleteReq) - if err != nil { - return err - } - _, _ = io.Copy(io.Discard, deleteResp.Body) - deleteResp.Body.Close() - if deleteResp.StatusCode/100 != 2 && deleteResp.StatusCode != http.StatusNotFound { - return fmt.Errorf("filer DELETE %s returned status %d", filerURL, deleteResp.StatusCode) - } - return nil -} - -func splitHostPort(addr string) (string, int) { - host, portStr, err := net.SplitHostPort(addr) - if err != nil { - return "", 0 - } - var port int - _, _ = fmt.Sscanf(portStr, "%d", &port) - return host, port -} - -// findWeedBinary locates the weed binary, preferring the local build in the -// checkout so tests run against the code under review rather than whatever is -// on $PATH. -func findWeedBinary() string { - if _, thisFile, _, ok := runtime.Caller(0); ok { - thisDir := filepath.Dir(thisFile) - candidates := []string{ - filepath.Join(thisDir, "../../weed/weed"), - filepath.Join(thisDir, "../weed/weed"), - } - for _, candidate := range candidates { - if _, err := os.Stat(candidate); err == nil { - abs, _ := filepath.Abs(candidate) - return abs - } - } - } - cwd, _ := os.Getwd() - candidates := []string{ - filepath.Join(cwd, "../../weed/weed"), - filepath.Join(cwd, "../weed/weed"), - filepath.Join(cwd, "./weed"), - } - for _, candidate := range candidates { - if _, err := os.Stat(candidate); err == nil { - abs, _ := filepath.Abs(candidate) - return abs - } - } - if path, err := exec.LookPath("weed"); err == nil { - return path - } - return "weed" -} diff --git a/test/nfs/go.mod b/test/nfs/go.mod deleted file mode 100644 index cfb532528..000000000 --- a/test/nfs/go.mod +++ /dev/null @@ -1,21 +0,0 @@ -module seaweedfs-nfs-tests - -go 1.25.0 - -// test/testutil lives inside the main seaweedfs module; pull it in via a -// local replace so this integration suite can reuse the shared port -// allocator and readiness 
helpers instead of reinventing them. -replace github.com/seaweedfs/seaweedfs => ../.. - -require ( - github.com/seaweedfs/seaweedfs v0.0.0-00010101000000-000000000000 - github.com/stretchr/testify v1.11.1 - github.com/willscott/go-nfs-client v0.0.0-20251022144359-801f10d98886 -) - -require ( - github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect - github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect -) diff --git a/test/nfs/go.sum b/test/nfs/go.sum deleted file mode 100644 index b2bc6f9fb..000000000 --- a/test/nfs/go.sum +++ /dev/null @@ -1,14 +0,0 @@ -github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= -github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= -github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93 h1:UVArwN/wkKjMVhh2EQGC0tEc1+FqiLlvYXY5mQ2f8Wg= -github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93/go.mod h1:Nfe4efndBz4TibWycNE+lqyJZiMX4ycx+QKV8Ta0f/o= -github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= -github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= -github.com/willscott/go-nfs-client v0.0.0-20251022144359-801f10d98886 h1:DtrBtkgTJk2XGt4T7eKdKVkd9A5NCevN2e4inLXtsqA= -github.com/willscott/go-nfs-client v0.0.0-20251022144359-801f10d98886/go.mod h1:Tq++Lr/FgiS3X48q5FETemXiSLGuYMQT2sPjYNPJSwA= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod 
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/test/nfs/kernel_mount_test.go b/test/nfs/kernel_mount_test.go deleted file mode 100644 index d78059191..000000000 --- a/test/nfs/kernel_mount_test.go +++ /dev/null @@ -1,193 +0,0 @@ -//go:build linux - -package nfs - -// End-to-end mount tests that drive the real Linux NFS client (mount.nfs + -// in-tree kernel) against a running `weed nfs` subprocess. These exist to -// catch regressions that the existing framework can't see, because the -// framework drives the server with willscott/go-nfs-client — the same RPC -// library the server uses internally — so any bug shared between the two -// (XDR layout, version dispatch, RPC framing) round-trips invisibly. -// -// Two real bugs hit recently were exactly that shape: -// 1. NFSv4 mis-routed to the v3 SETATTR handler (#9262). The client -// library never sends NFSv4, so the test suite never noticed; the -// Linux kernel mount path did notice, with EIO. -// 2. UDP MOUNT v3 missing. Only TCP MOUNT was advertised; the kernel -// defaults mountproto=udp in many setups, so the in-tree client -// surfaced EPROTONOSUPPORT during MOUNT setup. -// -// These tests mount over the actual loopback interface using mount.nfs and -// shell out to /bin/mount and /bin/umount. They require root (mount(2) is -// privileged) and Linux (the in-tree NFS client is what's being exercised); -// they t.Skip cleanly when either prerequisite is missing. -// -// Run locally with: -// -// cd test/nfs -// sudo go test -v -run TestKernelMount ./... -// -// CI runs them via .github/workflows/nfs-tests.yml after installing -// nfs-common (mount.nfs + helpers). - -import ( - "errors" - "fmt" - "net" - "os" - "os/exec" - "strings" - "testing" -) - -// kernelMountSkipIfUnsupported skips the test when the host can't run a -// real NFS mount. 
The combined check belongs in one place so the three -// kernel-mount tests stay focused on what they're actually verifying. -func kernelMountSkipIfUnsupported(t *testing.T) { - t.Helper() - if os.Geteuid() != 0 { - t.Skip("kernel mount test requires root; mount(2) is privileged") - } - if _, err := exec.LookPath("mount.nfs"); err != nil { - t.Skipf("mount.nfs not installed: %v (CI installs the nfs-common package)", err) - } -} - -// kernelMount runs /bin/mount with the given options against the framework's -// running NFS server, returns the mountpoint and an unmount closure. We pass -// explicit port=/mountport= options so the kernel never queries portmap. -// That keeps the harness honest about what it's testing — the NFS / MOUNT -// wire protocol — and avoids colliding with a system rpcbind on shared CI -// runners (port 111 is privileged and frequently in use already). -func kernelMount(t *testing.T, fw *NfsTestFramework, optsTemplate string) (string, func()) { - t.Helper() - host, portStr, err := net.SplitHostPort(fw.NfsAddr()) - if err != nil { - t.Fatalf("split nfs addr %q: %v", fw.NfsAddr(), err) - } - mountpoint, err := os.MkdirTemp("", "weed-nfs-kmount-") - if err != nil { - t.Fatalf("mkdtemp: %v", err) - } - opts := strings.ReplaceAll(optsTemplate, "{port}", portStr) - target := fmt.Sprintf("%s:%s", host, fw.ExportRoot()) - cmd := exec.Command("mount", "-t", "nfs", "-o", opts, target, mountpoint) - if out, err := cmd.CombinedOutput(); err != nil { - _ = os.RemoveAll(mountpoint) - t.Fatalf("mount %s -o %s failed: %v\nmount output:\n%s", target, opts, err, out) - } - teardown := func() { - // -f to bail out faster if the server's already gone. 
- _ = exec.Command("umount", "-f", mountpoint).Run() - _ = os.RemoveAll(mountpoint) - } - return mountpoint, teardown -} - -func newKernelMountFramework(t *testing.T) *NfsTestFramework { - t.Helper() - cfg := DefaultTestConfig() - fw := NewNfsTestFramework(t, cfg) - if err := fw.Setup(cfg); err != nil { - fw.Cleanup() - t.Fatalf("framework setup: %v", err) - } - t.Cleanup(fw.Cleanup) - return fw -} - -// TestKernelMountV3TCP exercises the most common mount form: NFSv3 + MOUNT -// v3, both over TCP. This is what the existing go-nfs-client tests cover at -// the protocol layer, but running it through mount.nfs and the kernel -// confirms that the wire format we emit decodes cleanly under a different -// XDR/RPC parser. -func TestKernelMountV3TCP(t *testing.T) { - kernelMountSkipIfUnsupported(t) - fw := newKernelMountFramework(t) - - mountpoint, undo := kernelMount(t, fw, - "nfsvers=3,nolock,port={port},mountport={port},proto=tcp,mountproto=tcp") - defer undo() - - if _, err := os.Stat(mountpoint); err != nil { - t.Errorf("stat mountpoint: %v", err) - } - if _, err := os.ReadDir(mountpoint); err != nil { - t.Errorf("readdir mountpoint: %v", err) - } -} - -// TestKernelMountV3MountProtoUDP is the regression test for the UDP MOUNT -// v3 responder. mountproto=udp forces the kernel to call MOUNT over UDP -// only; before the responder existed the kernel hit nothing (MOUNT was -// advertised TCP-only) and surfaced EPROTONOSUPPORT during mount setup. -func TestKernelMountV3MountProtoUDP(t *testing.T) { - kernelMountSkipIfUnsupported(t) - fw := newKernelMountFramework(t) - - mountpoint, undo := kernelMount(t, fw, - "nfsvers=3,nolock,port={port},mountport={port},proto=tcp,mountproto=udp") - defer undo() - - if _, err := os.Stat(mountpoint); err != nil { - t.Errorf("stat mountpoint: %v", err) - } -} - -// TestKernelMountV4RejectsCleanly is the regression test for the NFSv4 -// PROG_MISMATCH path (#9262). 
The server only speaks NFSv3, but the -// previous behaviour was to mis-route v4 COMPOUND to the v3 SETATTR -// handler and write garbage; the kernel surfaced EIO instead of a -// version-mismatch error and (depending on distro) didn't fall back to -// v3. The version filter now answers PROG_MISMATCH so the kernel sees -// "v4 not supported" cleanly. -// -// The test asserts: -// 1. mount.nfs exits non-zero (no silent success against a v3 server); -// 2. the failure message mentions protocol/version/io, which is what the -// kernel surfaces when it gets PROG_MISMATCH instead of garbage. A -// pre-fix server returns "mount system call failed" with no further -// context, so a regression collapses the assertion onto that branch. -func TestKernelMountV4RejectsCleanly(t *testing.T) { - kernelMountSkipIfUnsupported(t) - fw := newKernelMountFramework(t) - - host, portStr, err := net.SplitHostPort(fw.NfsAddr()) - if err != nil { - t.Fatalf("split nfs addr: %v", err) - } - mountpoint, err := os.MkdirTemp("", "weed-nfs-kmount-v4-") - if err != nil { - t.Fatalf("mkdtemp: %v", err) - } - defer os.RemoveAll(mountpoint) - - target := fmt.Sprintf("%s:%s", host, fw.ExportRoot()) - cmd := exec.Command("mount", "-t", "nfs", "-o", - fmt.Sprintf("vers=4,port=%s", portStr), - target, mountpoint) - out, err := cmd.CombinedOutput() - defer exec.Command("umount", "-f", mountpoint).Run() - - if err == nil { - t.Fatalf("v4 mount unexpectedly succeeded against v3-only server\nmount output:\n%s", out) - } - // Don't pin the exact error string — different distros print slightly - // different things — but require some hint that the kernel saw a - // protocol-level failure rather than a generic "mount system call - // failed". Without the version filter, mount.nfs prints the latter - // alone; with it, the former. 
- lower := strings.ToLower(string(out)) - if !strings.Contains(lower, "protocol") && - !strings.Contains(lower, "version") && - !strings.Contains(lower, "i/o") { - t.Errorf("v4 mount failure didn't mention protocol/version/io; output:\n%s", out) - } - // Also require a non-zero exit so a future change that makes mount(2) - // silently succeed (e.g. by relaxing the version filter) shows up - // here even if the message phrasing changes. - var ee *exec.ExitError - if !errors.As(err, &ee) { - t.Errorf("expected mount to exit non-zero with ExitError, got %v", err) - } -} diff --git a/weed/command/command.go b/weed/command/command.go index ad7e2e9c2..970b44ba2 100644 --- a/weed/command/command.go +++ b/weed/command/command.go @@ -48,7 +48,6 @@ var Commands = []*Command{ cmdVolume, cmdWebDav, cmdSftp, - cmdNfs, cmdWorker, } diff --git a/weed/command/nfs.go b/weed/command/nfs.go deleted file mode 100644 index a2af58b2d..000000000 --- a/weed/command/nfs.go +++ /dev/null @@ -1,127 +0,0 @@ -package command - -import ( - "fmt" - - "github.com/seaweedfs/seaweedfs/weed/glog" - "github.com/seaweedfs/seaweedfs/weed/pb" - "github.com/seaweedfs/seaweedfs/weed/security" - weed_server_nfs "github.com/seaweedfs/seaweedfs/weed/server/nfs" - "github.com/seaweedfs/seaweedfs/weed/util" - "github.com/seaweedfs/seaweedfs/weed/util/version" -) - -var ( - nfsStandaloneOptions NfsOptions -) - -type NfsOptions struct { - filer *string - ipBind *string - port *int - filerRootPath *string - readOnly *bool - allowedClients *string - volumeServerAccess *string - portmapBind *string -} - -func init() { - cmdNfs.Run = runNfs // break init cycle - nfsStandaloneOptions.filer = cmdNfs.Flag.String("filer", "localhost:8888", "filer server address") - nfsStandaloneOptions.ipBind = cmdNfs.Flag.String("ip.bind", "127.0.0.1", "ip address to bind to. 
Defaults to loopback; override explicitly to expose the experimental server to the network.") - nfsStandaloneOptions.port = cmdNfs.Flag.Int("port", 2049, "NFS server listen port") - nfsStandaloneOptions.filerRootPath = cmdNfs.Flag.String("filer.path", "", "remote path from filer server to export. Required: no default is provided so operators must opt in to exporting a namespace subtree.") - nfsStandaloneOptions.readOnly = cmdNfs.Flag.Bool("readOnly", false, "export the filer path as read only") - nfsStandaloneOptions.allowedClients = cmdNfs.Flag.String("allowedClients", "", "comma-separated client IPs, hostnames, or CIDRs allowed to connect") - nfsStandaloneOptions.volumeServerAccess = cmdNfs.Flag.String("volumeServerAccess", "direct", "access volume servers by [direct|publicUrl|filerProxy]") - nfsStandaloneOptions.portmapBind = cmdNfs.Flag.String("portmap.bind", "", "when set, bind a built-in portmap v2 responder on :111 so plain `mount -t nfs` works without client-side portmap bypass. Empty disables it. Binding port 111 requires root or CAP_NET_BIND_SERVICE and must not conflict with a system rpcbind.") -} - -var cmdNfs = &Command{ - UsageLine: "nfs -port=2049 -filer= -filer.path=", - Short: "start an experimental NFSv3 server backed by a filer", - Long: `start an experimental NFSv3 server backed by a filer. - -This command serves an experimental filer-native NFSv3 frontend with -deterministic filehandles, filer-backed metadata operations, and direct -volume-server data access for chunk reads and buffered writes. - -Safer defaults (since export ACLs are still not implemented): - - - ip.bind defaults to 127.0.0.1, so the server is not reachable from - other hosts unless you override it explicitly. - - filer.path has no default; you must pick the subtree to export. - -Override -ip.bind to a routable address only after you have reviewed --allowedClients and the readiness of the rest of your deployment. 
- -Mounting from a Linux client ----------------------------- -The server does not run portmap/rpcbind by default. That means Linux -mount.nfs, which queries portmap on port 111 first, will fail with -"portmap query failed" against the plain form: - - mount -t nfs -o nfsvers=3,nolock :/export /mnt - -Either tell the client to bypass portmap: - - mount -t nfs -o nfsvers=3,nolock,port=2049,mountport=2049,\ - proto=tcp,mountproto=tcp :/export /mnt - -or enable the built-in portmap responder on the server: - - weed nfs ... -portmap.bind=0.0.0.0 - -With the responder enabled MOUNT v3 is answered over both TCP and UDP, -so the plain mount form above just works — no mountproto override is -required even on clients whose default mountproto is UDP. - -Binding port 111 requires root or CAP_NET_BIND_SERVICE and must not -collide with a system rpcbind. - `, -} - -func runNfs(cmd *Command, args []string) bool { - util.LoadSecurityConfiguration() - - if *nfsStandaloneOptions.ipBind == "" { - *nfsStandaloneOptions.ipBind = "127.0.0.1" - } - - if *nfsStandaloneOptions.filerRootPath == "" { - glog.Errorf("-filer.path is required: pick an explicit subtree to export; exporting \"/\" is not a default") - return false - } - if *nfsStandaloneOptions.filerRootPath == "/" { - glog.Warningf("-filer.path=/ exports the entire filer namespace; ensure -allowedClients or -ip.bind constrains access") - } - - listenAddress := fmt.Sprintf("%s:%d", *nfsStandaloneOptions.ipBind, *nfsStandaloneOptions.port) - glog.V(0).Infof("Starting Seaweed NFS Server %s at %s", version.Version(), listenAddress) - - grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client") - - nfsServer, err := weed_server_nfs.NewServer(&weed_server_nfs.Option{ - Filer: pb.ServerAddress(*nfsStandaloneOptions.filer), - BindIp: *nfsStandaloneOptions.ipBind, - Port: *nfsStandaloneOptions.port, - FilerRootPath: *nfsStandaloneOptions.filerRootPath, - ReadOnly: *nfsStandaloneOptions.readOnly, - AllowedClients: 
util.StringSplit(*nfsStandaloneOptions.allowedClients, ","), - VolumeServerAccess: *nfsStandaloneOptions.volumeServerAccess, - GrpcDialOption: grpcDialOption, - PortmapBind: *nfsStandaloneOptions.portmapBind, - }) - if err != nil { - glog.Errorf("NFS Server startup error: %v", err) - return false - } - - if err := nfsServer.Start(); err != nil { - glog.Errorf("NFS Server startup error: %v", err) - return false - } - - return true -} diff --git a/weed/filer/filer.go b/weed/filer/filer.go index c4421ed37..6b39317a9 100644 --- a/weed/filer/filer.go +++ b/weed/filer/filer.go @@ -13,7 +13,6 @@ import ( "github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager" "github.com/seaweedfs/seaweedfs/weed/filer/empty_folder_cleanup" - "github.com/seaweedfs/seaweedfs/weed/sequence" "github.com/seaweedfs/seaweedfs/weed/cluster" "github.com/seaweedfs/seaweedfs/weed/pb" @@ -64,7 +63,6 @@ type Filer struct { DeletionRetryQueue *DeletionRetryQueue EmptyFolderCleaner *empty_folder_cleanup.EmptyFolderCleaner EmptyFolderCleanupDelay time.Duration - inodeSequencer sequence.Sequencer } func NewFiler(masters pb.ServerDiscovery, grpcDialOption grpc.DialOption, filerHost pb.ServerAddress, filerGroup string, collection string, replication string, dataCenter string, maxFilenameLength uint32, notifyFn func()) *Filer { @@ -79,7 +77,6 @@ func NewFiler(masters pb.ServerDiscovery, grpcDialOption grpc.DialOption, filerH MaxFilenameLength: maxFilenameLength, deletionQuit: make(chan struct{}), DeletionRetryQueue: NewDeletionRetryQueue(), - inodeSequencer: newInodeSequencer(filerHost), } if f.UniqueFilerId < 0 { f.UniqueFilerId = -f.UniqueFilerId diff --git a/weed/filer/filer_inode.go b/weed/filer/filer_inode.go index e4cacfa27..e5c4c5e26 100644 --- a/weed/filer/filer_inode.go +++ b/weed/filer/filer_inode.go @@ -1,51 +1,26 @@ package filer import ( - "os" - "strconv" + "time" - "github.com/seaweedfs/seaweedfs/weed/glog" - "github.com/seaweedfs/seaweedfs/weed/pb" - 
"github.com/seaweedfs/seaweedfs/weed/sequence" + "github.com/seaweedfs/seaweedfs/weed/util" ) -// newInodeSequencer constructs the inode sequencer used to assign object -// identity for filer entries. The Snowflake node id defaults to a masked hash -// of filerHost, which only has 1024 possible values; operators running a -// multi-filer cluster should set SEAWEEDFS_FILER_SNOWFLAKE_ID to an explicit -// per-filer value (1..1023) to avoid birthday-paradox collisions. -// -// Initialization failures are fatal: a process-local fallback allocator would -// re-use inode values across restarts and violate the stable object identity -// guarantee that NFS filehandles and the inode secondary index rely on. -func newInodeSequencer(filerHost pb.ServerAddress) sequence.Sequencer { - snowflakeId := parseSnowflakeIdFromEnv() - seq, err := sequence.NewSnowflakeSequencer(string(filerHost), snowflakeId) - if err != nil { - glog.Fatalf("initialize inode sequencer for filer %s (snowflakeId=%d): %v", filerHost, snowflakeId, err) - } - return seq -} - -func parseSnowflakeIdFromEnv() int { - raw := os.Getenv("SEAWEEDFS_FILER_SNOWFLAKE_ID") - if raw == "" { - return 0 - } - id, err := strconv.Atoi(raw) - if err != nil || id < 0 || id > 0x3ff { - glog.Fatalf("SEAWEEDFS_FILER_SNOWFLAKE_ID must be an integer in [0,1023], got %q", raw) - } - return id -} - +// ensureEntryInode derives a stable inode the same way the FUSE mount does, so +// the filer-stored value matches what a mount would otherwise compute and no +// per-object reverse index is required. Hard links hash their shared +// HardLinkId, so every link resolves to one inode; other entries hash the path +// and creation time. 
func (f *Filer) ensureEntryInode(entry *Entry) { if entry == nil || entry.Attr.Inode != 0 { return } - entry.Attr.Inode = f.nextInode() -} - -func (f *Filer) nextInode() uint64 { - return f.inodeSequencer.NextFileId(1) + if entry.Attr.Crtime.IsZero() { + entry.Attr.Crtime = time.Now() + } + if len(entry.HardLinkId) > 0 { + entry.Attr.Inode = uint64(util.HashStringToLong(string(entry.HardLinkId))) + return + } + entry.Attr.Inode = entry.FullPath.AsInode(entry.Attr.Crtime.Unix()) } diff --git a/weed/filer/filer_inode_index.go b/weed/filer/filer_inode_index.go deleted file mode 100644 index ba0e46ae7..000000000 --- a/weed/filer/filer_inode_index.go +++ /dev/null @@ -1,300 +0,0 @@ -package filer - -import ( - "context" - "encoding/binary" - "encoding/json" - "sort" - - "github.com/seaweedfs/seaweedfs/weed/glog" - "github.com/seaweedfs/seaweedfs/weed/util" -) - -const inodeIndexKeyPrefix = "filer.inode.path." -const InodeIndexInitialGeneration uint64 = 1 - -type inodeIndexEntry struct { - path util.FullPath - inode uint64 -} - -type InodeIndexRecord struct { - Generation uint64 `json:"generation,omitempty"` - Paths []string `json:"paths,omitempty"` -} - -func InodeIndexKey(inode uint64) []byte { - key := make([]byte, len(inodeIndexKeyPrefix)+8) - copy(key, inodeIndexKeyPrefix) - binary.BigEndian.PutUint64(key[len(inodeIndexKeyPrefix):], inode) - return key -} - -func DecodeInodeIndexRecord(value []byte) (*InodeIndexRecord, error) { - if len(value) == 0 { - return &InodeIndexRecord{}, nil - } - - // The first foundation slice stored the current path as raw bytes. Keep that - // format readable so existing records are transparently upgraded on write. 
- if value[0] != '{' { - record := &InodeIndexRecord{Generation: InodeIndexInitialGeneration} - record.addPath(util.FullPath(value)) - return record, nil - } - - record := &InodeIndexRecord{} - if err := json.Unmarshal(value, record); err != nil { - return nil, err - } - record.normalize() - return record, nil -} - -func (record *InodeIndexRecord) Encode() ([]byte, error) { - record.normalize() - return json.Marshal(record) -} - -func (record *InodeIndexRecord) normalize() { - if len(record.Paths) == 0 { - return - } - if record.Generation == 0 { - record.Generation = InodeIndexInitialGeneration - } - - sanitized := make([]string, 0, len(record.Paths)) - for _, path := range record.Paths { - if path == "" { - continue - } - sanitized = append(sanitized, path) - } - if len(sanitized) == 0 { - record.Paths = nil - return - } - - sort.Strings(sanitized) - deduped := sanitized[:1] - for _, path := range sanitized[1:] { - if path == deduped[len(deduped)-1] { - continue - } - deduped = append(deduped, path) - } - record.Paths = deduped -} - -func (record *InodeIndexRecord) addPath(path util.FullPath) bool { - if path == "" { - return false - } - record.normalize() - target := string(path) - index := sort.SearchStrings(record.Paths, target) - if index < len(record.Paths) && record.Paths[index] == target { - return false - } - record.Paths = append(record.Paths, "") - copy(record.Paths[index+1:], record.Paths[index:]) - record.Paths[index] = target - return true -} - -func (record *InodeIndexRecord) removePath(path util.FullPath) bool { - if len(record.Paths) == 0 || path == "" { - return false - } - record.normalize() - target := string(path) - index := sort.SearchStrings(record.Paths, target) - if index >= len(record.Paths) || record.Paths[index] != target { - return false - } - record.Paths = append(record.Paths[:index], record.Paths[index+1:]...) 
- if len(record.Paths) == 0 { - record.Paths = nil - } - return true -} - -func (record *InodeIndexRecord) CanonicalPath() util.FullPath { - record.normalize() - if len(record.Paths) == 0 { - return "" - } - return util.FullPath(record.Paths[0]) -} - -func (record *InodeIndexRecord) FullPaths() []util.FullPath { - record.normalize() - if len(record.Paths) == 0 { - return nil - } - paths := make([]util.FullPath, 0, len(record.Paths)) - for _, path := range record.Paths { - paths = append(paths, util.FullPath(path)) - } - return paths -} - -func (fsw *FilerStoreWrapper) lookupInodeIndex(ctx context.Context, inode uint64) (*InodeIndexRecord, error) { - if inode == 0 { - return nil, ErrKvNotFound - } - - value, err := fsw.KvGet(ctx, InodeIndexKey(inode)) - if err != nil { - return nil, err - } - - return DecodeInodeIndexRecord(value) -} - -func (fsw *FilerStoreWrapper) storeInodeIndex(ctx context.Context, path util.FullPath, inode uint64) error { - if inode == 0 || path == "" { - return nil - } - - record, err := fsw.lookupInodeIndex(ctx, inode) - if err != nil { - if err != ErrKvNotFound { - return err - } - record = &InodeIndexRecord{Generation: InodeIndexInitialGeneration} - } - record.addPath(path) - - value, err := record.Encode() - if err != nil { - return err - } - return fsw.KvPut(ctx, InodeIndexKey(inode), value) -} - -func (fsw *FilerStoreWrapper) lookupInodePath(ctx context.Context, inode uint64) (util.FullPath, error) { - record, err := fsw.lookupInodeIndex(ctx, inode) - if err != nil { - return "", err - } - - path := record.CanonicalPath() - if path == "" { - return "", ErrKvNotFound - } - return path, nil -} - -func (fsw *FilerStoreWrapper) lookupInodePaths(ctx context.Context, inode uint64) ([]util.FullPath, error) { - record, err := fsw.lookupInodeIndex(ctx, inode) - if err != nil { - return nil, err - } - - paths := record.FullPaths() - if len(paths) == 0 { - return nil, ErrKvNotFound - } - return paths, nil -} - -func (fsw *FilerStoreWrapper) 
removePathFromInodeIndex(ctx context.Context, path util.FullPath, inode uint64) error { - if inode == 0 || path == "" { - return nil - } - - record, err := fsw.lookupInodeIndex(ctx, inode) - if err != nil { - if err == ErrKvNotFound { - return nil - } - return err - } - - if !record.removePath(path) { - return nil - } - if len(record.Paths) == 0 { - return fsw.KvDelete(ctx, InodeIndexKey(inode)) - } - - value, err := record.Encode() - if err != nil { - return err - } - return fsw.KvPut(ctx, InodeIndexKey(inode), value) -} - -func (fsw *FilerStoreWrapper) collectInodeIndexEntries(ctx context.Context, dirPath util.FullPath) ([]inodeIndexEntry, error) { - // Honor caller cancellation during the walk: a DeleteFolderChildren on a - // pathological directory could otherwise loop indefinitely gathering - // entries even after the client has given up, turning into a DoS vector. - // If the walk is aborted, the caller treats the index cleanup as - // best-effort and drops the partial result. - var collected []inodeIndexEntry - if err := fsw.collectInodeIndexEntriesRecursive(ctx, dirPath, &collected); err != nil { - return nil, err - } - return collected, nil -} - -func (fsw *FilerStoreWrapper) collectInodeIndexEntriesRecursive(ctx context.Context, dirPath util.FullPath, collected *[]inodeIndexEntry) error { - actualStore := fsw.getActualStore(dirPath + "/") - - lastFileName := "" - includeStartFile := false - for { - page := make([]*Entry, 0, PaginationSize) - nextLastFileName, err := actualStore.ListDirectoryEntries(ctx, dirPath, lastFileName, includeStartFile, PaginationSize, func(entry *Entry) (bool, error) { - page = append(page, entry) - return true, nil - }) - if err != nil { - return err - } - - for _, entry := range page { - if entry.Attr.Inode != 0 { - *collected = append(*collected, inodeIndexEntry{path: entry.FullPath, inode: entry.Attr.Inode}) - } - if entry.IsDirectory() { - if err := fsw.collectInodeIndexEntriesRecursive(ctx, entry.FullPath, collected); err != 
nil { - return err - } - } - } - - if len(page) < PaginationSize { - return nil - } - lastFileName = nextLastFileName - includeStartFile = false - } -} - -// recordInodeIndexWrite updates the inode→path secondary index after the -// primary store mutation has already succeeded. The index is best-effort: a -// failure here must not surface as an operation error, because the caller -// would then observe a failed create/update even though the entry was -// persisted, and a retry cannot heal the index (DeleteEntry exits early once -// the entry is gone). We log and let later writes rebuild the record. -func (fsw *FilerStoreWrapper) recordInodeIndexWrite(ctx context.Context, op string, path util.FullPath, inode uint64) { - if inode == 0 || path == "" { - return - } - if err := fsw.storeInodeIndex(ctx, path, inode); err != nil { - glog.WarningfCtx(ctx, "%s: update inode index for %s (inode %d): %v", op, path, inode, err) - } -} - -// recordInodeIndexRemoval mirrors recordInodeIndexWrite for removals. 
-func (fsw *FilerStoreWrapper) recordInodeIndexRemoval(ctx context.Context, op string, path util.FullPath, inode uint64) { - if inode == 0 || path == "" { - return - } - if err := fsw.removePathFromInodeIndex(ctx, path, inode); err != nil { - glog.WarningfCtx(ctx, "%s: clear inode index for %s (inode %d): %v", op, path, inode, err) - } -} diff --git a/weed/filer/filer_inode_index_test.go b/weed/filer/filer_inode_index_test.go deleted file mode 100644 index bee0e6c65..000000000 --- a/weed/filer/filer_inode_index_test.go +++ /dev/null @@ -1,206 +0,0 @@ -package filer - -import ( - "context" - "os" - "testing" - - "github.com/seaweedfs/seaweedfs/weed/util" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestFilerStoreWrapperMaintainsInodeIndexLifecycle(t *testing.T) { - wrapper := NewFilerStoreWrapper(newStubFilerStore()) - ctx := context.Background() - - created := &Entry{ - FullPath: util.FullPath("/docs/report.txt"), - Attr: Attr{ - Mode: 0o644, - Inode: 42, - }, - } - - require.NoError(t, wrapper.InsertEntry(ctx, created)) - path, err := wrapper.lookupInodePath(ctx, created.Attr.Inode) - require.NoError(t, err) - assert.Equal(t, created.FullPath, path) - paths, err := wrapper.lookupInodePaths(ctx, created.Attr.Inode) - require.NoError(t, err) - assert.Equal(t, []util.FullPath{created.FullPath}, paths) - record, err := wrapper.lookupInodeIndex(ctx, created.Attr.Inode) - require.NoError(t, err) - assert.Equal(t, InodeIndexInitialGeneration, record.Generation) - - updated := &Entry{ - FullPath: util.FullPath("/docs/report.txt"), - Attr: Attr{ - Mode: 0o600, - Inode: 42, - }, - } - require.NoError(t, wrapper.UpdateEntry(ctx, updated)) - path, err = wrapper.lookupInodePath(ctx, updated.Attr.Inode) - require.NoError(t, err) - assert.Equal(t, updated.FullPath, path) - - require.NoError(t, wrapper.DeleteEntry(ctx, created.FullPath)) - _, err = wrapper.lookupInodePath(ctx, created.Attr.Inode) - require.ErrorIs(t, err, 
ErrKvNotFound) -} - -func TestFilerStoreWrapperMaintainsMultiplePathsPerInode(t *testing.T) { - wrapper := NewFilerStoreWrapper(newStubFilerStore()) - ctx := context.Background() - inode := uint64(88) - hardLinkId := NewHardLinkId() - - require.NoError(t, wrapper.InsertEntry(ctx, &Entry{ - FullPath: util.FullPath("/links/b.txt"), - Attr: Attr{ - Mode: 0o644, - Inode: inode, - }, - HardLinkId: hardLinkId, - HardLinkCounter: 2, - })) - require.NoError(t, wrapper.InsertEntry(ctx, &Entry{ - FullPath: util.FullPath("/links/a.txt"), - Attr: Attr{ - Mode: 0o644, - Inode: inode, - }, - HardLinkId: hardLinkId, - HardLinkCounter: 2, - })) - - paths, err := wrapper.lookupInodePaths(ctx, inode) - require.NoError(t, err) - assert.Equal(t, []util.FullPath{"/links/a.txt", "/links/b.txt"}, paths) - record, err := wrapper.lookupInodeIndex(ctx, inode) - require.NoError(t, err) - assert.Equal(t, InodeIndexInitialGeneration, record.Generation) - - path, err := wrapper.lookupInodePath(ctx, inode) - require.NoError(t, err) - assert.Equal(t, util.FullPath("/links/a.txt"), path) - - require.NoError(t, wrapper.DeleteEntry(ctx, util.FullPath("/links/a.txt"))) - - paths, err = wrapper.lookupInodePaths(ctx, inode) - require.NoError(t, err) - assert.Equal(t, []util.FullPath{"/links/b.txt"}, paths) - - path, err = wrapper.lookupInodePath(ctx, inode) - require.NoError(t, err) - assert.Equal(t, util.FullPath("/links/b.txt"), path) -} - -func TestFilerStoreWrapperUpgradesLegacySinglePathInodeIndexRecords(t *testing.T) { - wrapper := NewFilerStoreWrapper(newStubFilerStore()) - ctx := context.Background() - inode := uint64(91) - - require.NoError(t, wrapper.KvPut(ctx, InodeIndexKey(inode), []byte("/legacy/path.txt"))) - - path, err := wrapper.lookupInodePath(ctx, inode) - require.NoError(t, err) - assert.Equal(t, util.FullPath("/legacy/path.txt"), path) - - paths, err := wrapper.lookupInodePaths(ctx, inode) - require.NoError(t, err) - assert.Equal(t, []util.FullPath{"/legacy/path.txt"}, paths) - - 
require.NoError(t, wrapper.storeInodeIndex(ctx, util.FullPath("/legacy/second.txt"), inode)) - - paths, err = wrapper.lookupInodePaths(ctx, inode) - require.NoError(t, err) - assert.Equal(t, []util.FullPath{"/legacy/path.txt", "/legacy/second.txt"}, paths) - - value, err := wrapper.KvGet(ctx, InodeIndexKey(inode)) - require.NoError(t, err) - assert.JSONEq(t, `{"generation":1,"paths":["/legacy/path.txt","/legacy/second.txt"]}`, string(value)) -} - -func TestFilerStoreWrapperKeepsInodeIndexWhenDeleteArrivesAfterRenameInsert(t *testing.T) { - wrapper := NewFilerStoreWrapper(newStubFilerStore()) - ctx := context.Background() - inode := uint64(77) - - require.NoError(t, wrapper.InsertEntry(ctx, &Entry{ - FullPath: util.FullPath("/old/name.txt"), - Attr: Attr{ - Mode: 0o644, - Inode: inode, - }, - })) - require.NoError(t, wrapper.InsertEntry(ctx, &Entry{ - FullPath: util.FullPath("/new/name.txt"), - Attr: Attr{ - Mode: 0o644, - Inode: inode, - }, - })) - require.NoError(t, wrapper.DeleteEntry(ctx, util.FullPath("/old/name.txt"))) - - path, err := wrapper.lookupInodePath(ctx, inode) - require.NoError(t, err) - assert.Equal(t, util.FullPath("/new/name.txt"), path) - - paths, err := wrapper.lookupInodePaths(ctx, inode) - require.NoError(t, err) - assert.Equal(t, []util.FullPath{"/new/name.txt"}, paths) -} - -func TestRecursiveDeleteRemovesDescendantInodeIndexes(t *testing.T) { - f, store := newTestFilerWithStubStore() - ctx := context.Background() - - entries := []*Entry{ - { - FullPath: util.FullPath("/tree"), - Attr: Attr{ - Mode: os.ModeDir | 0o755, - Inode: 100, - }, - }, - { - FullPath: util.FullPath("/tree/file.txt"), - Attr: Attr{ - Mode: 0o644, - Inode: 101, - }, - }, - { - FullPath: util.FullPath("/tree/subdir"), - Attr: Attr{ - Mode: os.ModeDir | 0o755, - Inode: 102, - }, - }, - { - FullPath: util.FullPath("/tree/subdir/nested.txt"), - Attr: Attr{ - Mode: 0o644, - Inode: 103, - }, - }, - } - - for _, entry := range entries { - require.NoError(t, 
f.Store.InsertEntry(ctx, entry)) - } - - require.NoError(t, f.DeleteEntryMetaAndData(ctx, util.FullPath("/tree"), true, false, false, false, nil, 0)) - - for _, inode := range []uint64{100, 101, 102, 103} { - _, err := f.Store.(*FilerStoreWrapper).lookupInodePath(ctx, inode) - require.ErrorIs(t, err, ErrKvNotFound) - } - - for _, path := range []string{"/tree", "/tree/file.txt", "/tree/subdir", "/tree/subdir/nested.txt"} { - _, err := store.FindEntry(ctx, util.FullPath(path)) - require.Error(t, err) - } -} diff --git a/weed/filer/filer_inode_test.go b/weed/filer/filer_inode_test.go index e19ae680a..41c8883a9 100644 --- a/weed/filer/filer_inode_test.go +++ b/weed/filer/filer_inode_test.go @@ -4,6 +4,7 @@ import ( "context" "os" "testing" + "time" "github.com/seaweedfs/seaweedfs/weed/pb" "github.com/seaweedfs/seaweedfs/weed/util" @@ -11,6 +12,47 @@ import ( "github.com/stretchr/testify/require" ) +func TestEnsureEntryInodeMatchesFuseDerivation(t *testing.T) { + f := &Filer{} + crtime := time.Unix(1700000000, 0) + + entry := &Entry{ + FullPath: util.FullPath("/dir/file.txt"), + Attr: Attr{Crtime: crtime}, + } + f.ensureEntryInode(entry) + + // The filer stores exactly what the FUSE mount would compute for a + // non-hard-linked entry, and it is deterministic across calls. 
+ assert.Equal(t, entry.FullPath.AsInode(crtime.Unix()), entry.Attr.Inode) + again := &Entry{FullPath: entry.FullPath, Attr: Attr{Crtime: crtime}} + f.ensureEntryInode(again) + assert.Equal(t, entry.Attr.Inode, again.Attr.Inode) +} + +func TestEnsureEntryInodeSharesAcrossHardLinks(t *testing.T) { + f := &Filer{} + hardLinkId := NewHardLinkId() + + a := &Entry{ + FullPath: util.FullPath("/links/a.txt"), + Attr: Attr{Crtime: time.Unix(1700000000, 0)}, + HardLinkId: hardLinkId, + } + b := &Entry{ + FullPath: util.FullPath("/links/b.txt"), + Attr: Attr{Crtime: time.Unix(1800000000, 0)}, + HardLinkId: hardLinkId, + } + f.ensureEntryInode(a) + f.ensureEntryInode(b) + + // Every link to the same target resolves to one inode, independent of path + // or creation time. + assert.Equal(t, uint64(util.HashStringToLong(string(hardLinkId))), a.Attr.Inode) + assert.Equal(t, a.Attr.Inode, b.Attr.Inode) +} + func newTestFilerWithStubStore() (*Filer, *stubFilerStore) { store := newStubFilerStore() f := NewFiler(pb.ServerDiscovery{}, nil, "", "", "", "", "", 255, nil) diff --git a/weed/filer/filer_lazy_remote_test.go b/weed/filer/filer_lazy_remote_test.go index 18973c75a..6ad5e4449 100644 --- a/weed/filer/filer_lazy_remote_test.go +++ b/weed/filer/filer_lazy_remote_test.go @@ -276,7 +276,6 @@ func newTestFiler(t *testing.T, store *stubFilerStore, rs *FilerRemoteStorage) * FilerConf: NewFilerConf(), MaxFilenameLength: 255, MasterClient: mc, - inodeSequencer: newInodeSequencer("test-filer"), fileIdDeletionQueue: util.NewUnboundedQueue(), deletionQuit: make(chan struct{}), LocalMetaLogBuffer: log_buffer.NewLogBuffer("test", time.Minute, diff --git a/weed/filer/filerstore_wrapper.go b/weed/filer/filerstore_wrapper.go index b9ce81427..d742380ae 100644 --- a/weed/filer/filerstore_wrapper.go +++ b/weed/filer/filerstore_wrapper.go @@ -132,7 +132,6 @@ func (fsw *FilerStoreWrapper) InsertEntry(ctx context.Context, entry *Entry) err return err } ctx = context.WithoutCancel(ctx) - fullPath := 
entry.FullPath actualStore := fsw.getActualStore(entry.FullPath) stats.FilerStoreCounter.WithLabelValues(actualStore.GetName(), "insert").Inc() start := time.Now() @@ -152,11 +151,7 @@ func (fsw *FilerStoreWrapper) InsertEntry(ctx context.Context, entry *Entry) err return err } - if err := actualStore.InsertEntry(ctx, entry); err != nil { - return err - } - fsw.recordInodeIndexWrite(ctx, "InsertEntry", fullPath, entry.Attr.Inode) - return nil + return actualStore.InsertEntry(ctx, entry) } // InsertEntryKnownAbsent skips the pre-insert FindEntry path when the caller has @@ -166,7 +161,6 @@ func (fsw *FilerStoreWrapper) InsertEntryKnownAbsent(ctx context.Context, entry return err } ctx = context.WithoutCancel(ctx) - fullPath := entry.FullPath actualStore := fsw.getActualStore(entry.FullPath) stats.FilerStoreCounter.WithLabelValues(actualStore.GetName(), "insert").Inc() start := time.Now() @@ -185,11 +179,7 @@ func (fsw *FilerStoreWrapper) InsertEntryKnownAbsent(ctx context.Context, entry } } - if err := actualStore.InsertEntry(ctx, entry); err != nil { - return err - } - fsw.recordInodeIndexWrite(ctx, "InsertEntryKnownAbsent", fullPath, entry.Attr.Inode) - return nil + return actualStore.InsertEntry(ctx, entry) } func (fsw *FilerStoreWrapper) UpdateEntry(ctx context.Context, entry *Entry) error { @@ -197,7 +187,6 @@ func (fsw *FilerStoreWrapper) UpdateEntry(ctx context.Context, entry *Entry) err return err } ctx = context.WithoutCancel(ctx) - fullPath := entry.FullPath actualStore := fsw.getActualStore(entry.FullPath) stats.FilerStoreCounter.WithLabelValues(actualStore.GetName(), "update").Inc() start := time.Now() @@ -217,11 +206,7 @@ func (fsw *FilerStoreWrapper) UpdateEntry(ctx context.Context, entry *Entry) err return err } - if err := actualStore.UpdateEntry(ctx, entry); err != nil { - return err - } - fsw.recordInodeIndexWrite(ctx, "UpdateEntry", fullPath, entry.Attr.Inode) - return nil + return actualStore.UpdateEntry(ctx, entry) } func 
normalizeEntryMimeForStore(entry *Entry) { @@ -273,8 +258,6 @@ func (fsw *FilerStoreWrapper) DeleteEntry(ctx context.Context, fp util.FullPath) if findErr == filer_pb.ErrNotFound || existingEntry == nil { return nil } - inode := existingEntry.Attr.Inode - fullPath := existingEntry.FullPath if len(existingEntry.HardLinkId) != 0 { // remove hard link op := ctx.Value("OP") @@ -289,11 +272,7 @@ func (fsw *FilerStoreWrapper) DeleteEntry(ctx context.Context, fp util.FullPath) } } - if err := actualStore.DeleteEntry(ctx, fp); err != nil { - return err - } - fsw.recordInodeIndexRemoval(ctx, "DeleteEntry", fullPath, inode) - return nil + return actualStore.DeleteEntry(ctx, fp) } func (fsw *FilerStoreWrapper) DeleteOneEntry(ctx context.Context, existingEntry *Entry) (err error) { @@ -301,8 +280,6 @@ func (fsw *FilerStoreWrapper) DeleteOneEntry(ctx context.Context, existingEntry return err } ctx = context.WithoutCancel(ctx) - fullPath := existingEntry.FullPath - inode := existingEntry.Attr.Inode actualStore := fsw.getActualStore(existingEntry.FullPath) stats.FilerStoreCounter.WithLabelValues(actualStore.GetName(), "delete").Inc() start := time.Now() @@ -325,11 +302,7 @@ func (fsw *FilerStoreWrapper) DeleteOneEntry(ctx context.Context, existingEntry } } - if err := actualStore.DeleteEntry(ctx, existingEntry.FullPath); err != nil { - return err - } - fsw.recordInodeIndexRemoval(ctx, "DeleteOneEntry", fullPath, inode) - return nil + return actualStore.DeleteEntry(ctx, existingEntry.FullPath) } func (fsw *FilerStoreWrapper) DeleteFolderChildren(ctx context.Context, fp util.FullPath) (err error) { @@ -344,20 +317,7 @@ func (fsw *FilerStoreWrapper) DeleteFolderChildren(ctx context.Context, fp util. 
stats.FilerStoreHistogram.WithLabelValues(actualStore.GetName(), "deleteFolderChildren").Observe(time.Since(start).Seconds()) }() - collected, err := fsw.collectInodeIndexEntries(ctx, fp) - if err != nil { - // Index collection is best-effort: a failure here only prevents inode - // index housekeeping, not the directory removal itself. - glog.WarningfCtx(ctx, "collectInodeIndexEntries %s: %v; deleting folder children without index cleanup", fp, err) - collected = nil - } - if err := actualStore.DeleteFolderChildren(ctx, fp); err != nil { - return err - } - for _, entry := range collected { - fsw.recordInodeIndexRemoval(ctx, "DeleteFolderChildren", entry.path, entry.inode) - } - return nil + return actualStore.DeleteFolderChildren(ctx, fp) } func (fsw *FilerStoreWrapper) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc ListEachEntryFunc) (string, error) { diff --git a/weed/server/nfs/access.go b/weed/server/nfs/access.go deleted file mode 100644 index 85ae5583f..000000000 --- a/weed/server/nfs/access.go +++ /dev/null @@ -1,140 +0,0 @@ -package nfs - -import ( - "fmt" - "net" - "strings" - - "github.com/seaweedfs/seaweedfs/weed/glog" -) - -type clientAuthorizer struct { - exact map[string]struct{} - cidrs map[string]*net.IPNet - enabled bool -} - -func newClientAuthorizer(allowed []string) (*clientAuthorizer, error) { - authorizer := &clientAuthorizer{ - exact: make(map[string]struct{}), - cidrs: make(map[string]*net.IPNet), - } - - for _, raw := range allowed { - entry := strings.TrimSpace(raw) - if entry == "" { - continue - } - if strings.Contains(entry, "/") { - _, network, err := net.ParseCIDR(entry) - if err != nil { - return nil, fmt.Errorf("parse allowed NFS client %q: %w", entry, err) - } - authorizer.cidrs[entry] = network - authorizer.enabled = true - continue - } - - if ip := normalizeClientIP(entry); ip != nil { - authorizer.exact[ip.String()] = struct{}{} - 
authorizer.enabled = true - continue - } - - ips, err := net.LookupIP(entry) - if err != nil { - return nil, fmt.Errorf("resolve allowed NFS client %q: %w", entry, err) - } - if len(ips) == 0 { - return nil, fmt.Errorf("resolve allowed NFS client %q: no addresses", entry) - } - authorizer.exact[entry] = struct{}{} - for _, ip := range ips { - if ip == nil { - continue - } - authorizer.exact[ip.String()] = struct{}{} - } - authorizer.enabled = true - } - - return authorizer, nil -} - -func (a *clientAuthorizer) isAllowedConn(conn net.Conn) bool { - if conn == nil { - return true - } - return a.isAllowedAddr(conn.RemoteAddr()) -} - -func (a *clientAuthorizer) isAllowedAddr(addr net.Addr) bool { - if a == nil || !a.enabled { - return true - } - if addr == nil { - return false - } - - host := remoteHost(addr.String()) - if host == "" { - return false - } - if _, found := a.exact[host]; found { - return true - } - - ip := normalizeClientIP(host) - if ip == nil { - return false - } - if _, found := a.exact[ip.String()]; found { - return true - } - for _, network := range a.cidrs { - if network.Contains(ip) { - return true - } - } - return false -} - -func remoteHost(remote string) string { - host, _, err := net.SplitHostPort(strings.TrimSpace(remote)) - if err == nil { - return host - } - - host = strings.TrimSpace(remote) - if strings.HasPrefix(host, "[") && strings.HasSuffix(host, "]") { - host = host[1 : len(host)-1] - } - return host -} - -func normalizeClientIP(host string) net.IP { - host = strings.TrimSpace(host) - if zoneIndex := strings.LastIndex(host, "%"); zoneIndex >= 0 { - host = host[:zoneIndex] - } - return net.ParseIP(host) -} - -type allowlistListener struct { - net.Listener - authorizer *clientAuthorizer -} - -func (l *allowlistListener) Accept() (net.Conn, error) { - for { - conn, err := l.Listener.Accept() - if err != nil { - return nil, err - } - if l.authorizer == nil || l.authorizer.isAllowedConn(conn) { - return conn, nil - } - 
glog.V(0).Infof("reject unauthorized nfs client %s", conn.RemoteAddr()) - _ = conn.Close() - } -} diff --git a/weed/server/nfs/access_test.go b/weed/server/nfs/access_test.go deleted file mode 100644 index 105de2b96..000000000 --- a/weed/server/nfs/access_test.go +++ /dev/null @@ -1,29 +0,0 @@ -package nfs - -import ( - "net" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestClientAuthorizerResolvesHostnameEntries(t *testing.T) { - ips, err := net.LookupIP("localhost") - require.NoError(t, err) - require.NotEmpty(t, ips) - - authorizer, err := newClientAuthorizer([]string{"localhost"}) - require.NoError(t, err) - - matched := false - for _, ip := range ips { - if authorizer.isAllowedAddr(&net.TCPAddr{IP: ip, Port: 2049}) { - matched = true - break - } - } - - assert.True(t, matched) - assert.False(t, authorizer.isAllowedAddr(&net.TCPAddr{IP: net.ParseIP("192.0.2.10"), Port: 2049})) -} diff --git a/weed/server/nfs/filehandle.go b/weed/server/nfs/filehandle.go deleted file mode 100644 index fd8036131..000000000 --- a/weed/server/nfs/filehandle.go +++ /dev/null @@ -1,251 +0,0 @@ -package nfs - -import ( - "context" - "encoding/binary" - "errors" - "fmt" - "hash/crc32" - "strings" - - "github.com/seaweedfs/seaweedfs/weed/filer" - "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" - "github.com/seaweedfs/seaweedfs/weed/util" - "google.golang.org/grpc" -) - -const ( - fileHandleVersion = 1 - fileHandleLength = 28 -) - -var ( - ErrInvalidHandle = errors.New("invalid nfs filehandle") - ErrHandleExportMismatch = errors.New("nfs filehandle export mismatch") - ErrStaleHandle = errors.New("stale nfs filehandle") -) - -type FileHandleKind uint8 - -const ( - FileHandleKindUnknown FileHandleKind = 0 - FileHandleKindFile FileHandleKind = 1 - FileHandleKindDirectory FileHandleKind = 2 -) - -type FileHandle struct { - Kind FileHandleKind - ExportID uint32 - Inode uint64 - Generation uint64 -} - -type filerResolverClient 
interface { - KvGet(ctx context.Context, in *filer_pb.KvGetRequest, opts ...grpc.CallOption) (*filer_pb.KvGetResponse, error) - LookupDirectoryEntry(ctx context.Context, in *filer_pb.LookupDirectoryEntryRequest, opts ...grpc.CallOption) (*filer_pb.LookupDirectoryEntryResponse, error) -} - -type Resolver struct { - exportRoot util.FullPath - exportID uint32 - client filerResolverClient -} - -type ResolvedHandle struct { - Handle FileHandle - Path util.FullPath - Entry *filer_pb.Entry -} - -func NewFileHandle(exportID uint32, kind FileHandleKind, inode, generation uint64) FileHandle { - if generation == 0 { - generation = filer.InodeIndexInitialGeneration - } - return FileHandle{ - Kind: kind, - ExportID: exportID, - Inode: inode, - Generation: generation, - } -} - -func (h FileHandle) Encode() []byte { - buf := make([]byte, fileHandleLength) - buf[0] = fileHandleVersion - buf[1] = byte(h.Kind) - binary.BigEndian.PutUint32(buf[4:8], h.ExportID) - binary.BigEndian.PutUint64(buf[8:16], h.Inode) - binary.BigEndian.PutUint64(buf[16:24], h.Generation) - binary.BigEndian.PutUint32(buf[24:28], crc32.ChecksumIEEE(buf[:24])) - return buf -} - -func DecodeFileHandle(raw []byte) (FileHandle, error) { - if len(raw) != fileHandleLength { - return FileHandle{}, fmt.Errorf("%w: unexpected length %d", ErrInvalidHandle, len(raw)) - } - if raw[0] != fileHandleVersion { - return FileHandle{}, fmt.Errorf("%w: unsupported version %d", ErrInvalidHandle, raw[0]) - } - - wantChecksum := binary.BigEndian.Uint32(raw[24:28]) - gotChecksum := crc32.ChecksumIEEE(raw[:24]) - if wantChecksum != gotChecksum { - return FileHandle{}, fmt.Errorf("%w: checksum mismatch", ErrInvalidHandle) - } - - handle := FileHandle{ - Kind: FileHandleKind(raw[1]), - ExportID: binary.BigEndian.Uint32(raw[4:8]), - Inode: binary.BigEndian.Uint64(raw[8:16]), - Generation: binary.BigEndian.Uint64(raw[16:24]), - } - if handle.Generation == 0 { - return FileHandle{}, fmt.Errorf("%w: empty generation", ErrInvalidHandle) - } 
- return handle, nil -} - -func NewResolver(exportRoot util.FullPath, client filerResolverClient) *Resolver { - root := normalizeExportRoot(exportRoot) - return &Resolver{ - exportRoot: root, - exportID: exportIDForRoot(root), - client: client, - } -} - -func (r *Resolver) ExportID() uint32 { - if r == nil { - return 0 - } - return r.exportID -} - -func (r *Resolver) ResolveHandle(ctx context.Context, raw []byte) (*ResolvedHandle, error) { - if r == nil || r.client == nil { - return nil, errors.New("nfs resolver is not configured") - } - - handle, err := DecodeFileHandle(raw) - if err != nil { - return nil, err - } - if handle.ExportID != r.exportID { - return nil, ErrHandleExportMismatch - } - if handle.Inode == 0 { - return r.resolveSyntheticRoot(ctx, handle) - } - - kvResp, err := r.client.KvGet(ctx, &filer_pb.KvGetRequest{Key: filer.InodeIndexKey(handle.Inode)}) - if err != nil { - return nil, err - } - if kvResp.GetError() != "" { - return nil, errors.New(kvResp.GetError()) - } - if len(kvResp.GetValue()) == 0 { - return nil, ErrStaleHandle - } - - record, err := filer.DecodeInodeIndexRecord(kvResp.GetValue()) - if err != nil { - return nil, err - } - if record.Generation != handle.Generation { - return nil, ErrStaleHandle - } - - for _, path := range record.FullPaths() { - if !pathVisibleFromExport(path, r.exportRoot) { - continue - } - - dir, name := path.DirAndName() - lookupResp, lookupErr := r.client.LookupDirectoryEntry(ctx, &filer_pb.LookupDirectoryEntryRequest{ - Directory: dir, - Name: name, - }) - if isLookupNotFound(lookupErr) || lookupResp == nil || lookupResp.Entry == nil { - continue - } - if lookupErr != nil { - return nil, lookupErr - } - if attrs := lookupResp.Entry.Attributes; attrs != nil && attrs.Inode != 0 && attrs.Inode != handle.Inode { - continue - } - if handle.Kind == FileHandleKindDirectory && !lookupResp.Entry.IsDirectory { - continue - } - if handle.Kind == FileHandleKindFile && lookupResp.Entry.IsDirectory { - continue - } - - 
return &ResolvedHandle{ - Handle: handle, - Path: path, - Entry: lookupResp.Entry, - }, nil - } - - return nil, ErrStaleHandle -} - -func (r *Resolver) resolveSyntheticRoot(ctx context.Context, handle FileHandle) (*ResolvedHandle, error) { - if handle.Kind != FileHandleKindDirectory || handle.Generation != filer.InodeIndexInitialGeneration { - return nil, ErrStaleHandle - } - - dir, name := r.exportRoot.DirAndName() - lookupResp, err := r.client.LookupDirectoryEntry(ctx, &filer_pb.LookupDirectoryEntryRequest{ - Directory: dir, - Name: name, - }) - if isLookupNotFound(err) { - return &ResolvedHandle{ - Handle: handle, - Path: r.exportRoot, - Entry: syntheticRootEntry(), - }, nil - } - if err != nil { - return nil, err - } - if lookupResp == nil || lookupResp.Entry == nil { - return &ResolvedHandle{ - Handle: handle, - Path: r.exportRoot, - Entry: syntheticRootEntry(), - }, nil - } - - return &ResolvedHandle{ - Handle: handle, - Path: r.exportRoot, - Entry: lookupResp.Entry, - }, nil -} - -func normalizeExportRoot(root util.FullPath) util.FullPath { - if normalized := util.NormalizePath(string(root)); normalized != "" { - return normalized - } - return "/" -} - -func exportIDForRoot(root util.FullPath) uint32 { - return crc32.ChecksumIEEE([]byte(normalizeExportRoot(root))) -} - -func pathVisibleFromExport(path, exportRoot util.FullPath) bool { - return path == exportRoot || path.IsUnder(exportRoot) -} - -func isLookupNotFound(err error) bool { - if err == nil { - return false - } - return err == filer_pb.ErrNotFound || strings.Contains(err.Error(), filer_pb.ErrNotFound.Error()) -} diff --git a/weed/server/nfs/filehandle_test.go b/weed/server/nfs/filehandle_test.go deleted file mode 100644 index 4fe38a2df..000000000 --- a/weed/server/nfs/filehandle_test.go +++ /dev/null @@ -1,182 +0,0 @@ -package nfs - -import ( - "context" - "testing" - - "github.com/seaweedfs/seaweedfs/weed/filer" - "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" - 
"github.com/seaweedfs/seaweedfs/weed/util" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "google.golang.org/grpc" -) - -type fakeResolverClient struct { - kv map[string][]byte - entries map[util.FullPath]*filer_pb.Entry -} - -func (f *fakeResolverClient) KvGet(_ context.Context, in *filer_pb.KvGetRequest, _ ...grpc.CallOption) (*filer_pb.KvGetResponse, error) { - if value, found := f.kv[string(in.Key)]; found { - return &filer_pb.KvGetResponse{Value: value}, nil - } - return &filer_pb.KvGetResponse{}, nil -} - -func (f *fakeResolverClient) LookupDirectoryEntry(_ context.Context, in *filer_pb.LookupDirectoryEntryRequest, _ ...grpc.CallOption) (*filer_pb.LookupDirectoryEntryResponse, error) { - fullPath := util.NewFullPath(in.Directory, in.Name) - if entry, found := f.entries[fullPath]; found { - return &filer_pb.LookupDirectoryEntryResponse{Entry: entry}, nil - } - return nil, filer_pb.ErrNotFound -} - -func TestFileHandleEncodeDecodeRoundTrip(t *testing.T) { - handle := NewFileHandle(1234, FileHandleKindDirectory, 5678, 9) - - raw := handle.Encode() - decoded, err := DecodeFileHandle(raw) - require.NoError(t, err) - assert.Equal(t, handle, decoded) - - raw[len(raw)-1] ^= 0xff - _, err = DecodeFileHandle(raw) - require.ErrorIs(t, err, ErrInvalidHandle) -} - -func TestResolverUsesPathVisibleFromExportRoot(t *testing.T) { - client := &fakeResolverClient{ - kv: make(map[string][]byte), - entries: make(map[util.FullPath]*filer_pb.Entry), - } - resolver := NewResolver("/exports", client) - - record := &filer.InodeIndexRecord{ - Generation: 7, - Paths: []string{"/a/other.txt", "/exports/demo/link.txt"}, - } - value, err := record.Encode() - require.NoError(t, err) - client.kv[string(filer.InodeIndexKey(101))] = value - client.entries["/exports/demo/link.txt"] = &filer_pb.Entry{ - Name: "link.txt", - Attributes: &filer_pb.FuseAttributes{ - Inode: 101, - }, - } - - handle := NewFileHandle(resolver.ExportID(), FileHandleKindFile, 101, 7) - 
resolved, err := resolver.ResolveHandle(context.Background(), handle.Encode()) - require.NoError(t, err) - assert.Equal(t, util.FullPath("/exports/demo/link.txt"), resolved.Path) - require.NotNil(t, resolved.Entry) - assert.Equal(t, uint64(101), resolved.Entry.Attributes.Inode) -} - -func TestResolverRejectsGenerationMismatch(t *testing.T) { - client := &fakeResolverClient{ - kv: make(map[string][]byte), - entries: make(map[util.FullPath]*filer_pb.Entry), - } - resolver := NewResolver("/", client) - - record := &filer.InodeIndexRecord{ - Generation: 3, - Paths: []string{"/data/file.txt"}, - } - value, err := record.Encode() - require.NoError(t, err) - client.kv[string(filer.InodeIndexKey(44))] = value - client.entries["/data/file.txt"] = &filer_pb.Entry{ - Name: "file.txt", - Attributes: &filer_pb.FuseAttributes{ - Inode: 44, - }, - } - - handle := NewFileHandle(resolver.ExportID(), FileHandleKindFile, 44, 4) - _, err = resolver.ResolveHandle(context.Background(), handle.Encode()) - require.ErrorIs(t, err, ErrStaleHandle) -} - -func TestResolverKeepsHandleValidAcrossRename(t *testing.T) { - client := &fakeResolverClient{ - kv: make(map[string][]byte), - entries: make(map[util.FullPath]*filer_pb.Entry), - } - resolver := NewResolver("/exports", client) - - record := &filer.InodeIndexRecord{ - Generation: 5, - Paths: []string{"/exports/new-name.txt"}, - } - value, err := record.Encode() - require.NoError(t, err) - client.kv[string(filer.InodeIndexKey(88))] = value - client.entries["/exports/new-name.txt"] = &filer_pb.Entry{ - Name: "new-name.txt", - Attributes: &filer_pb.FuseAttributes{ - Inode: 88, - }, - } - - handle := NewFileHandle(resolver.ExportID(), FileHandleKindFile, 88, 5) - resolved, err := resolver.ResolveHandle(context.Background(), handle.Encode()) - require.NoError(t, err) - assert.Equal(t, util.FullPath("/exports/new-name.txt"), resolved.Path) - require.NotNil(t, resolved.Entry) - assert.Equal(t, uint64(88), resolved.Entry.Attributes.Inode) -} - -func 
TestResolverRejectsHandleAfterDeleteRecreateWithNewInode(t *testing.T) { - client := &fakeResolverClient{ - kv: make(map[string][]byte), - entries: make(map[util.FullPath]*filer_pb.Entry), - } - resolver := NewResolver("/exports", client) - - client.entries["/exports/file.txt"] = &filer_pb.Entry{ - Name: "file.txt", - Attributes: &filer_pb.FuseAttributes{ - Inode: 999, - }, - } - - record := &filer.InodeIndexRecord{ - Generation: 4, - Paths: []string{"/exports/file.txt"}, - } - value, err := record.Encode() - require.NoError(t, err) - client.kv[string(filer.InodeIndexKey(77))] = value - - handle := NewFileHandle(resolver.ExportID(), FileHandleKindFile, 77, 4) - _, err = resolver.ResolveHandle(context.Background(), handle.Encode()) - require.ErrorIs(t, err, ErrStaleHandle) -} - -func TestResolverSupportsSyntheticRootHandle(t *testing.T) { - client := &fakeResolverClient{ - kv: make(map[string][]byte), - entries: make(map[util.FullPath]*filer_pb.Entry), - } - resolver := NewResolver("/", client) - - handle := NewFileHandle(resolver.ExportID(), FileHandleKindDirectory, 0, filer.InodeIndexInitialGeneration) - resolved, err := resolver.ResolveHandle(context.Background(), handle.Encode()) - require.NoError(t, err) - assert.Equal(t, util.FullPath("/"), resolved.Path) - require.NotNil(t, resolved.Entry) - assert.True(t, resolved.Entry.IsDirectory) -} - -func TestNewServerNormalizesExportRootAndExportID(t *testing.T) { - server, err := NewServer(&Option{ - FilerRootPath: "/export/path/", - Port: 2049, - }) - require.NoError(t, err) - assert.Equal(t, util.FullPath("/export/path"), server.exportRoot) - assert.Equal(t, exportIDForRoot("/export/path"), server.exportID) -} diff --git a/weed/server/nfs/filesystem.go b/weed/server/nfs/filesystem.go deleted file mode 100644 index 7d239f89a..000000000 --- a/weed/server/nfs/filesystem.go +++ /dev/null @@ -1,1348 +0,0 @@ -package nfs - -import ( - "bytes" - "context" - "errors" - "fmt" - "io" - "os" - "path" - "sort" - "strings" - 
"time" - - billy "github.com/go-git/go-billy/v5" - "github.com/seaweedfs/seaweedfs/weed/filer" - "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" - "github.com/seaweedfs/seaweedfs/weed/util" - "github.com/seaweedfs/seaweedfs/weed/util/chunk_cache" - "github.com/seaweedfs/seaweedfs/weed/wdclient" - gonfs "github.com/willscott/go-nfs" - gonfsfile "github.com/willscott/go-nfs/file" - "google.golang.org/protobuf/proto" -) - -const ( - // maxInlineWriteSize is the legacy cutoff that decided whether a - // persisted write was inlined into entry.Content or uploaded as a - // chunk. The streaming write path always uploads chunks, so this - // constant is only used when reading back old inline-stored files. - maxInlineWriteSize = 4 << 20 - listEntriesPageSize = 1024 - maxSymlinkDepth = 32 -) - -type noopChunkCache struct{} - -func (noopChunkCache) ReadChunkAt(_ []byte, _ string, _ uint64) (int, error) { return 0, nil } -func (noopChunkCache) SetChunk(_ string, _ []byte) {} -func (noopChunkCache) IsInCache(_ string, _ bool) bool { return false } -func (noopChunkCache) GetMaxFilePartSizeInCache() uint64 { return 0 } - -type seaweedFileSystem struct { - server *Server - actualRoot util.FullPath - readerCache *filer.ReaderCache -} - -type seaweedFileInfo struct { - name string - virtualPath string - size int64 - mode os.FileMode - modTime time.Time - actualPath util.FullPath - entry *filer_pb.Entry - generation uint64 - fileID uint64 - nlink uint32 -} - -type seaweedFile struct { - fs *seaweedFileSystem - virtualPath string - info *seaweedFileInfo - reader io.ReaderAt - offset int64 - writable bool - appendOnly bool - closed bool -} - -var _ billy.Filesystem = (*seaweedFileSystem)(nil) -var _ billy.Capable = (*seaweedFileSystem)(nil) -var _ billy.Change = (*seaweedFileSystem)(nil) -var _ filer_pb.FilerClient = (*seaweedFileSystem)(nil) -var _ gonfs.UnixChange = (*seaweedFileSystem)(nil) - -func newSeaweedFileSystem(server *Server, actualRoot util.FullPath, sharedReaderCache 
*filer.ReaderCache) *seaweedFileSystem { - fs := &seaweedFileSystem{ - server: server, - actualRoot: normalizeExportRoot(actualRoot), - } - if sharedReaderCache != nil { - fs.readerCache = sharedReaderCache - } else { - fs.readerCache = filer.NewReaderCache(32, chunk_cache.ChunkCache(noopChunkCache{}), fs.LookupFn()) - } - return fs -} - -func (fs *seaweedFileSystem) Capabilities() billy.Capability { - capabilities := billy.ReadCapability | billy.SeekCapability - if !fs.isReadOnly() { - capabilities |= billy.WriteCapability | billy.ReadAndWriteCapability | billy.TruncateCapability - } - return capabilities -} - -func (fs *seaweedFileSystem) Create(filename string) (billy.File, error) { - return fs.OpenFile(filename, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0o666) -} - -func (fs *seaweedFileSystem) Open(filename string) (billy.File, error) { - return fs.openFile(context.Background(), filename, os.O_RDONLY, 0) -} - -func (fs *seaweedFileSystem) OpenFile(filename string, flag int, perm os.FileMode) (billy.File, error) { - return fs.openFile(context.Background(), filename, flag, perm) -} - -func (fs *seaweedFileSystem) openFile(ctx context.Context, filename string, flag int, perm os.FileMode) (billy.File, error) { - virtualPath := cleanBillyPath(filename) - writable := flag&(os.O_WRONLY|os.O_RDWR) != 0 - if writable { - if err := fs.ensureWritable(); err != nil { - return nil, err - } - } - - info, err := fs.ensureOpenEntry(ctx, virtualPath, flag, perm) - if err != nil { - return nil, err - } - info, err = fs.followSymlinkInfo(ctx, info, 0) - if err != nil { - return nil, err - } - if info.entry.IsDirectory { - return nil, fmt.Errorf("%s: is a directory", filename) - } - file := &seaweedFile{ - fs: fs, - virtualPath: virtualPath, - info: info, - writable: writable, - appendOnly: writable && flag&os.O_APPEND != 0, - } - if writable { - // O_TRUNC is effectively "rewrite from zero". 
Drop all chunks and - // inline content up front — but only if there is anything to drop, - // since a fresh empty file already satisfies the O_TRUNC semantics - // and an extra UpdateEntry would just churn metadata. - if flag&os.O_TRUNC != 0 && (filer.FileSize(info.entry) > 0 || len(info.entry.GetChunks()) > 0 || len(info.entry.Content) > 0) { - truncatedEntry, truncErr := fs.truncateEntryToSize(ctx, info.actualPath, 0) - if truncErr != nil { - return nil, truncErr - } - updatedInfo, infoErr := fs.materializeFileInfo(ctx, virtualPath, info.actualPath, truncatedEntry) - if infoErr != nil { - return nil, infoErr - } - file.info = updatedInfo - } - if flag&os.O_APPEND != 0 { - file.offset = int64(filer.FileSize(file.info.entry)) - } - } - return file, nil -} - -func (fs *seaweedFileSystem) Stat(filename string) (os.FileInfo, error) { - return fs.fileInfoForVirtualPathWithOptions(context.Background(), filename, true) -} - -func (fs *seaweedFileSystem) Lstat(filename string) (os.FileInfo, error) { - return fs.fileInfoForVirtualPathWithOptions(context.Background(), filename, false) -} - -func (fs *seaweedFileSystem) Rename(oldpath, newpath string) error { - if err := fs.ensureWritable(); err != nil { - return err - } - oldVirtualPath, oldActualPath := fs.resolvePath(oldpath) - _, newActualPath := fs.resolvePath(newpath) - - if oldVirtualPath == "/" || cleanBillyPath(newpath) == "/" { - return os.ErrPermission - } - if _, err := fs.fileInfoForVirtualPath(context.Background(), oldVirtualPath); err != nil { - return err - } - - oldDir, oldName := oldActualPath.DirAndName() - newDir, newName := newActualPath.DirAndName() - return fs.server.withInternalClient(false, func(client nfsFilerClient) error { - _, err := client.AtomicRenameEntry(context.Background(), &filer_pb.AtomicRenameEntryRequest{ - OldDirectory: oldDir, - OldName: oldName, - NewDirectory: newDir, - NewName: newName, - }) - if err != nil { - if isLookupNotFound(err) { - return os.ErrNotExist - } - return err - 
} - return nil - }) -} - -func (fs *seaweedFileSystem) Remove(filename string) error { - if err := fs.ensureWritable(); err != nil { - return err - } - virtualPath, actualPath := fs.resolvePath(filename) - if virtualPath == "/" { - return os.ErrPermission - } - if _, err := fs.fileInfoForVirtualPath(context.Background(), virtualPath); err != nil { - return err - } - - dir, name := actualPath.DirAndName() - return fs.server.withInternalClient(false, func(client nfsFilerClient) error { - resp, err := client.DeleteEntry(context.Background(), &filer_pb.DeleteEntryRequest{ - Directory: dir, - Name: name, - IsDeleteData: false, - IsRecursive: false, - }) - if err != nil { - if isLookupNotFound(err) { - return os.ErrNotExist - } - return err - } - if resp != nil && resp.Error != "" { - if strings.Contains(resp.Error, filer_pb.ErrNotFound.Error()) { - return os.ErrNotExist - } - return errors.New(resp.Error) - } - return nil - }) -} - -func (fs *seaweedFileSystem) Join(elem ...string) string { - if len(elem) == 0 { - return "/" - } - joined := path.Join(elem...) - if joined == "." 
|| joined == "" { - return "/" - } - if !strings.HasPrefix(joined, "/") { - joined = "/" + joined - } - return path.Clean(joined) -} - -func (fs *seaweedFileSystem) TempFile(string, string) (billy.File, error) { - return nil, billy.ErrReadOnly -} - -func (fs *seaweedFileSystem) ReadDir(dirname string) ([]os.FileInfo, error) { - ctx := context.Background() - virtualPath, actualPath := fs.resolvePath(dirname) - - var infos []os.FileInfo - startFrom := "" - for { - pageCount := 0 - lastName := "" - err := fs.server.withInternalClient(false, func(client nfsFilerClient) error { - stream, err := client.ListEntries(ctx, &filer_pb.ListEntriesRequest{ - Directory: string(actualPath), - StartFromFileName: startFrom, - InclusiveStartFrom: false, - Limit: listEntriesPageSize, - }) - if err != nil { - if isLookupNotFound(err) { - return os.ErrNotExist - } - return err - } - - for { - resp, recvErr := stream.Recv() - if recvErr == io.EOF { - break - } - if recvErr != nil { - return recvErr - } - if resp == nil || resp.Entry == nil { - continue - } - - lastName = resp.Entry.Name - pageCount++ - - childVirtualPath := path.Join(virtualPath, resp.Entry.Name) - childActualPath := util.NewFullPath(string(actualPath), resp.Entry.Name) - info, infoErr := fs.materializeFileInfo(ctx, childVirtualPath, childActualPath, resp.Entry) - if infoErr != nil { - return infoErr - } - infos = append(infos, info) - } - return nil - }) - if err != nil { - return nil, err - } - if pageCount < listEntriesPageSize || lastName == "" { - break - } - startFrom = lastName - } - - sort.Slice(infos, func(i, j int) bool { - return infos[i].Name() < infos[j].Name() - }) - return infos, nil -} - -func (fs *seaweedFileSystem) MkdirAll(filename string, perm os.FileMode) error { - if err := fs.ensureWritable(); err != nil { - return err - } - virtualPath := cleanBillyPath(filename) - if virtualPath == "/" { - return nil - } - - info, err := fs.fileInfoForVirtualPath(context.Background(), virtualPath) - if err == nil 
{ - if info.IsDir() { - return nil - } - return os.ErrExist - } - if !os.IsNotExist(err) { - return err - } - - _, actualPath := fs.resolvePath(virtualPath) - _, err = fs.createEntry(context.Background(), actualPath, true, perm|os.ModeDir, "") - return err -} - -func (fs *seaweedFileSystem) Symlink(target, link string) error { - if err := fs.ensureWritable(); err != nil { - return err - } - virtualPath, actualPath := fs.resolvePath(link) - if virtualPath == "/" { - return os.ErrPermission - } - if _, err := fs.fileInfoForVirtualPath(context.Background(), virtualPath); err == nil { - return os.ErrExist - } else if !os.IsNotExist(err) { - return err - } - - _, err := fs.createEntry(context.Background(), actualPath, false, 0o777, target) - return err -} - -func (fs *seaweedFileSystem) Link(target, link string) error { - if err := fs.ensureWritable(); err != nil { - return err - } - ctx := context.Background() - - linkVirtualPath, linkActualPath := fs.resolvePath(link) - if linkVirtualPath == "/" { - return os.ErrPermission - } - if _, err := fs.fileInfoForVirtualPath(ctx, linkVirtualPath); err == nil { - return os.ErrExist - } else if !os.IsNotExist(err) { - return err - } - - sourceActualPath, sourceEntry, err := fs.resolveHardLinkTarget(ctx, target) - if err != nil { - return err - } - if sourceEntry == nil { - return os.ErrNotExist - } - if sourceEntry.IsDirectory { - return billy.ErrNotSupported - } - - sourceOriginal, ok := proto.Clone(sourceEntry).(*filer_pb.Entry) - if !ok { - return errors.New("clone hard link source entry") - } - - updatedSource, err := fs.mutateEntry(ctx, sourceActualPath, func(entry *filer_pb.Entry) { - if entry.Attributes == nil { - entry.Attributes = &filer_pb.FuseAttributes{} - } - if len(entry.HardLinkId) == 0 { - entry.HardLinkId = filer.NewHardLinkId() - entry.HardLinkCounter = 1 - } - entry.HardLinkCounter++ - touchEntryTimes(entry, true) - }) - if err != nil { - return err - } - - newLinkEntry, ok := 
proto.Clone(updatedSource).(*filer_pb.Entry) - if !ok { - return errors.New("clone hard link target entry") - } - _, linkName := linkActualPath.DirAndName() - newLinkEntry.Name = linkName - - if _, err := fs.createEntryFromProto(ctx, linkActualPath, newLinkEntry); err != nil { - _, rollbackErr := fs.updateEntryAtPath(ctx, sourceActualPath, sourceOriginal) - if rollbackErr != nil { - return fmt.Errorf("create hard link: %w (rollback failed: %v)", err, rollbackErr) - } - return err - } - - return nil -} - -func (fs *seaweedFileSystem) Readlink(link string) (string, error) { - info, err := fs.fileInfoForVirtualPath(context.Background(), link) - if err != nil { - return "", err - } - if info.entry.Attributes == nil || info.entry.Attributes.SymlinkTarget == "" { - return "", billy.ErrNotSupported - } - return info.entry.Attributes.SymlinkTarget, nil -} - -func (fs *seaweedFileSystem) Mknod(string, uint32, uint32, uint32) error { - return billy.ErrNotSupported -} - -func (fs *seaweedFileSystem) Mkfifo(string, uint32) error { - return billy.ErrNotSupported -} - -func (fs *seaweedFileSystem) Socket(string) error { - return billy.ErrNotSupported -} - -func (fs *seaweedFileSystem) Chroot(p string) (billy.Filesystem, error) { - info, err := fs.fileInfoForVirtualPath(context.Background(), p) - if err != nil { - return nil, err - } - if !info.IsDir() { - return nil, fmt.Errorf("%s: not a directory", p) - } - return newSeaweedFileSystem(fs.server, info.actualPath, fs.readerCache), nil -} - -func (fs *seaweedFileSystem) Chmod(name string, mode os.FileMode) error { - if err := fs.ensureWritable(); err != nil { - return err - } - _, actualPath := fs.resolvePath(name) - _, err := fs.mutateEntry(context.Background(), actualPath, func(entry *filer_pb.Entry) { - entry.Attributes.FileMode = uint32(mode) - touchEntryTimes(entry, false) - }) - return err -} - -func (fs *seaweedFileSystem) Lchown(name string, uid, gid int) error { - if err := fs.ensureWritable(); err != nil { - return err 
- } - _, actualPath := fs.resolvePath(name) - _, err := fs.mutateEntry(context.Background(), actualPath, func(entry *filer_pb.Entry) { - entry.Attributes.Uid = uint32(uid) - entry.Attributes.Gid = uint32(gid) - touchEntryTimes(entry, false) - }) - return err -} - -func (fs *seaweedFileSystem) Chown(name string, uid, gid int) error { - return fs.Lchown(name, uid, gid) -} - -func (fs *seaweedFileSystem) Chtimes(name string, _ time.Time, mtime time.Time) error { - if err := fs.ensureWritable(); err != nil { - return err - } - _, actualPath := fs.resolvePath(name) - _, err := fs.mutateEntry(context.Background(), actualPath, func(entry *filer_pb.Entry) { - entry.Attributes.Mtime = mtime.Unix() - entry.Attributes.MtimeNs = int32(mtime.Nanosecond()) - entry.Attributes.Ctime = mtime.Unix() - entry.Attributes.CtimeNs = int32(mtime.Nanosecond()) - }) - return err -} - -func (fs *seaweedFileSystem) Root() string { - return "/" -} - -func (fs *seaweedFileSystem) WithFilerClient(streamingMode bool, fn func(filer_pb.SeaweedFilerClient) error) error { - return fs.server.WithFilerClient(streamingMode, fn) -} - -func (fs *seaweedFileSystem) LookupFn() wdclient.LookupFileIdFunctionType { - if fs == nil || fs.server == nil { - return nil - } - return fs.server.LookupFn() -} - -func (fs *seaweedFileSystem) AdjustedUrl(location *filer_pb.Location) string { - if location == nil { - return "" - } - if fs.server.option.VolumeServerAccess == "publicUrl" && location.PublicUrl != "" { - return location.PublicUrl - } - return location.Url -} - -func (fs *seaweedFileSystem) isReadOnly() bool { - return fs != nil && fs.server != nil && fs.server.option != nil && fs.server.option.ReadOnly -} - -func (fs *seaweedFileSystem) ensureWritable() error { - if fs.isReadOnly() { - return billy.ErrReadOnly - } - return nil -} - -func (fs *seaweedFileSystem) GetDataCenter() string { - return "" -} - -func (fs *seaweedFileSystem) resolvePath(name string) (string, util.FullPath) { - virtualPath := 
cleanBillyPath(name) - if virtualPath == "/" { - return virtualPath, fs.actualRoot - } - return virtualPath, fs.actualRoot.Child(strings.TrimPrefix(virtualPath, "/")) -} - -func (fs *seaweedFileSystem) ensureOpenEntry(ctx context.Context, virtualPath string, flag int, perm os.FileMode) (*seaweedFileInfo, error) { - info, err := fs.fileInfoForVirtualPath(ctx, virtualPath) - if err == nil { - if flag&os.O_CREATE != 0 && flag&os.O_EXCL != 0 { - return nil, os.ErrExist - } - return info, nil - } - if !os.IsNotExist(err) { - return nil, err - } - if flag&os.O_CREATE == 0 { - return nil, err - } - - _, actualPath := fs.resolvePath(virtualPath) - if perm == 0 { - perm = 0o666 - } - entry, createErr := fs.createEntry(ctx, actualPath, false, perm, "") - if createErr != nil { - return nil, createErr - } - return fs.materializeFileInfo(ctx, virtualPath, actualPath, entry) -} - -func (fs *seaweedFileSystem) createEntry(ctx context.Context, actualPath util.FullPath, isDirectory bool, mode os.FileMode, symlinkTarget string) (*filer_pb.Entry, error) { - dir, name := actualPath.DirAndName() - now := time.Now() - entry := &filer_pb.Entry{ - Name: name, - IsDirectory: isDirectory, - Attributes: &filer_pb.FuseAttributes{ - Mtime: now.Unix(), - MtimeNs: int32(now.Nanosecond()), - Ctime: now.Unix(), - CtimeNs: int32(now.Nanosecond()), - Crtime: now.Unix(), - FileMode: uint32(mode), - Uid: filer_pb.OS_UID, - Gid: filer_pb.OS_GID, - }, - } - if isDirectory { - entry.Attributes.FileMode = uint32(mode | os.ModeDir) - } - if symlinkTarget != "" { - entry.Attributes.SymlinkTarget = symlinkTarget - } - - var createdEntry *filer_pb.Entry - err := fs.server.withInternalClient(false, func(client nfsFilerClient) error { - resp, err := client.CreateEntry(ctx, &filer_pb.CreateEntryRequest{ - Directory: dir, - Entry: entry, - OExcl: false, - }) - if err != nil { - if errors.Is(err, filer_pb.ErrEntryAlreadyExists) { - return os.ErrExist - } - return err - } - if resp != nil { - if resp.ErrorCode != 
filer_pb.FilerError_OK { - if sentinel := filer_pb.FilerErrorToSentinel(resp.ErrorCode); sentinel != nil { - if errors.Is(sentinel, filer_pb.ErrEntryAlreadyExists) { - return os.ErrExist - } - return sentinel - } - if resp.Error != "" { - return errors.New(resp.Error) - } - } - if resp.MetadataEvent != nil && resp.MetadataEvent.EventNotification != nil && resp.MetadataEvent.EventNotification.NewEntry != nil { - createdEntry = resp.MetadataEvent.EventNotification.NewEntry - } - } - return nil - }) - if err != nil { - return nil, err - } - if createdEntry != nil { - return createdEntry, nil - } - return fs.lookupEntry(ctx, actualPath) -} - -func (fs *seaweedFileSystem) createEntryFromProto(ctx context.Context, actualPath util.FullPath, entry *filer_pb.Entry) (*filer_pb.Entry, error) { - dir, name := actualPath.DirAndName() - - clonedEntry, ok := proto.Clone(entry).(*filer_pb.Entry) - if !ok { - return nil, errors.New("clone filer entry") - } - clonedEntry.Name = name - - var createdEntry *filer_pb.Entry - err := fs.server.withInternalClient(false, func(client nfsFilerClient) error { - resp, err := client.CreateEntry(ctx, &filer_pb.CreateEntryRequest{ - Directory: dir, - Entry: clonedEntry, - OExcl: false, - }) - if err != nil { - if errors.Is(err, filer_pb.ErrEntryAlreadyExists) { - return os.ErrExist - } - return err - } - if resp != nil { - if resp.ErrorCode != filer_pb.FilerError_OK { - if sentinel := filer_pb.FilerErrorToSentinel(resp.ErrorCode); sentinel != nil { - if errors.Is(sentinel, filer_pb.ErrEntryAlreadyExists) { - return os.ErrExist - } - return sentinel - } - if resp.Error != "" { - return errors.New(resp.Error) - } - } - if resp.MetadataEvent != nil && resp.MetadataEvent.EventNotification != nil && resp.MetadataEvent.EventNotification.NewEntry != nil { - createdEntry = resp.MetadataEvent.EventNotification.NewEntry - } - } - return nil - }) - if err != nil { - return nil, err - } - if createdEntry != nil { - return createdEntry, nil - } - return 
fs.lookupEntry(ctx, actualPath) -} - -func (fs *seaweedFileSystem) mutateEntry(ctx context.Context, actualPath util.FullPath, mutate func(*filer_pb.Entry)) (*filer_pb.Entry, error) { - currentEntry, err := fs.lookupEntry(ctx, actualPath) - if err != nil { - return nil, err - } - - clonedEntry, ok := proto.Clone(currentEntry).(*filer_pb.Entry) - if !ok { - return nil, errors.New("clone filer entry") - } - if clonedEntry.Attributes == nil { - clonedEntry.Attributes = &filer_pb.FuseAttributes{} - } - - mutate(clonedEntry) - - dir, _ := actualPath.DirAndName() - var updatedEntry *filer_pb.Entry - err = fs.server.withInternalClient(false, func(client nfsFilerClient) error { - resp, err := client.UpdateEntry(ctx, &filer_pb.UpdateEntryRequest{ - Directory: dir, - Entry: clonedEntry, - }) - if err != nil { - return err - } - if resp != nil && resp.MetadataEvent != nil && resp.MetadataEvent.EventNotification != nil && resp.MetadataEvent.EventNotification.NewEntry != nil { - updatedEntry = resp.MetadataEvent.EventNotification.NewEntry - } - return nil - }) - if err != nil { - return nil, err - } - if updatedEntry != nil { - return updatedEntry, nil - } - return fs.lookupEntry(ctx, actualPath) -} - -func (fs *seaweedFileSystem) updateEntryAtPath(ctx context.Context, actualPath util.FullPath, entry *filer_pb.Entry) (*filer_pb.Entry, error) { - clonedEntry, ok := proto.Clone(entry).(*filer_pb.Entry) - if !ok { - return nil, errors.New("clone filer entry") - } - _, name := actualPath.DirAndName() - clonedEntry.Name = name - - dir, _ := actualPath.DirAndName() - var updatedEntry *filer_pb.Entry - err := fs.server.withInternalClient(false, func(client nfsFilerClient) error { - resp, err := client.UpdateEntry(ctx, &filer_pb.UpdateEntryRequest{ - Directory: dir, - Entry: clonedEntry, - }) - if err != nil { - return err - } - if resp != nil && resp.MetadataEvent != nil && resp.MetadataEvent.EventNotification != nil && resp.MetadataEvent.EventNotification.NewEntry != nil { - 
updatedEntry = resp.MetadataEvent.EventNotification.NewEntry - } - return nil - }) - if err != nil { - return nil, err - } - if updatedEntry != nil { - return updatedEntry, nil - } - return fs.lookupEntry(ctx, actualPath) -} - -// saveDataAsChunk uploads `content` to a volume server and returns a filer -// FileChunk describing the resulting segment at the requested file offset. -// The caller is responsible for wiring the returned chunk into the entry's -// chunk list (typically via mutateEntry) and for updating FileSize. -// -// The actual AssignVolume + HTTP upload is handled by -// filer.SaveGatewayDataAsChunk so NFS, WebDAV, and future filer-backed -// gateways share a single implementation of that code path. -func (fs *seaweedFileSystem) saveDataAsChunk(actualPath util.FullPath, content []byte, fileOffset int64) (*filer_pb.FileChunk, error) { - uploader, err := fs.server.newUploader() - if err != nil { - return nil, fmt.Errorf("upload data: %w", err) - } - - return filer.SaveGatewayDataAsChunk(filer.GatewayChunkUploadRequest{ - FilerClient: fs, - Uploader: uploader, - Reader: util.NewBytesReader(content), - FullPath: string(actualPath), - Filename: actualPath.Name(), - Offset: fileOffset, - TsNs: time.Now().UnixNano(), - DataCenter: fs.GetDataCenter(), - VolumeServerAccess: fs.server.option.VolumeServerAccess, - FilerHTTPAddress: fs.server.option.Filer.ToHttpAddress(), - }) -} - -// appendStreamedChunk uploads `data` at `fileOffset` and atomically appends -// the resulting chunk to the filer entry, extending FileSize if this write -// grew the file. If the entry currently stores its payload inline in -// entry.Content, that content is migrated to a chunk first so the chunk -// list becomes the authoritative representation for the file. -func (fs *seaweedFileSystem) appendStreamedChunk(ctx context.Context, info *seaweedFileInfo, data []byte, fileOffset int64) (*filer_pb.Entry, error) { - // Upload the caller's write as a chunk at the target offset. 
- newChunk, err := fs.saveDataAsChunk(info.actualPath, data, fileOffset) - if err != nil { - return nil, err - } - - // If the file still has inline content, migrate it to a chunk as well. - // We upload it outside of mutateEntry so the mutation closure stays - // synchronous and short. - var migratedInlineChunk *filer_pb.FileChunk - if info.entry != nil && len(info.entry.Content) > 0 { - migratedInlineChunk, err = fs.saveDataAsChunk(info.actualPath, info.entry.Content, 0) - if err != nil { - return nil, err - } - } - - newEnd := uint64(fileOffset) + uint64(len(data)) - return fs.mutateEntry(ctx, info.actualPath, func(entry *filer_pb.Entry) { - if migratedInlineChunk != nil && len(entry.Content) > 0 { - entry.Chunks = append(entry.Chunks, migratedInlineChunk) - entry.Content = nil - } - entry.Chunks = append(entry.Chunks, newChunk) - entry.RemoteEntry = nil - if newEnd > entry.Attributes.FileSize { - entry.Attributes.FileSize = newEnd - } - touchEntryTimes(entry, true) - }) -} - -// truncateEntryToSize resizes the file to `size` by dropping chunks that -// live entirely past the new size, clipping inline content, and updating -// FileSize. Chunks that straddle the new size are left intact; the filer's -// chunk-view layer clips the logical read window at FileSize. 
-func (fs *seaweedFileSystem) truncateEntryToSize(ctx context.Context, actualPath util.FullPath, size int64) (*filer_pb.Entry, error) { - if size < 0 { - return nil, billy.ErrNotSupported - } - return fs.mutateEntry(ctx, actualPath, func(entry *filer_pb.Entry) { - kept := entry.Chunks[:0] - for _, chunk := range entry.Chunks { - if chunk.Offset >= size { - continue - } - kept = append(kept, chunk) - } - entry.Chunks = kept - if int64(len(entry.Content)) > size { - if size == 0 { - entry.Content = nil - } else { - entry.Content = entry.Content[:size] - } - } - entry.Attributes.FileSize = uint64(size) - touchEntryTimes(entry, true) - }) -} - -func (fs *seaweedFileSystem) fileInfoForVirtualPath(ctx context.Context, name string) (*seaweedFileInfo, error) { - return fs.fileInfoForVirtualPathWithOptions(ctx, name, false) -} - -func (fs *seaweedFileSystem) fileInfoForVirtualPathWithOptions(ctx context.Context, name string, followFinalSymlink bool) (*seaweedFileInfo, error) { - return fs.fileInfoForVirtualPathDepth(ctx, name, followFinalSymlink, 0) -} - -func (fs *seaweedFileSystem) fileInfoForVirtualPathDepth(ctx context.Context, name string, followFinalSymlink bool, depth int) (*seaweedFileInfo, error) { - virtualPath, actualPath := fs.resolvePath(name) - - entry, err := fs.lookupEntry(ctx, actualPath) - if err != nil { - return nil, err - } - info, err := fs.materializeFileInfo(ctx, virtualPath, actualPath, entry) - if err != nil { - return nil, err - } - if !followFinalSymlink { - return info, nil - } - return fs.followSymlinkInfo(ctx, info, depth) -} - -func (fs *seaweedFileSystem) followSymlinkInfo(ctx context.Context, info *seaweedFileInfo, depth int) (*seaweedFileInfo, error) { - if info == nil || info.entry == nil || info.entry.Attributes == nil || info.entry.Attributes.SymlinkTarget == "" { - return info, nil - } - if depth >= maxSymlinkDepth { - return nil, fmt.Errorf("%s: too many symlinks", info.virtualPath) - } - targetPath := 
resolveSymlinkVirtualPath(info.virtualPath, info.entry.Attributes.SymlinkTarget) - return fs.fileInfoForVirtualPathDepth(ctx, targetPath, true, depth+1) -} - -func resolveSymlinkVirtualPath(linkPath, target string) string { - if strings.HasPrefix(target, "/") { - return cleanBillyPath(target) - } - return cleanBillyPath(path.Join(path.Dir(cleanBillyPath(linkPath)), target)) -} - -func (fs *seaweedFileSystem) materializeFileInfo(ctx context.Context, virtualPath string, actualPath util.FullPath, entry *filer_pb.Entry) (*seaweedFileInfo, error) { - entry, generation, err := fs.ensureIndexedEntry(ctx, actualPath, entry) - if err != nil { - return nil, err - } - - fileID := entry.Attributes.GetInode() - if fileID == 0 && actualPath == fs.server.exportRoot && entry.IsDirectory { - fileID = uint64(fs.server.exportID) - } - - return &seaweedFileInfo{ - name: fileInfoName(virtualPath, entry), - virtualPath: virtualPath, - size: int64(filer.FileSize(entry)), - mode: fileModeForEntry(entry), - modTime: entryModTime(entry), - actualPath: actualPath, - entry: entry, - generation: generation, - fileID: fileID, - nlink: entryLinkCount(entry), - }, nil -} - -func (fs *seaweedFileSystem) lookupEntry(ctx context.Context, actualPath util.FullPath) (*filer_pb.Entry, error) { - var entry *filer_pb.Entry - err := fs.server.withInternalClient(false, func(client nfsFilerClient) error { - dir, name := actualPath.DirAndName() - resp, err := client.LookupDirectoryEntry(ctx, &filer_pb.LookupDirectoryEntryRequest{ - Directory: dir, - Name: name, - }) - if err != nil { - return err - } - if resp == nil || resp.Entry == nil { - return filer_pb.ErrNotFound - } - entry = resp.Entry - return nil - }) - if err == nil { - return entry, nil - } - if isLookupNotFound(err) { - if actualPath == "/" { - return syntheticRootEntry(), nil - } - return nil, os.ErrNotExist - } - return nil, err -} - -func (fs *seaweedFileSystem) resolveHardLinkTarget(ctx context.Context, target string) (util.FullPath, 
*filer_pb.Entry, error) { - var resolved *ResolvedHandle - handleErr := fs.server.withInternalClient(false, func(client nfsFilerClient) error { - var err error - resolved, err = NewResolver(fs.server.exportRoot, client).ResolveHandle(ctx, []byte(target)) - return err - }) - if handleErr == nil && resolved != nil { - return resolved.Path, resolved.Entry, nil - } - - if strings.HasPrefix(target, "/") { - _, actualPath := fs.resolvePath(target) - entry, err := fs.lookupEntry(ctx, actualPath) - if err != nil { - return "", nil, err - } - return actualPath, entry, nil - } - - if handleErr != nil { - return "", nil, handleErr - } - return "", nil, os.ErrNotExist -} - -func (fs *seaweedFileSystem) ensureIndexedEntry(ctx context.Context, actualPath util.FullPath, entry *filer_pb.Entry) (*filer_pb.Entry, uint64, error) { - if entry == nil { - return nil, 0, os.ErrNotExist - } - if entry.Attributes == nil { - entry.Attributes = &filer_pb.FuseAttributes{} - } - - if entry.Attributes.Inode == 0 && !(actualPath == "/" && entry.Name == "/" && entry.IsDirectory) { - updatedEntry, err := fs.backfillLegacyInode(ctx, actualPath, entry) - if err != nil { - return nil, 0, err - } - entry = updatedEntry - } - - if entry.Attributes.GetInode() == 0 { - if actualPath == "/" && entry.Name == "/" && entry.IsDirectory { - return entry, filer.InodeIndexInitialGeneration, nil - } - return nil, 0, fmt.Errorf("nfs requires inode-backed entry for %s", actualPath) - } - - generation, err := fs.lookupGeneration(ctx, entry.Attributes.GetInode()) - if err != nil { - return nil, 0, err - } - return entry, generation, nil -} - -func (fs *seaweedFileSystem) backfillLegacyInode(ctx context.Context, actualPath util.FullPath, entry *filer_pb.Entry) (*filer_pb.Entry, error) { - dir, _ := actualPath.DirAndName() - clonedEntry, ok := proto.Clone(entry).(*filer_pb.Entry) - if !ok { - return nil, errors.New("clone filer entry") - } - - var updatedEntry *filer_pb.Entry - err := 
fs.server.withInternalClient(false, func(client nfsFilerClient) error { - resp, err := client.UpdateEntry(ctx, &filer_pb.UpdateEntryRequest{ - Directory: dir, - Entry: clonedEntry, - }) - if err != nil { - return err - } - if resp != nil && resp.MetadataEvent != nil && resp.MetadataEvent.EventNotification != nil && resp.MetadataEvent.EventNotification.NewEntry != nil { - updatedEntry = resp.MetadataEvent.EventNotification.NewEntry - } - return nil - }) - if err != nil { - return nil, err - } - if updatedEntry != nil { - return updatedEntry, nil - } - return fs.lookupEntry(ctx, actualPath) -} - -func (fs *seaweedFileSystem) lookupGeneration(ctx context.Context, inode uint64) (uint64, error) { - var resp *filer_pb.KvGetResponse - err := fs.server.withInternalClient(false, func(client nfsFilerClient) error { - var kvErr error - resp, kvErr = client.KvGet(ctx, &filer_pb.KvGetRequest{Key: filer.InodeIndexKey(inode)}) - return kvErr - }) - if err != nil { - return 0, err - } - if resp == nil { - return 0, ErrStaleHandle - } - if resp.GetError() != "" { - return 0, errors.New(resp.GetError()) - } - if len(resp.GetValue()) == 0 { - return 0, ErrStaleHandle - } - - record, err := filer.DecodeInodeIndexRecord(resp.GetValue()) - if err != nil { - return 0, err - } - if record.Generation == 0 { - return filer.InodeIndexInitialGeneration, nil - } - return record.Generation, nil -} - -func fileInfoName(virtualPath string, entry *filer_pb.Entry) string { - if entry != nil && entry.Name != "" { - return entry.Name - } - if virtualPath == "/" { - return "/" - } - return path.Base(virtualPath) -} - -func fileModeForEntry(entry *filer_pb.Entry) os.FileMode { - mode := os.FileMode(0) - if entry != nil && entry.Attributes != nil { - mode = os.FileMode(entry.Attributes.FileMode) - } - if entry != nil && entry.IsDirectory { - mode |= os.ModeDir - } - if entry != nil && entry.Attributes != nil && entry.Attributes.SymlinkTarget != "" { - mode |= os.ModeSymlink - } - return mode -} - -func 
entryModTime(entry *filer_pb.Entry) time.Time { - if entry == nil || entry.Attributes == nil { - return time.Unix(0, 0) - } - seconds := entry.Attributes.Mtime - nanos := int64(entry.Attributes.MtimeNs) - if seconds == 0 && nanos == 0 { - seconds = entry.Attributes.Crtime - } - return time.Unix(seconds, nanos) -} - -func entryLinkCount(entry *filer_pb.Entry) uint32 { - if entry == nil { - return 1 - } - if entry.HardLinkCounter > 0 { - return uint32(entry.HardLinkCounter) - } - return 1 -} - -func touchEntryTimes(entry *filer_pb.Entry, updateMtime bool) { - if entry == nil { - return - } - if entry.Attributes == nil { - entry.Attributes = &filer_pb.FuseAttributes{} - } - now := time.Now() - if updateMtime { - entry.Attributes.Mtime = now.Unix() - entry.Attributes.MtimeNs = int32(now.Nanosecond()) - } - entry.Attributes.Ctime = now.Unix() - entry.Attributes.CtimeNs = int32(now.Nanosecond()) - if entry.Attributes.Crtime == 0 { - entry.Attributes.Crtime = now.Unix() - } -} - -func cleanBillyPath(name string) string { - if name == "" || name == "." { - return "/" - } - cleaned := path.Clean(name) - if cleaned == "." 
{ - return "/" - } - if !strings.HasPrefix(cleaned, "/") { - cleaned = "/" + cleaned - } - return cleaned -} - -func syntheticRootEntry() *filer_pb.Entry { - return &filer_pb.Entry{ - Name: "/", - IsDirectory: true, - Attributes: &filer_pb.FuseAttributes{ - FileMode: uint32(os.ModeDir | 0755), - }, - } -} - -func (fi *seaweedFileInfo) Name() string { return fi.name } -func (fi *seaweedFileInfo) Size() int64 { return fi.size } -func (fi *seaweedFileInfo) Mode() os.FileMode { return fi.mode } -func (fi *seaweedFileInfo) ModTime() time.Time { return fi.modTime } -func (fi *seaweedFileInfo) IsDir() bool { return fi.mode.IsDir() } -func (fi *seaweedFileInfo) Sys() interface{} { - return &gonfsfile.FileInfo{ - Nlink: fi.nlink, - UID: fi.entry.GetAttributes().GetUid(), - GID: fi.entry.GetAttributes().GetGid(), - Fileid: fi.fileID, - } -} - -func (f *seaweedFile) Name() string { return f.virtualPath } - -func (f *seaweedFile) Read(p []byte) (int, error) { - n, err := f.ReadAt(p, f.offset) - f.offset += int64(n) - return n, err -} - -func (f *seaweedFile) ReadAt(p []byte, off int64) (int, error) { - // Writable opens no longer carry a private in-memory copy of the - // content; reads always go through the filer entry's inline bytes or - // chunk list. Write() refreshes f.info after each append so a - // read-after-write in the same session sees the new data. 
- if len(f.info.entry.Content) > 0 { - reader := bytes.NewReader(f.info.entry.Content) - return reader.ReadAt(p, off) - } - - fileSize := int64(filer.FileSize(f.info.entry)) - if fileSize == 0 || off >= fileSize { - return 0, io.EOF - } - if f.reader == nil { - visibleIntervals, err := filer.NonOverlappingVisibleIntervals(context.Background(), f.fs.LookupFn(), f.info.entry.GetChunks(), 0, fileSize) - if err != nil { - return 0, err - } - chunkViews := filer.ViewFromVisibleIntervals(visibleIntervals, 0, fileSize) - f.reader = filer.NewChunkReaderAtFromClient(context.Background(), f.fs.readerCache, chunkViews, fileSize, filer.DefaultPrefetchCount) - } - return f.reader.ReadAt(p, off) -} - -func (f *seaweedFile) Write(p []byte) (int, error) { - if !f.writable { - return 0, billy.ErrReadOnly - } - if f.fs != nil && f.fs.isReadOnly() { - return 0, billy.ErrReadOnly - } - if f.closed { - return 0, os.ErrClosed - } - if len(p) == 0 { - return 0, nil - } - if f.offset < 0 { - return 0, billy.ErrNotSupported - } - if f.appendOnly { - f.offset = int64(filer.FileSize(f.info.entry)) - } - - ctx := context.Background() - currentSize := int64(filer.FileSize(f.info.entry)) - hasChunks := len(f.info.entry.GetChunks()) > 0 - - // Inline fast path — mirrors the filer HTTP upload handler's - // SaveToFilerLimit shortcut. As long as the file has no existing - // chunks and the post-write size still fits in the inline budget, - // we rewrite the `Content` bytes directly on the filer entry and - // skip the volume-server round-trip entirely. A write that would - // push the file beyond the inline limit, or a write to a file that - // already has chunks, falls through to the streaming path below. 
- postWriteSize := f.offset + int64(len(p)) - if postWriteSize < currentSize { - postWriteSize = currentSize - } - var updatedEntry *filer_pb.Entry - var err error - if !hasChunks && postWriteSize <= int64(maxInlineWriteSize) { - existing := f.info.entry.Content - merged := make([]byte, postWriteSize) - copy(merged, existing) - copy(merged[f.offset:], p) - updatedEntry, err = f.fs.mutateEntry(ctx, f.info.actualPath, func(entry *filer_pb.Entry) { - entry.Content = merged - entry.Chunks = nil - entry.RemoteEntry = nil - entry.Attributes.FileSize = uint64(len(merged)) - touchEntryTimes(entry, true) - }) - } else { - // Streaming path: upload the caller's bytes straight to a volume - // server and atomically append the resulting chunk to the filer - // entry. No per-file in-memory buffer is held; each Write call - // costs one AssignVolume + one chunk upload + one filer - // UpdateEntry, exactly like how `weed filer` HTTP uploads and - // the S3 gateway persist object data. - updatedEntry, err = f.fs.appendStreamedChunk(ctx, f.info, p, f.offset) - } - if err != nil { - return 0, err - } - - updatedInfo, err := f.fs.materializeFileInfo(ctx, f.virtualPath, f.info.actualPath, updatedEntry) - if err != nil { - return 0, err - } - f.info = updatedInfo - // Invalidate any cached reader so a subsequent Read sees the new data. 
- f.reader = nil - - f.offset += int64(len(p)) - return len(p), nil -} - -func (f *seaweedFile) Seek(offset int64, whence int) (int64, error) { - nextOffset := f.offset - switch whence { - case io.SeekStart: - nextOffset = offset - case io.SeekCurrent: - nextOffset += offset - case io.SeekEnd: - if f.writable { - nextOffset = int64(filer.FileSize(f.info.entry)) + offset - } else { - nextOffset = f.info.size + offset - } - default: - return 0, fmt.Errorf("invalid whence %d", whence) - } - if nextOffset < 0 { - nextOffset = 0 - } - // POSIX allows Seek on an O_APPEND file — the append-only constraint - // only restricts Write, not read offsets or lseek positioning. Write - // already snaps the offset back to EOF before writing (see seaweedFile - // Write), so we can accept any Seek here without violating the - // append-only guarantee. - f.offset = nextOffset - return f.offset, nil -} - -func (f *seaweedFile) Close() error { - if f.closed { - return nil - } - f.closed = true - // All dirty data is flushed to the filer synchronously inside Write - // (and inside Truncate), so Close has nothing to do beyond marking the - // handle as unusable. 
- return nil -} -func (f *seaweedFile) Lock() error { return billy.ErrNotSupported } -func (f *seaweedFile) Unlock() error { return billy.ErrNotSupported } - -func (f *seaweedFile) Truncate(size int64) error { - if !f.writable { - return billy.ErrReadOnly - } - if f.fs != nil && f.fs.isReadOnly() { - return billy.ErrReadOnly - } - if size < 0 { - return billy.ErrNotSupported - } - ctx := context.Background() - updatedEntry, err := f.fs.truncateEntryToSize(ctx, f.info.actualPath, size) - if err != nil { - return err - } - updatedInfo, err := f.fs.materializeFileInfo(ctx, f.virtualPath, f.info.actualPath, updatedEntry) - if err != nil { - return err - } - f.info = updatedInfo - f.reader = nil - if f.offset > size { - f.offset = size - } - return nil -} diff --git a/weed/server/nfs/handler.go b/weed/server/nfs/handler.go deleted file mode 100644 index d52e0f5cb..000000000 --- a/weed/server/nfs/handler.go +++ /dev/null @@ -1,182 +0,0 @@ -package nfs - -import ( - "context" - "net" - "os" - "strings" - - billy "github.com/go-git/go-billy/v5" - "github.com/seaweedfs/seaweedfs/weed/filer" - "github.com/seaweedfs/seaweedfs/weed/glog" - "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" - "github.com/seaweedfs/seaweedfs/weed/util" - gonfs "github.com/willscott/go-nfs" -) - -type Handler struct { - server *Server - rootFS *seaweedFileSystem -} - -var _ gonfs.Handler = (*Handler)(nil) - -func (h *Handler) Mount(ctx context.Context, conn net.Conn, req gonfs.MountRequest) (gonfs.MountStatus, billy.Filesystem, []gonfs.AuthFlavor) { - if h.server.clientAuthorizer != nil && !h.server.clientAuthorizer.isAllowedConn(conn) { - return gonfs.MountStatusErrAcces, nil, []gonfs.AuthFlavor{gonfs.AuthFlavorNull} - } - fs, status := h.resolveMountFilesystem(ctx, string(req.Dirpath)) - if status != gonfs.MountStatusOk { - return status, nil, []gonfs.AuthFlavor{gonfs.AuthFlavorNull} - } - return gonfs.MountStatusOk, fs, []gonfs.AuthFlavor{gonfs.AuthFlavorNull, gonfs.AuthFlavorUnix} -} - -// 
resolveMountFilesystem resolves the MOUNT3 dirpath to a filesystem: -// exact match serves the export root; a path strictly under the export -// is mounted at that subdirectory (NoEnt/NotDir if missing or not a -// directory); anything else falls back to the export root with an INFO -// log. The UDP MOUNT path mirrors this in mount_udp.go. -func (h *Handler) resolveMountFilesystem(ctx context.Context, requestedPath string) (*seaweedFileSystem, gonfs.MountStatus) { - requested := normalizeExportRoot(util.FullPath(requestedPath)) - // Exact match and outside-export both fall back to the export root. - // Only the second case logs; the first is the boring common path. - if requested == h.server.exportRoot || !requested.IsUnder(h.server.exportRoot) { - if requested != h.server.exportRoot { - glog.V(0).Infof("nfs mount: client requested %q (outside export %q); serving configured export", requestedPath, h.server.exportRoot) - } - return h.rootFS, h.lstatExportStatus(ctx) - } - entry, err := h.lookupSubexportEntry(ctx, requested) - switch { - case err != nil && isLookupNotFound(err): - return nil, gonfs.MountStatusErrNoEnt - case err != nil: - glog.Errorf("nfs mount: lookup %q under export %q failed: %v", requested, h.server.exportRoot, err) - return nil, gonfs.MountStatusErrServerFault - case entry == nil: - return nil, gonfs.MountStatusErrNoEnt - case !entry.IsDirectory: - return nil, gonfs.MountStatusErrNotDir - } - glog.V(1).Infof("nfs mount: client requested %q under export %q; mounting at subdirectory", requestedPath, h.server.exportRoot) - return newSeaweedFileSystem(h.server, requested, h.server.sharedReaderCache), gonfs.MountStatusOk -} - -func (h *Handler) lstatExportStatus(ctx context.Context) gonfs.MountStatus { - if _, err := h.rootFS.fileInfoForVirtualPath(ctx, "/"); err != nil { - if os.IsNotExist(err) { - return gonfs.MountStatusErrNoEnt - } - return gonfs.MountStatusErrServerFault - } - return gonfs.MountStatusOk -} - -func (h *Handler) 
lookupSubexportEntry(ctx context.Context, p util.FullPath) (*filer_pb.Entry, error) { - var entry *filer_pb.Entry - err := h.server.withInternalClient(false, func(client nfsFilerClient) error { - dir, name := p.DirAndName() - resp, lerr := client.LookupDirectoryEntry(ctx, &filer_pb.LookupDirectoryEntryRequest{ - Directory: dir, - Name: name, - }) - if lerr != nil { - return lerr - } - if resp != nil { - entry = resp.Entry - } - return nil - }) - return entry, err -} - -func (h *Handler) Change(filesystem billy.Filesystem) billy.Change { - if h.server != nil && h.server.option != nil && h.server.option.ReadOnly { - return nil - } - if changer, ok := filesystem.(billy.Change); ok { - return changer - } - return nil -} - -func (h *Handler) FSStat(ctx context.Context, _ billy.Filesystem, stat *gonfs.FSStat) error { - return h.server.withInternalClient(false, func(client nfsFilerClient) error { - resp, err := client.Statistics(ctx, &filer_pb.StatisticsRequest{}) - if err != nil { - return err - } - if resp == nil { - return nil - } - stat.TotalSize = resp.TotalSize - if resp.TotalSize >= resp.UsedSize { - stat.FreeSize = resp.TotalSize - resp.UsedSize - stat.AvailableSize = resp.TotalSize - resp.UsedSize - } - stat.TotalFiles = resp.FileCount - return nil - }) -} - -func (h *Handler) ToHandle(filesystem billy.Filesystem, path []string) []byte { - fs, ok := filesystem.(*seaweedFileSystem) - if !ok { - fs = h.rootFS - } - - info, err := fs.fileInfoForVirtualPath(context.Background(), fs.Join(path...)) - if err != nil { - return nil - } - - inode := info.entry.GetAttributes().GetInode() - if inode == 0 && info.actualPath == h.server.exportRoot && info.entry.IsDirectory { - return NewFileHandle(h.server.exportID, FileHandleKindDirectory, 0, filer.InodeIndexInitialGeneration).Encode() - } - - return NewFileHandle(h.server.exportID, fileHandleKindForEntry(info.entry), inode, info.generation).Encode() -} - -func (h *Handler) FromHandle(raw []byte) (billy.Filesystem, []string, 
error) { - var resolved *ResolvedHandle - err := h.server.withInternalClient(false, func(client nfsFilerClient) error { - var resolveErr error - resolved, resolveErr = NewResolver(h.server.exportRoot, client).ResolveHandle(context.Background(), raw) - return resolveErr - }) - if err != nil { - return nil, nil, err - } - - if resolved.Path == h.server.exportRoot { - return h.rootFS, nil, nil - } - - if !pathVisibleFromExport(resolved.Path, h.server.exportRoot) { - return nil, nil, ErrHandleExportMismatch - } - - relativePath := string(resolved.Path) - if h.server.exportRoot != "/" { - relativePath = strings.TrimPrefix(relativePath, string(h.server.exportRoot)) - } - return h.rootFS, util.NormalizePath(relativePath).Split(), nil -} - -func (h *Handler) InvalidateHandle(billy.Filesystem, []byte) error { - return nil -} - -func (h *Handler) HandleLimit() int { - return h.server.handleLimit -} - -func fileHandleKindForEntry(entry *filer_pb.Entry) FileHandleKind { - if entry != nil && entry.IsDirectory { - return FileHandleKindDirectory - } - return FileHandleKindFile -} diff --git a/weed/server/nfs/integration_test.go b/weed/server/nfs/integration_test.go deleted file mode 100644 index 5c997082d..000000000 --- a/weed/server/nfs/integration_test.go +++ /dev/null @@ -1,880 +0,0 @@ -package nfs - -import ( - "bytes" - "context" - "crypto/md5" - "encoding/base64" - "encoding/json" - "fmt" - "io" - "math/rand" - "mime/multipart" - "net" - "net/http" - "net/http/httptest" - "path" - "strconv" - "strings" - "sync" - "testing" - "time" - - "github.com/seaweedfs/seaweedfs/weed/filer" - "github.com/seaweedfs/seaweedfs/weed/pb" - "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" - "github.com/seaweedfs/seaweedfs/weed/util" - util_http "github.com/seaweedfs/seaweedfs/weed/util/http" - "github.com/seaweedfs/seaweedfs/weed/wdclient" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - gonfs "github.com/willscott/go-nfs" - nfsclient 
"github.com/willscott/go-nfs-client/nfs" - "github.com/willscott/go-nfs-client/nfs/rpc" - "github.com/willscott/go-nfs-client/nfs/xdr" - "google.golang.org/grpc" - "google.golang.org/grpc/credentials/insecure" -) - -type fakeVolumeBlob struct { - data []byte - contentEncoding string -} - -type fakeVolumeServer struct { - mu sync.Mutex - blobs map[string]fakeVolumeBlob - server *httptest.Server -} - -type fakeVolumeControlPlane struct { - filer_pb.UnimplementedSeaweedFilerServer - - mu sync.Mutex - host string - nextID int - assigns []*filer_pb.AssignVolumeRequest - lookups []*filer_pb.LookupVolumeRequest -} - -var initIntegrationHTTPClient sync.Once - -const nfsProc3Link = 15 - -func newFakeVolumeServer(t *testing.T) *fakeVolumeServer { - t.Helper() - - fake := &fakeVolumeServer{ - blobs: make(map[string]fakeVolumeBlob), - } - fake.server = httptest.NewServer(http.HandlerFunc(fake.serveHTTP)) - t.Cleanup(fake.server.Close) - return fake -} - -func (f *fakeVolumeServer) host() string { - return strings.TrimPrefix(f.server.URL, "http://") -} - -func (f *fakeVolumeServer) serveHTTP(w http.ResponseWriter, r *http.Request) { - fileID := strings.TrimPrefix(r.URL.Path, "/") - if fileID == "" { - http.NotFound(w, r) - return - } - - switch r.Method { - case http.MethodPost: - part, err := firstMultipartFile(r) - if err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) - return - } - defer part.Close() - - data, err := io.ReadAll(part) - if err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) - return - } - - contentEncoding := part.Header.Get("Content-Encoding") - sum := md5.Sum(data) - - f.mu.Lock() - f.blobs[fileID] = fakeVolumeBlob{ - data: bytes.Clone(data), - contentEncoding: contentEncoding, - } - f.mu.Unlock() - - w.Header().Set("Content-MD5", base64.StdEncoding.EncodeToString(sum[:])) - w.Header().Set("ETag", `"`+base64.StdEncoding.EncodeToString(sum[:])+`"`) - w.Header().Set("Content-Type", "application/json") - _ = 
json.NewEncoder(w).Encode(map[string]any{ - "name": path.Base(fileID), - "size": len(data), - }) - case http.MethodGet: - f.mu.Lock() - blob, found := f.blobs[fileID] - f.mu.Unlock() - if !found { - http.NotFound(w, r) - return - } - if blob.contentEncoding != "" { - w.Header().Set("Content-Encoding", blob.contentEncoding) - } - http.ServeContent(w, r, fileID, time.Unix(0, 0), bytes.NewReader(blob.data)) - default: - http.Error(w, "method not allowed", http.StatusMethodNotAllowed) - } -} - -func firstMultipartFile(r *http.Request) (*multipart.Part, error) { - reader, err := r.MultipartReader() - if err != nil { - return nil, err - } - - for { - part, err := reader.NextPart() - if err == io.EOF { - return nil, io.ErrUnexpectedEOF - } - if err != nil { - return nil, err - } - if part.FormName() == "file" { - return part, nil - } - part.Close() - } -} - -func (f *fakeVolumeControlPlane) AssignVolume(_ context.Context, req *filer_pb.AssignVolumeRequest) (*filer_pb.AssignVolumeResponse, error) { - f.mu.Lock() - defer f.mu.Unlock() - - f.assigns = append(f.assigns, req) - f.nextID++ - fileID := fmt.Sprintf("7,%08x", f.nextID) - return &filer_pb.AssignVolumeResponse{ - FileId: fileID, - Count: 1, - Location: &filer_pb.Location{ - Url: f.host, - }, - }, nil -} - -func (f *fakeVolumeControlPlane) LookupVolume(_ context.Context, req *filer_pb.LookupVolumeRequest) (*filer_pb.LookupVolumeResponse, error) { - f.mu.Lock() - f.lookups = append(f.lookups, req) - f.mu.Unlock() - - locations := make(map[string]*filer_pb.Locations, len(req.GetVolumeIds())) - for _, volumeID := range req.GetVolumeIds() { - locations[volumeID] = &filer_pb.Locations{ - Locations: []*filer_pb.Location{ - {Url: f.host}, - }, - } - } - return &filer_pb.LookupVolumeResponse{LocationsMap: locations}, nil -} - -func startFakeVolumeControlPlane(t *testing.T, controlPlane *fakeVolumeControlPlane) string { - t.Helper() - - listener, err := net.Listen("tcp", "127.0.0.1:0") - require.NoError(t, err) - - grpcServer 
:= grpc.NewServer() - filer_pb.RegisterSeaweedFilerServer(grpcServer, controlPlane) - - done := make(chan error, 1) - go func() { - done <- grpcServer.Serve(listener) - }() - - t.Cleanup(func() { - grpcServer.Stop() - _ = listener.Close() - select { - case err := <-done: - if err != nil && !isClosedNetworkErr(err) { - t.Errorf("fake control plane exited with error: %v", err) - } - case <-time.After(time.Second): - t.Errorf("timed out waiting for fake control plane shutdown") - } - }) - - return listener.Addr().String() -} - -func mountTestTarget(t *testing.T, server *Server) (*nfsclient.Target, func()) { - t.Helper() - - listener, err := net.Listen("tcp", "127.0.0.1:0") - require.NoError(t, err) - - handler, err := server.newHandler() - require.NoError(t, err) - - done := make(chan error, 1) - go func() { - done <- gonfs.Serve(listener, handler) - }() - - var client *rpc.Client - for attempt := 0; attempt < 10; attempt++ { - client, err = rpc.DialTCP(listener.Addr().Network(), listener.Addr().String(), false) - if err == nil { - break - } - if attempt == 9 { - require.NoError(t, err) - } - time.Sleep(10 * time.Millisecond) - } - require.NoError(t, err) - - mounter := &nfsclient.Mount{Client: client} - target, err := mounter.Mount(string(server.exportRoot), rpc.AuthNull) - require.NoError(t, err) - - cleanup := func() { - _ = mounter.Unmount() - client.Close() - _ = listener.Close() - - select { - case err := <-done: - if err != nil && !isClosedNetworkErr(err) { - t.Errorf("nfs server exited with error: %v", err) - } - case <-time.After(time.Second): - t.Errorf("timed out waiting for nfs server shutdown") - } - } - - return target, cleanup -} - -func isClosedNetworkErr(err error) bool { - if err == nil { - return false - } - if strings.Contains(err.Error(), "use of closed network connection") { - return true - } - return strings.Contains(err.Error(), "listener closed") -} - -func nfsLink(target *nfsclient.Target, sourceHandle []byte, linkPath string) error { - 
parentDir, linkName := path.Split(path.Clean(linkPath)) - if linkName == "" { - return fmt.Errorf("invalid hard link path %q", linkPath) - } - if parentDir == "" { - parentDir = "/" - } - - _, parentHandle, err := target.Lookup(parentDir) - if err != nil { - return err - } - - // Field layout matches the go-nfs server's onLink handler - // (vendor: github.com/willscott/go-nfs/nfs_onlink.go), which reads - // DirOpArg + SetFileAttributes + opaque target handle. That wire - // order differs from RFC 1813 §3.3.15 LINK3args {nfs_fh3 file; - // diropargs3 link;} — the go-nfs library is not strictly compliant - // here, and we mirror its layout so the integration test exercises - // the same parser the server uses. Do not reorder fields to match - // the RFC: the test would then fail against a correctly-functioning - // server. - type LinkArgs struct { - rpc.Header - Link nfsclient.Diropargs3 - Sattr nfsclient.Sattr3 - Target []byte - } - - res, err := target.Call(&LinkArgs{ - Header: rpc.Header{ - Rpcvers: 2, - Prog: nfsclient.Nfs3Prog, - Vers: nfsclient.Nfs3Vers, - Proc: nfsProc3Link, - Cred: rpc.AuthNull, - Verf: rpc.AuthNull, - }, - Link: nfsclient.Diropargs3{ - FH: parentHandle, - Filename: linkName, - }, - Target: sourceHandle, - }) - if err != nil { - return err - } - - status, err := xdr.ReadUint32(res) - if err != nil { - return err - } - return nfsclient.NFS3Error(status) -} - -func TestSeaweedNFSAcceptsAnyMountPathOverRPC(t *testing.T) { - const exportRoot = "/buckets/data" - - client := &fakeNFSFilerClient{ - entries: map[util.FullPath]*filer_pb.Entry{ - "/buckets": testEntry("buckets", true, 100, uint32(0755), nil), - "/buckets/data": testEntry("data", true, 101, uint32(0755), nil), - }, - kv: map[string][]byte{ - string(filer.InodeIndexKey(100)): testIndexRecord(t, 100, 1, "/buckets"), - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/buckets/data"), - }, - } - - server := newTestServer(t, exportRoot, client) - listener, err := 
net.Listen("tcp", "127.0.0.1:0") - require.NoError(t, err) - - handler, err := server.newHandler() - require.NoError(t, err) - - serveDone := make(chan error, 1) - go func() { - serveDone <- gonfs.Serve(listener, handler) - }() - t.Cleanup(func() { - _ = listener.Close() - select { - case err := <-serveDone: - if err != nil && !isClosedNetworkErr(err) { - t.Errorf("nfs server exited with error: %v", err) - } - case <-time.After(time.Second): - t.Errorf("timed out waiting for nfs server shutdown") - } - }) - - dirpaths := []string{ - "/", - "/buckets", - "/buckets/other", - "/wrong/path", - exportRoot, - exportRoot + "/", - } - for _, dirpath := range dirpaths { - t.Run(dirpath, func(t *testing.T) { - var rpcClient *rpc.Client - var dialErr error - for attempt := 0; attempt < 10; attempt++ { - rpcClient, dialErr = rpc.DialTCP(listener.Addr().Network(), listener.Addr().String(), false) - if dialErr == nil { - break - } - time.Sleep(10 * time.Millisecond) - } - require.NoError(t, dialErr) - defer rpcClient.Close() - - mounter := &nfsclient.Mount{Client: rpcClient} - target, err := mounter.Mount(dirpath, rpc.AuthNull) - require.NoErrorf(t, err, "Mount(%q)", dirpath) - defer target.Close() - - entries, err := target.ReadDirPlus("/") - require.NoError(t, err) - assert.Empty(t, entries, "Mount(%q) should land at the empty export root", dirpath) - }) - } -} - -func TestSeaweedNFSSubexportMountOverRPC(t *testing.T) { - const exportRoot = "/buckets" - - client := &fakeNFSFilerClient{ - entries: map[util.FullPath]*filer_pb.Entry{ - "/buckets": testEntry("buckets", true, 100, uint32(0755), nil), - "/buckets/data": testEntry("data", true, 101, uint32(0755), nil), - "/buckets/data/inner": testEntry("inner", false, 104, uint32(0644), []byte("payload")), - "/buckets/other": testEntry("other", true, 105, uint32(0755), nil), - "/buckets/file.txt": testEntry("file.txt", false, 103, uint32(0644), []byte("hi")), - }, - kv: map[string][]byte{ - string(filer.InodeIndexKey(100)): 
testIndexRecord(t, 100, 1, "/buckets"), - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/buckets/data"), - string(filer.InodeIndexKey(103)): testIndexRecord(t, 103, 1, "/buckets/file.txt"), - string(filer.InodeIndexKey(104)): testIndexRecord(t, 104, 1, "/buckets/data/inner"), - string(filer.InodeIndexKey(105)): testIndexRecord(t, 105, 1, "/buckets/other"), - }, - } - - server := newTestServer(t, exportRoot, client) - listener, err := net.Listen("tcp", "127.0.0.1:0") - require.NoError(t, err) - - handler, err := server.newHandler() - require.NoError(t, err) - - serveDone := make(chan error, 1) - go func() { - serveDone <- gonfs.Serve(listener, handler) - }() - t.Cleanup(func() { - _ = listener.Close() - select { - case err := <-serveDone: - if err != nil && !isClosedNetworkErr(err) { - t.Errorf("nfs server exited with error: %v", err) - } - case <-time.After(time.Second): - t.Errorf("timed out waiting for nfs server shutdown") - } - }) - - dial := func(t *testing.T) *rpc.Client { - t.Helper() - var rpcClient *rpc.Client - var dialErr error - for attempt := 0; attempt < 10; attempt++ { - rpcClient, dialErr = rpc.DialTCP(listener.Addr().Network(), listener.Addr().String(), false) - if dialErr == nil { - break - } - time.Sleep(10 * time.Millisecond) - } - require.NoError(t, dialErr) - t.Cleanup(func() { rpcClient.Close() }) - return rpcClient - } - - t.Run("mounts_under_export_at_subdirectory", func(t *testing.T) { - mounter := &nfsclient.Mount{Client: dial(t)} - target, err := mounter.Mount("/buckets/data", rpc.AuthNull) - require.NoError(t, err) - defer target.Close() - - entries, err := target.ReadDirPlus("/") - require.NoError(t, err) - require.Len(t, entries, 1) - assert.Equal(t, "inner", entries[0].Name()) - - readFile, err := target.Open("/inner") - require.NoError(t, err) - defer readFile.Close() - data, err := io.ReadAll(readFile) - require.NoError(t, err) - assert.Equal(t, []byte("payload"), data) - }) - - 
t.Run("missing_entry_under_export_rejects", func(t *testing.T) { - mounter := &nfsclient.Mount{Client: dial(t)} - _, err := mounter.Mount("/buckets/missing", rpc.AuthNull) - require.Error(t, err) - }) - - t.Run("regular_file_under_export_rejects", func(t *testing.T) { - mounter := &nfsclient.Mount{Client: dial(t)} - _, err := mounter.Mount("/buckets/file.txt", rpc.AuthNull) - require.Error(t, err) - }) -} - -func TestSeaweedNFSServesInlineRoundTripOverRPC(t *testing.T) { - client := &fakeNFSFilerClient{ - kv: map[string][]byte{ - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"), - }, - entries: map[util.FullPath]*filer_pb.Entry{ - "/exports": testEntry("exports", true, 101, uint32(0755), nil), - }, - } - - server := newTestServer(t, "/exports", client) - target, cleanup := mountTestTarget(t, server) - defer cleanup() - defer target.Close() - - _, err := target.Mkdir("/docs", 0o755) - require.NoError(t, err) - - file, err := target.OpenFile("/docs/note.txt", 0o644) - require.NoError(t, err) - payload := []byte("hello over rpc") - _, err = file.Write(payload) - require.NoError(t, err) - require.NoError(t, file.Close()) - - readFile, err := target.Open("/docs/note.txt") - require.NoError(t, err) - defer readFile.Close() - - data, err := io.ReadAll(readFile) - require.NoError(t, err) - assert.Equal(t, payload, data) - - entry := client.entries["/exports/docs/note.txt"] - require.NotNil(t, entry) - assert.Equal(t, payload, entry.Content) - assert.Empty(t, entry.Chunks) - - _, beforeRenameHandle, err := target.Lookup("/docs/note.txt") - require.NoError(t, err) - - entries, err := target.ReadDirPlus("/docs") - require.NoError(t, err) - require.Len(t, entries, 1) - assert.Equal(t, "note.txt", entries[0].Name()) - - require.NoError(t, target.Rename("/docs/note.txt", "/docs/final.txt")) - _, err = target.GetAttr(beforeRenameHandle) - require.NoError(t, err) - _, _, err = target.Lookup("/docs/final.txt") - require.NoError(t, err) - _, _, err = 
target.Lookup("/docs/note.txt") - require.Error(t, err) - - require.NoError(t, target.Remove("/docs/final.txt")) - _, _, err = target.Lookup("/docs/final.txt") - require.Error(t, err) -} - -func TestSeaweedNFSReadOnlyRejectsMutations(t *testing.T) { - client := &fakeNFSFilerClient{ - kv: map[string][]byte{ - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"), - string(filer.InodeIndexKey(202)): testIndexRecord(t, 202, 3, "/exports/existing.txt"), - }, - entries: map[util.FullPath]*filer_pb.Entry{ - "/exports": testEntry("exports", true, 101, uint32(0755), nil), - "/exports/existing.txt": testEntry("existing.txt", false, 202, uint32(0644), []byte("seed")), - }, - } - - server := newTestServer(t, "/exports", client) - server.option.ReadOnly = true - - target, cleanup := mountTestTarget(t, server) - defer cleanup() - defer target.Close() - - _, err := target.OpenFile("/created.txt", 0o644) - require.Error(t, err) - nfsErr, ok := err.(*nfsclient.Error) - require.True(t, ok) - assert.Equal(t, uint32(nfsclient.NFS3ErrROFS), nfsErr.ErrorNum) - - file, err := target.Open("/existing.txt") - require.NoError(t, err) - _, err = file.Write([]byte("mutate")) - require.Error(t, err) - nfsErr, ok = err.(*nfsclient.Error) - require.True(t, ok) - assert.Equal(t, uint32(nfsclient.NFS3ErrROFS), nfsErr.ErrorNum) - _ = file.Close() - - readFile, err := target.Open("/existing.txt") - require.NoError(t, err) - defer readFile.Close() - - data, err := io.ReadAll(readFile) - require.NoError(t, err) - assert.Equal(t, []byte("seed"), data) -} - -func TestSeaweedNFSServesSymlinkRoundTripOverRPC(t *testing.T) { - client := &fakeNFSFilerClient{ - kv: map[string][]byte{ - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"), - }, - entries: map[util.FullPath]*filer_pb.Entry{ - "/exports": testEntry("exports", true, 101, uint32(0755), nil), - }, - } - - server := newTestServer(t, "/exports", client) - target, cleanup := mountTestTarget(t, server) - defer 
cleanup() - defer target.Close() - - file, err := target.OpenFile("/target.txt", 0o644) - require.NoError(t, err) - _, err = file.Write([]byte("payload")) - require.NoError(t, err) - require.NoError(t, file.Close()) - - require.NoError(t, target.Symlink("target.txt", "/target.link")) - - info, _, err := target.Lookup("/target.link") - require.NoError(t, err) - attr, ok := info.(*nfsclient.Fattr) - require.True(t, ok) - assert.Equal(t, uint32(nfsclient.NF3Lnk), attr.Type) - - linkFile, err := target.Open("/target.link") - require.NoError(t, err) - defer linkFile.Close() - - linkTarget, err := linkFile.Readlink() - require.NoError(t, err) - assert.Equal(t, "target.txt", linkTarget) - - entry := client.entries["/exports/target.link"] - require.NotNil(t, entry) - assert.Equal(t, "target.txt", entry.GetAttributes().GetSymlinkTarget()) -} - -func TestSeaweedNFSServesHardLinkRoundTripOverRPC(t *testing.T) { - client := &fakeNFSFilerClient{ - kv: map[string][]byte{ - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"), - }, - entries: map[util.FullPath]*filer_pb.Entry{ - "/exports": testEntry("exports", true, 101, uint32(0755), nil), - }, - } - - server := newTestServer(t, "/exports", client) - target, cleanup := mountTestTarget(t, server) - defer cleanup() - defer target.Close() - - file, err := target.OpenFile("/source.txt", 0o644) - require.NoError(t, err) - payload := []byte("shared content") - _, err = file.Write(payload) - require.NoError(t, err) - require.NoError(t, file.Close()) - - _, sourceHandle, err := target.Lookup("/source.txt") - require.NoError(t, err) - require.NoError(t, nfsLink(target, sourceHandle, "/linked.txt")) - - sourceInfo, sourceHandle, err := target.Lookup("/source.txt") - require.NoError(t, err) - linkedInfo, linkedHandle, err := target.Lookup("/linked.txt") - require.NoError(t, err) - - sourceAttr, ok := sourceInfo.(*nfsclient.Fattr) - require.True(t, ok) - linkAttr, ok := linkedInfo.(*nfsclient.Fattr) - require.True(t, 
ok) - assert.Equal(t, sourceHandle, linkedHandle) - assert.Equal(t, sourceAttr.Fileid, linkAttr.Fileid) - assert.Equal(t, uint32(2), sourceAttr.Nlink) - assert.Equal(t, uint32(2), linkAttr.Nlink) - - linkedFile, err := target.Open("/linked.txt") - require.NoError(t, err) - defer linkedFile.Close() - - data, err := io.ReadAll(linkedFile) - require.NoError(t, err) - assert.Equal(t, payload, data) - - sourceEntry := client.entries["/exports/source.txt"] - linkedEntry := client.entries["/exports/linked.txt"] - require.NotNil(t, sourceEntry) - require.NotNil(t, linkedEntry) - assert.Equal(t, sourceEntry.GetHardLinkId(), linkedEntry.GetHardLinkId()) - assert.Equal(t, int32(2), sourceEntry.GetHardLinkCounter()) - assert.Equal(t, int32(2), linkedEntry.GetHardLinkCounter()) - - require.NoError(t, target.Remove("/source.txt")) - - remainingAttr, err := target.GetAttr(sourceHandle) - require.NoError(t, err) - assert.Equal(t, uint32(1), remainingAttr.Nlink) - - _, _, err = target.Lookup("/source.txt") - require.Error(t, err) - - linkedFile, err = target.Open("/linked.txt") - require.NoError(t, err) - data, err = io.ReadAll(linkedFile) - require.NoError(t, err) - require.NoError(t, linkedFile.Close()) - assert.Equal(t, payload, data) - - require.NoError(t, target.Remove("/linked.txt")) - _, err = target.GetAttr(linkedHandle) - require.Error(t, err) - nfsErr, ok := err.(*nfsclient.Error) - require.True(t, ok) - assert.Equal(t, uint32(nfsclient.NFS3ErrStale), nfsErr.ErrorNum) -} - -func TestSeaweedNFSServesLargeChunkRoundTripOverRPC(t *testing.T) { - initIntegrationHTTPClient.Do(util_http.InitGlobalHttpClient) - - client := &fakeNFSFilerClient{ - kv: map[string][]byte{ - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"), - }, - entries: map[util.FullPath]*filer_pb.Entry{ - "/exports": testEntry("exports", true, 101, uint32(0755), nil), - }, - } - - volumeServer := newFakeVolumeServer(t) - controlPlane := &fakeVolumeControlPlane{host: volumeServer.host()} - 
controlPlaneAddr := startFakeVolumeControlPlane(t, controlPlane) - _, grpcPortString, err := net.SplitHostPort(controlPlaneAddr) - require.NoError(t, err) - grpcPort, err := strconv.Atoi(grpcPortString) - require.NoError(t, err) - - server := newTestServer(t, "/exports", client) - server.option.Filer = pb.NewServerAddressWithGrpcPort(controlPlaneAddr, grpcPort) - server.option.GrpcDialOption = grpc.WithTransportCredentials(insecure.NewCredentials()) - if server.filerClient != nil { - server.filerClient.Close() - } - server.filerClient = wdclient.NewFilerClient([]pb.ServerAddress{server.option.Filer}, server.option.GrpcDialOption, "") - server.withFilerClient = func(_ bool, fn func(filer_pb.SeaweedFilerClient) error) error { - conn, err := grpc.NewClient(controlPlaneAddr, grpc.WithTransportCredentials(insecure.NewCredentials())) - if err != nil { - return err - } - defer conn.Close() - return fn(filer_pb.NewSeaweedFilerClient(conn)) - } - - target, cleanup := mountTestTarget(t, server) - defer cleanup() - defer target.Close() - - payload := make([]byte, maxInlineWriteSize+4096) - _, err = rand.New(rand.NewSource(1)).Read(payload) - require.NoError(t, err) - - file, err := target.OpenFile("/big.bin", 0o644) - require.NoError(t, err) - _, err = file.Write(payload) - require.NoError(t, err) - require.NoError(t, file.Close()) - - entry := client.entries["/exports/big.bin"] - require.NotNil(t, entry) - require.Len(t, entry.GetChunks(), 1) - assert.Nil(t, entry.Content) - assert.Equal(t, uint64(len(payload)), entry.GetAttributes().GetFileSize()) - - readFile, err := target.Open("/big.bin") - require.NoError(t, err) - defer readFile.Close() - - data, err := io.ReadAll(readFile) - require.NoError(t, err) - assert.Equal(t, payload, data) - - controlPlane.mu.Lock() - defer controlPlane.mu.Unlock() - require.Len(t, controlPlane.assigns, 1) - assert.Equal(t, "/exports/big.bin", controlPlane.assigns[0].GetPath()) - assert.NotEmpty(t, controlPlane.lookups) -} - -func 
TestSeaweedNFSRejectsStaleHandleAfterDeleteRecreate(t *testing.T) { - client := &fakeNFSFilerClient{ - kv: map[string][]byte{ - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"), - }, - entries: map[util.FullPath]*filer_pb.Entry{ - "/exports": testEntry("exports", true, 101, uint32(0755), nil), - }, - } - - server := newTestServer(t, "/exports", client) - target, cleanup := mountTestTarget(t, server) - defer cleanup() - defer target.Close() - - file, err := target.OpenFile("/stale.txt", 0o644) - require.NoError(t, err) - _, err = file.Write([]byte("old")) - require.NoError(t, err) - require.NoError(t, file.Close()) - - _, oldHandle, err := target.Lookup("/stale.txt") - require.NoError(t, err) - - require.NoError(t, target.Remove("/stale.txt")) - - file, err = target.OpenFile("/stale.txt", 0o644) - require.NoError(t, err) - _, err = file.Write([]byte("new")) - require.NoError(t, err) - require.NoError(t, file.Close()) - - _, err = target.GetAttr(oldHandle) - require.Error(t, err) - nfsErr, ok := err.(*nfsclient.Error) - require.True(t, ok) - assert.Equal(t, uint32(nfsclient.NFS3ErrStale), nfsErr.ErrorNum) - - _, newHandle, err := target.Lookup("/stale.txt") - require.NoError(t, err) - _, err = target.GetAttr(newHandle) - require.NoError(t, err) -} - -func TestSeaweedNFSFileHandleSurvivesServerRestart(t *testing.T) { - client := &fakeNFSFilerClient{ - kv: map[string][]byte{ - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"), - }, - entries: map[util.FullPath]*filer_pb.Entry{ - "/exports": testEntry("exports", true, 101, uint32(0755), nil), - }, - } - - server := newTestServer(t, "/exports", client) - target, cleanup := mountTestTarget(t, server) - - file, err := target.OpenFile("/restart.txt", 0o644) - require.NoError(t, err) - payload := []byte("survives restart") - _, err = file.Write(payload) - require.NoError(t, err) - require.NoError(t, file.Close()) - - _, handle, err := target.Lookup("/restart.txt") - 
require.NoError(t, err) - - target.Close() - cleanup() - - restartedServer := newTestServer(t, "/exports", client) - restartedTarget, restartedCleanup := mountTestTarget(t, restartedServer) - defer restartedCleanup() - defer restartedTarget.Close() - - attr, err := restartedTarget.GetAttr(handle) - require.NoError(t, err) - assert.Equal(t, uint64(client.entries["/exports/restart.txt"].GetAttributes().GetInode()), attr.Fileid) - - _, restartedHandle, err := restartedTarget.Lookup("/restart.txt") - require.NoError(t, err) - assert.Equal(t, handle, restartedHandle) - - readFile, err := restartedTarget.Open("/restart.txt") - require.NoError(t, err) - defer readFile.Close() - - data, err := io.ReadAll(readFile) - require.NoError(t, err) - assert.Equal(t, payload, data) -} diff --git a/weed/server/nfs/internal_client.go b/weed/server/nfs/internal_client.go deleted file mode 100644 index e3bca1882..000000000 --- a/weed/server/nfs/internal_client.go +++ /dev/null @@ -1,88 +0,0 @@ -package nfs - -import ( - "context" - - "github.com/seaweedfs/seaweedfs/weed/pb" - "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" - "google.golang.org/grpc" -) - -type filerClientExecutor func(streamingMode bool, fn func(filer_pb.SeaweedFilerClient) error) error -type internalClientExecutor func(streamingMode bool, fn func(nfsFilerClient) error) error - -type nfsListEntriesClient interface { - Recv() (*filer_pb.ListEntriesResponse, error) -} - -type nfsSubscribeMetadataClient interface { - Recv() (*filer_pb.SubscribeMetadataResponse, error) -} - -type nfsFilerClient interface { - KvGet(ctx context.Context, in *filer_pb.KvGetRequest, opts ...grpc.CallOption) (*filer_pb.KvGetResponse, error) - LookupDirectoryEntry(ctx context.Context, in *filer_pb.LookupDirectoryEntryRequest, opts ...grpc.CallOption) (*filer_pb.LookupDirectoryEntryResponse, error) - ListEntries(ctx context.Context, in *filer_pb.ListEntriesRequest, opts ...grpc.CallOption) (nfsListEntriesClient, error) - SubscribeMetadata(ctx 
context.Context, in *filer_pb.SubscribeMetadataRequest, opts ...grpc.CallOption) (nfsSubscribeMetadataClient, error) - CreateEntry(ctx context.Context, in *filer_pb.CreateEntryRequest, opts ...grpc.CallOption) (*filer_pb.CreateEntryResponse, error) - UpdateEntry(ctx context.Context, in *filer_pb.UpdateEntryRequest, opts ...grpc.CallOption) (*filer_pb.UpdateEntryResponse, error) - DeleteEntry(ctx context.Context, in *filer_pb.DeleteEntryRequest, opts ...grpc.CallOption) (*filer_pb.DeleteEntryResponse, error) - AtomicRenameEntry(ctx context.Context, in *filer_pb.AtomicRenameEntryRequest, opts ...grpc.CallOption) (*filer_pb.AtomicRenameEntryResponse, error) - Statistics(ctx context.Context, in *filer_pb.StatisticsRequest, opts ...grpc.CallOption) (*filer_pb.StatisticsResponse, error) -} - -type grpcNFSFilerClient struct { - client filer_pb.SeaweedFilerClient -} - -func (c grpcNFSFilerClient) KvGet(ctx context.Context, in *filer_pb.KvGetRequest, opts ...grpc.CallOption) (*filer_pb.KvGetResponse, error) { - return c.client.KvGet(ctx, in, opts...) -} - -func (c grpcNFSFilerClient) LookupDirectoryEntry(ctx context.Context, in *filer_pb.LookupDirectoryEntryRequest, opts ...grpc.CallOption) (*filer_pb.LookupDirectoryEntryResponse, error) { - return c.client.LookupDirectoryEntry(ctx, in, opts...) -} - -func (c grpcNFSFilerClient) ListEntries(ctx context.Context, in *filer_pb.ListEntriesRequest, opts ...grpc.CallOption) (nfsListEntriesClient, error) { - return c.client.ListEntries(ctx, in, opts...) -} - -func (c grpcNFSFilerClient) SubscribeMetadata(ctx context.Context, in *filer_pb.SubscribeMetadataRequest, opts ...grpc.CallOption) (nfsSubscribeMetadataClient, error) { - return c.client.SubscribeMetadata(ctx, in, opts...) -} - -func (c grpcNFSFilerClient) CreateEntry(ctx context.Context, in *filer_pb.CreateEntryRequest, opts ...grpc.CallOption) (*filer_pb.CreateEntryResponse, error) { - return c.client.CreateEntry(ctx, in, opts...) 
-} - -func (c grpcNFSFilerClient) UpdateEntry(ctx context.Context, in *filer_pb.UpdateEntryRequest, opts ...grpc.CallOption) (*filer_pb.UpdateEntryResponse, error) { - return c.client.UpdateEntry(ctx, in, opts...) -} - -func (c grpcNFSFilerClient) DeleteEntry(ctx context.Context, in *filer_pb.DeleteEntryRequest, opts ...grpc.CallOption) (*filer_pb.DeleteEntryResponse, error) { - return c.client.DeleteEntry(ctx, in, opts...) -} - -func (c grpcNFSFilerClient) AtomicRenameEntry(ctx context.Context, in *filer_pb.AtomicRenameEntryRequest, opts ...grpc.CallOption) (*filer_pb.AtomicRenameEntryResponse, error) { - return c.client.AtomicRenameEntry(ctx, in, opts...) -} - -func (c grpcNFSFilerClient) Statistics(ctx context.Context, in *filer_pb.StatisticsRequest, opts ...grpc.CallOption) (*filer_pb.StatisticsResponse, error) { - return c.client.Statistics(ctx, in, opts...) -} - -func newFilerClientExecutor(option *Option, signature int32) filerClientExecutor { - return func(streamingMode bool, fn func(filer_pb.SeaweedFilerClient) error) error { - return pb.WithGrpcClient(streamingMode, signature, func(grpcConnection *grpc.ClientConn) error { - return fn(filer_pb.NewSeaweedFilerClient(grpcConnection)) - }, option.Filer.ToGrpcAddress(), false, option.GrpcDialOption) - } -} - -func newInternalClientExecutor(option *Option, signature int32) internalClientExecutor { - return func(streamingMode bool, fn func(nfsFilerClient) error) error { - return pb.WithGrpcClient(streamingMode, signature, func(grpcConnection *grpc.ClientConn) error { - return fn(grpcNFSFilerClient{client: filer_pb.NewSeaweedFilerClient(grpcConnection)}) - }, option.Filer.ToGrpcAddress(), false, option.GrpcDialOption) - } -} diff --git a/weed/server/nfs/metadata_follow.go b/weed/server/nfs/metadata_follow.go deleted file mode 100644 index 5ed0a44ce..000000000 --- a/weed/server/nfs/metadata_follow.go +++ /dev/null @@ -1,147 +0,0 @@ -package nfs - -import ( - "context" - "errors" - "io" - "time" - - 
"github.com/seaweedfs/seaweedfs/weed/glog" - "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" - "github.com/seaweedfs/seaweedfs/weed/util" -) - -type chunkInvalidator interface { - UnCache(fileID string) -} - -type metadataInvalidation struct { - path util.FullPath - entry *filer_pb.Entry -} - -func (s *Server) runMetadataInvalidationLoop(ctx context.Context) { - if s == nil || s.chunkInvalidator == nil || s.withInternalClient == nil { - return - } - - waitTime := time.Second - for ctx.Err() == nil { - err := s.followMetadataStream(ctx) - if err == nil || errors.Is(err, context.Canceled) || ctx.Err() != nil { - return - } - - glog.V(0).Infof("retry nfs metadata invalidation stream for %s in %v: %v", s.exportRoot, waitTime, err) - - timer := time.NewTimer(waitTime) - select { - case <-ctx.Done(): - if !timer.Stop() { - <-timer.C - } - return - case <-timer.C: - } - if waitTime < util.RetryWaitTime { - waitTime += waitTime / 2 - } - } -} - -func (s *Server) followMetadataStream(ctx context.Context) error { - req := &filer_pb.SubscribeMetadataRequest{ - ClientName: "nfs", - PathPrefix: string(s.exportRoot), - ClientId: s.signature, - ClientEpoch: 1, - ClientSupportsBatching: true, - } - - return s.withInternalClient(true, func(client nfsFilerClient) error { - stream, err := client.SubscribeMetadata(ctx, req) - if err != nil { - return err - } - for { - resp, err := stream.Recv() - if err == io.EOF { - return nil - } - if err != nil { - return err - } - s.applyMetadataInvalidationResponse(resp) - } - }) -} - -func (s *Server) applyMetadataInvalidationResponse(resp *filer_pb.SubscribeMetadataResponse) { - if s == nil || s.chunkInvalidator == nil || resp == nil { - return - } - - uncached := make(map[string]struct{}) - apply := func(event *filer_pb.SubscribeMetadataResponse) { - for _, invalidation := range metadataInvalidationsForEvent(event) { - if invalidation.entry == nil || !pathVisibleFromExport(invalidation.path, s.exportRoot) { - continue - } - for _, chunk := 
range invalidation.entry.GetChunks() { - fileID := chunk.GetFileIdString() - if fileID == "" { - continue - } - if _, seen := uncached[fileID]; seen { - continue - } - uncached[fileID] = struct{}{} - s.chunkInvalidator.UnCache(fileID) - } - } - } - - apply(resp) - for _, event := range resp.Events { - apply(event) - } -} - -func metadataInvalidationsForEvent(resp *filer_pb.SubscribeMetadataResponse) []metadataInvalidation { - message := resp.GetEventNotification() - if message == nil { - return nil - } - - var invalidations []metadataInvalidation - if message.OldEntry != nil && message.NewEntry != nil { - oldPath := util.NewFullPath(resp.Directory, message.OldEntry.Name) - invalidations = append(invalidations, metadataInvalidation{path: oldPath, entry: message.OldEntry}) - - newDir := resp.Directory - if message.NewParentPath != "" { - newDir = message.NewParentPath - } - if message.OldEntry.Name != message.NewEntry.Name || resp.Directory != newDir { - newPath := util.NewFullPath(newDir, message.NewEntry.Name) - invalidations = append(invalidations, metadataInvalidation{path: newPath, entry: message.NewEntry}) - } - return invalidations - } - - if message.NewEntry != nil { - newDir := resp.Directory - if message.NewParentPath != "" { - newDir = message.NewParentPath - } - newPath := util.NewFullPath(newDir, message.NewEntry.Name) - invalidations = append(invalidations, metadataInvalidation{path: newPath, entry: message.NewEntry}) - } - - if message.OldEntry != nil { - oldPath := util.NewFullPath(resp.Directory, message.OldEntry.Name) - invalidations = append(invalidations, metadataInvalidation{path: oldPath, entry: message.OldEntry}) - } - - return invalidations -} diff --git a/weed/server/nfs/mount_udp.go b/weed/server/nfs/mount_udp.go deleted file mode 100644 index 6163367f6..000000000 --- a/weed/server/nfs/mount_udp.go +++ /dev/null @@ -1,343 +0,0 @@ -package nfs - -import ( - "context" - "encoding/binary" - "fmt" - "net" - "os" - "sync" - "time" - - 
"github.com/seaweedfs/seaweedfs/weed/filer" - "github.com/seaweedfs/seaweedfs/weed/glog" - "github.com/seaweedfs/seaweedfs/weed/util" -) - -// The upstream willscott/go-nfs library only serves the MOUNT protocol over -// TCP. Linux's mount.nfs and the in-kernel NFS client default `mountproto` to -// UDP in many configurations, so against a stock `weed nfs` deployment the -// kernel queries portmap for "MOUNT v3 UDP", gets port=0 ("not registered"), -// and either falls back inconsistently or surfaces EPROTONOSUPPORT -// ("requested NFS version or transport protocol is not supported"). The user -// either has to add `mountproto=tcp` / `mountport=2049` to their mount -// options or guess that their distro happens to fall back to TCP on its own. -// -// This responder closes that gap. It speaks just enough of MOUNT v3 to handle -// MOUNT_NULL / MOUNT_MNT / MOUNT_UMNT over UDP — the only procedures the -// kernel actually invokes during mount setup and teardown — so plain -// `mount -t nfs : /mnt` works without any client-side protocol -// hints. The protocol layout is intentionally identical to the TCP MOUNT -// handler in handler.go's Mount() so the two paths return the same -// filehandle and the same set of auth flavors for the same export. -// -// References: RFC 1813 §5 (NFSv3/MOUNTv3), RFC 5531 (RPC). - -const ( - mountUDPMaxRecord = 32 * 1024 - - // mountUDPRetryBackoff mirrors portmapRetryBackoff so the two - // listening goroutines back off identically under host pressure. - mountUDPRetryBackoff = 50 * time.Millisecond - - // mountUDPLookupTimeout bounds any filer round-trip the UDP MOUNT - // path makes (export-root existence check, subexport lookup). The - // UDP serve loop is single-threaded, so a stalled filer call would - // otherwise block every later MOUNT packet. - mountUDPLookupTimeout = 5 * time.Second - - mountVersion = 3 - - mountProcNull = 0 - mountProcMnt = 1 - mountProcUmnt = 3 - - // MOUNT v3 status codes (mountstat3 in RFC 1813 §5.1.1). 
- mnt3StatOK uint32 = 0 - mnt3ErrAcces uint32 = 13 - mnt3ErrNoEnt uint32 = 2 - mnt3ErrNotDir uint32 = 20 - mnt3ErrServerFault uint32 = 10006 - - // XDR opaque length cap for dirpath. RFC 1813 §5.1 limits MNTPATHLEN - // to 1024; cap a bit higher for headroom and reject anything beyond. - mountUDPMaxPathLen = 4096 - - // AuthFlavor numeric IDs (matches go-nfs and RFC 5531 §8). - authFlavorNull = 0 - authFlavorUnix = 1 -) - -// mountUDPServer answers MOUNT v3 RPCs over UDP. It listens on the same port -// the NFS TCP server uses (2049 by default), since that's what we advertise -// via portmap, and shares the parent Server's exportRoot, exportID, and -// client allowlist so the UDP MOUNT path applies the same access policy as -// the TCP path. -type mountUDPServer struct { - bindIP string - port int - server *Server - - udpConn *net.UDPConn - - mu sync.Mutex - closed bool - done chan struct{} - wg sync.WaitGroup -} - -func newMountUDPServer(bindIP string, port int, server *Server) *mountUDPServer { - return &mountUDPServer{ - bindIP: bindIP, - port: port, - server: server, - done: make(chan struct{}), - } -} - -func (m *mountUDPServer) Start() error { - addr := net.JoinHostPort(m.bindIP, fmt.Sprintf("%d", m.port)) - udpAddr, err := net.ResolveUDPAddr("udp", addr) - if err != nil { - return fmt.Errorf("mount udp resolve %s: %w", addr, err) - } - udpConn, err := net.ListenUDP("udp", udpAddr) - if err != nil { - return fmt.Errorf("mount udp listen %s: %w", addr, err) - } - m.udpConn = udpConn - m.wg.Add(1) - go func() { - defer m.wg.Done() - m.serve() - }() - return nil -} - -func (m *mountUDPServer) Close() error { - m.mu.Lock() - if m.closed { - m.mu.Unlock() - return nil - } - m.closed = true - close(m.done) - m.mu.Unlock() - if m.udpConn != nil { - _ = m.udpConn.Close() - } - m.wg.Wait() - return nil -} - -func (m *mountUDPServer) isClosed() bool { - m.mu.Lock() - defer m.mu.Unlock() - return m.closed -} - -func (m *mountUDPServer) serve() { - buf := make([]byte, 
mountUDPMaxRecord) - for { - n, addr, err := m.udpConn.ReadFromUDP(buf) - if err != nil { - if m.isClosed() { - return - } - // Transient read failure: log, back off, keep the - // responder alive — same pattern as portmap UDP. - glog.V(1).Infof("mount udp read: %v", err) - select { - case <-m.done: - return - case <-time.After(mountUDPRetryBackoff): - continue - } - } - // Apply the parent server's client allowlist before we even - // look at the RPC bytes, mirroring the TCP path's - // allowlistListener wrapping. - if m.server != nil && m.server.clientAuthorizer != nil && !m.server.clientAuthorizer.isAllowedAddr(addr) { - glog.V(1).Infof("mount udp: rejecting unauthorized client %s", addr) - continue - } - reply := m.handleCall(buf[:n], addr) - if reply == nil { - continue - } - if _, err := m.udpConn.WriteToUDP(reply, addr); err != nil { - glog.V(1).Infof("mount udp write to %s: %v", addr, err) - } - } -} - -// handleCall classifies one RPC CALL message and returns the encoded reply, -// or nil if the call is malformed enough to drop silently. -func (m *mountUDPServer) handleCall(callBuf []byte, addr *net.UDPAddr) []byte { - xid, prog, vers, proc, args, err := parseRPCCall(callBuf) - if err != nil { - return nil - } - if prog != mountProgram { - return encodeAcceptedReply(xid, rpcAcceptProgUnavail, nil) - } - if vers != mountVersion { - // Mismatch — advertise the v3..v3 we actually support. - body := make([]byte, 8) - binary.BigEndian.PutUint32(body[0:4], mountVersion) - binary.BigEndian.PutUint32(body[4:8], mountVersion) - return encodeAcceptedReply(xid, rpcAcceptProgMismatch, body) - } - - switch proc { - case mountProcNull: - return encodeAcceptedReply(xid, rpcAcceptSuccess, nil) - case mountProcMnt: - return m.handleMount(xid, args, addr) - case mountProcUmnt: - // Stateless server: there's nothing to forget, just acknowledge. - // The client sends back the dirpath in args; we don't need to - // validate it here because UMNT has no return data. 
- return encodeAcceptedReply(xid, rpcAcceptSuccess, nil) - default: - // MOUNT v3 also defines DUMP / EXPORT / UMNTALL but the kernel - // mount path doesn't invoke them. Returning PROC_UNAVAIL is - // the protocol-correct response. - return encodeAcceptedReply(xid, rpcAcceptProcUnavail, nil) - } -} - -// handleMount implements MOUNT v3 MNT. RFC 1813 §5.1.4: -// -// MOUNT3args { dirpath3 dirpath; } // XDR opaque -// MOUNT3res { mountstat3 status; if OK { handle, auth_flavors[] } } -// -// Mirrors Handler.resolveMountFilesystem: exact match returns the -// synthetic root handle; under-export resolves to the subdirectory's -// handle; outside-export falls back to the synthetic root. -func (m *mountUDPServer) handleMount(xid uint32, args []byte, addr *net.UDPAddr) []byte { - if len(args) < 4 { - return encodeAcceptedReply(xid, rpcAcceptGarbageArgs, nil) - } - pathLen := binary.BigEndian.Uint32(args[0:4]) - if pathLen > mountUDPMaxPathLen { - return encodeAcceptedReply(xid, rpcAcceptGarbageArgs, nil) - } - padded := (pathLen + 3) &^ 3 - if uint32(len(args)) < 4+padded { - return encodeAcceptedReply(xid, rpcAcceptGarbageArgs, nil) - } - dirpath := string(args[4 : 4+pathLen]) - requested := normalizeExportRoot(util.FullPath(dirpath)) - flavors := []uint32{authFlavorNull, authFlavorUnix} - - ctx, cancel := context.WithTimeout(context.Background(), mountUDPLookupTimeout) - defer cancel() - - // Exact match and outside-export both fall back to the synthetic root - // handle. Only the second case logs; the first is the common path. 
- if requested == m.server.exportRoot || !requested.IsUnder(m.server.exportRoot) { - if requested != m.server.exportRoot { - glog.V(0).Infof("mount udp: client %s requested %q (outside export %q); serving configured export", addr, dirpath, m.server.exportRoot) - } - if status := m.rootMountStatus(ctx); status != mnt3StatOK { - return encodeMountStatus(xid, status) - } - return encodeMountSuccess(xid, syntheticRootHandle(m.server), flavors) - } - fh, status := m.resolveSubexportFileHandle(ctx, requested) - if status != mnt3StatOK { - return encodeMountStatus(xid, status) - } - glog.V(1).Infof("mount udp: client %s requested %q under export %q; mounting at subdirectory", addr, dirpath, m.server.exportRoot) - return encodeMountSuccess(xid, fh, flavors) -} - -// rootMountStatus is the UDP analogue of Handler.lstatExportStatus: -// confirms the configured export root still exists in the filer so the -// transport-OK branches can't hand out a handle pointing at a deleted -// directory. Reuses the Server's shared rootFS instance so we don't -// construct a wrapper per MOUNT request. -func (m *mountUDPServer) rootMountStatus(ctx context.Context) uint32 { - if m.server.withInternalClient == nil { - return mnt3StatOK - } - switch _, err := m.server.rootFilesystem().fileInfoForVirtualPath(ctx, "/"); { - case err == nil: - return mnt3StatOK - case os.IsNotExist(err): - return mnt3ErrNoEnt - default: - glog.Errorf("mount udp: export root %q lookup failed: %v", m.server.exportRoot, err) - return mnt3ErrServerFault - } -} - -// resolveSubexportFileHandle is the UDP analogue of the sub-fs branch in -// Handler.resolveMountFilesystem. The TCP path lets go-nfs's onMount call -// ToHandle on the returned filesystem; UDP encodes the FH itself, so the -// inode/generation lookup happens explicitly here. -// -// The UDP listener is up before serve() runs newHandler(), so a subexport -// MOUNT can land here before sharedReaderCache has been assigned. 
Resolve -// the rootFS first to drive Server.rootFilesystem's sync.Once and read -// the cache directly off it, so the new sub-fs always shares the same -// reader cache the TCP path uses. -func (m *mountUDPServer) resolveSubexportFileHandle(ctx context.Context, requested util.FullPath) ([]byte, uint32) { - if m.server.withInternalClient == nil { - return nil, mnt3ErrServerFault - } - rootFS := m.server.rootFilesystem() - subFS := newSeaweedFileSystem(m.server, requested, rootFS.readerCache) - info, err := subFS.fileInfoForVirtualPath(ctx, "/") - switch { - case err == nil: - case os.IsNotExist(err): - return nil, mnt3ErrNoEnt - default: - glog.Errorf("mount udp: subexport lookup %q failed: %v", requested, err) - return nil, mnt3ErrServerFault - } - if !info.entry.IsDirectory { - return nil, mnt3ErrNotDir - } - inode := info.entry.GetAttributes().GetInode() - return NewFileHandle(m.server.exportID, FileHandleKindDirectory, inode, info.generation).Encode(), mnt3StatOK -} - -func syntheticRootHandle(s *Server) []byte { - return NewFileHandle(s.exportID, FileHandleKindDirectory, 0, filer.InodeIndexInitialGeneration).Encode() -} - -// encodeMountStatus returns a MOUNT MNT reply carrying just an error status. -// Per RFC 1813 §5.1.4 a non-OK status terminates the response — no handle or -// flavors follow. -func encodeMountStatus(xid, status uint32) []byte { - body := make([]byte, 4) - binary.BigEndian.PutUint32(body, status) - return encodeAcceptedReply(xid, rpcAcceptSuccess, body) -} - -// encodeMountSuccess builds the OK MOUNT MNT reply: status=OK, file handle -// (XDR opaque), and the supported auth_flavors list. 
-func encodeMountSuccess(xid uint32, handle []byte, flavors []uint32) []byte { - handleLen := uint32(len(handle)) - handlePadded := (handleLen + 3) &^ 3 - bodyLen := 4 + 4 + handlePadded + 4 + 4*uint32(len(flavors)) - - body := make([]byte, bodyLen) - binary.BigEndian.PutUint32(body[0:4], mnt3StatOK) - binary.BigEndian.PutUint32(body[4:8], handleLen) - copy(body[8:8+handleLen], handle) - // Trailing pad bytes are already zero from make(). - - pos := 8 + handlePadded - binary.BigEndian.PutUint32(body[pos:pos+4], uint32(len(flavors))) - pos += 4 - for _, fl := range flavors { - binary.BigEndian.PutUint32(body[pos:pos+4], fl) - pos += 4 - } - - return encodeAcceptedReply(xid, rpcAcceptSuccess, body) -} diff --git a/weed/server/nfs/mount_udp_test.go b/weed/server/nfs/mount_udp_test.go deleted file mode 100644 index cba441c76..000000000 --- a/weed/server/nfs/mount_udp_test.go +++ /dev/null @@ -1,431 +0,0 @@ -package nfs - -import ( - "context" - "encoding/binary" - "net" - "testing" - "time" - - "github.com/seaweedfs/seaweedfs/weed/filer" - "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" - "github.com/seaweedfs/seaweedfs/weed/util" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - gonfs "github.com/willscott/go-nfs" -) - -// buildMountCallFrame constructs a MOUNT v3 RPC CALL with an opaque dirpath -// argument. The shape matches RFC 5531 §9: xid + msg_type=CALL + rpcvers=2 + -// prog + vers + proc + cred(AUTH_NONE) + verf(AUTH_NONE) + arg. -func buildMountCallFrame(xid, prog, vers, proc uint32, dirpath string) []byte { - // RPC CALL header (24 bytes) + 2 × AUTH_NONE opaque_auth (16 bytes) + - // dirpath as XDR opaque (4-byte length + padded body). 
- dpLen := uint32(len(dirpath)) - dpPadded := (dpLen + 3) &^ 3 - out := make([]byte, 24+16+4+dpPadded) - binary.BigEndian.PutUint32(out[0:4], xid) - binary.BigEndian.PutUint32(out[4:8], rpcMsgCall) - binary.BigEndian.PutUint32(out[8:12], 2) // rpcvers - binary.BigEndian.PutUint32(out[12:16], prog) - binary.BigEndian.PutUint32(out[16:20], vers) - binary.BigEndian.PutUint32(out[20:24], proc) - // cred + verf both AUTH_NONE / length 0 (already zero-filled). - binary.BigEndian.PutUint32(out[40:44], dpLen) - copy(out[44:44+dpLen], dirpath) - return out -} - -func newMountUDPTestServer(t *testing.T, exportPath string) (*mountUDPServer, *net.UDPConn) { - t.Helper() - return newMountUDPTestServerWithClient(t, exportPath, nil) -} - -// newMountUDPTestServerWithClient wires Server.withInternalClient when -// client is non-nil, so the under-export lookup branch in handleMount -// can find directory entries. -func newMountUDPTestServerWithClient(t *testing.T, exportPath string, client *fakeNFSFilerClient) (*mountUDPServer, *net.UDPConn) { - t.Helper() - - exportRoot := normalizeExportRoot(util.FullPath(exportPath)) - authz, err := newClientAuthorizer(nil) - if err != nil { - t.Fatal(err) - } - srv := &Server{ - option: &Option{}, - exportRoot: exportRoot, - exportID: exportIDForRoot(exportRoot), - clientAuthorizer: authz, - } - if client != nil { - srv.withInternalClient = func(_ bool, fn func(nfsFilerClient) error) error { - return fn(client) - } - } - - udpAddr, err := net.ResolveUDPAddr("udp", "127.0.0.1:0") - if err != nil { - t.Fatal(err) - } - conn, err := net.ListenUDP("udp", udpAddr) - if err != nil { - t.Fatal(err) - } - - m := &mountUDPServer{ - bindIP: "127.0.0.1", - port: conn.LocalAddr().(*net.UDPAddr).Port, - server: srv, - udpConn: conn, - done: make(chan struct{}), - } - m.wg.Add(1) - go func() { - defer m.wg.Done() - m.serve() - }() - t.Cleanup(func() { - _ = m.Close() - }) - return m, conn -} - -func sendMountUDP(t *testing.T, target *net.UDPAddr, payload 
[]byte) []byte { - t.Helper() - c, err := net.DialUDP("udp", nil, target) - if err != nil { - t.Fatal(err) - } - defer c.Close() - if _, err := c.Write(payload); err != nil { - t.Fatal(err) - } - _ = c.SetReadDeadline(time.Now().Add(2 * time.Second)) - buf := make([]byte, 4096) - n, err := c.Read(buf) - if err != nil { - t.Fatalf("read reply: %v", err) - } - return buf[:n] -} - -// parseRPCReply pulls xid, accept_stat, and the body that follows accept_stat -// out of a MSG_ACCEPTED reply. Unlike the TCP path there is no fragment -// marker — the entire UDP datagram is the reply. -func parseRPCReply(t *testing.T, reply []byte) (xid, acceptStat uint32, body []byte) { - t.Helper() - if len(reply) < 24 { - t.Fatalf("reply too short: %d bytes", len(reply)) - } - xid = binary.BigEndian.Uint32(reply[0:4]) - if mt := binary.BigEndian.Uint32(reply[4:8]); mt != rpcMsgReply { - t.Fatalf("msg_type=%d want REPLY(1)", mt) - } - if rs := binary.BigEndian.Uint32(reply[8:12]); rs != rpcMsgAccepted { - t.Fatalf("reply_stat=%d want MSG_ACCEPTED(0)", rs) - } - acceptStat = binary.BigEndian.Uint32(reply[20:24]) - body = reply[24:] - return -} - -func TestMountUDPNullReturnsSuccess(t *testing.T) { - m, conn := newMountUDPTestServer(t, "/exports") - target := conn.LocalAddr().(*net.UDPAddr) - - reply := sendMountUDP(t, target, buildMountCallFrame(7, mountProgram, 3, mountProcNull, "")) - xid, astat, body := parseRPCReply(t, reply) - - if xid != 7 { - t.Errorf("xid=%d want 7", xid) - } - if astat != rpcAcceptSuccess { - t.Errorf("accept_stat=%d want SUCCESS(0)", astat) - } - if len(body) != 0 { - t.Errorf("NULL reply body should be empty, got %d bytes", len(body)) - } - _ = m -} - -func TestMountUDPMntReturnsHandleAndFlavors(t *testing.T) { - m, conn := newMountUDPTestServer(t, "/exports") - target := conn.LocalAddr().(*net.UDPAddr) - - reply := sendMountUDP(t, target, buildMountCallFrame(42, mountProgram, 3, mountProcMnt, "/exports")) - xid, astat, body := parseRPCReply(t, reply) - - if 
xid != 42 { - t.Errorf("xid=%d want 42", xid) - } - if astat != rpcAcceptSuccess { - t.Fatalf("accept_stat=%d want SUCCESS(0)", astat) - } - if len(body) < 4 { - t.Fatalf("body too short: %d bytes", len(body)) - } - status := binary.BigEndian.Uint32(body[0:4]) - if status != mnt3StatOK { - t.Fatalf("mountstat3=%d want OK(0)", status) - } - - // fhandle3: uint32 length + padded opaque bytes. - if len(body) < 8 { - t.Fatalf("body missing handle length: %d bytes", len(body)) - } - handleLen := binary.BigEndian.Uint32(body[4:8]) - handlePadded := (handleLen + 3) &^ 3 - if uint32(len(body)) < 8+handlePadded+4 { - t.Fatalf("body truncated: have %d, need at least %d", len(body), 8+handlePadded+4) - } - handle := body[8 : 8+handleLen] - if _, err := DecodeFileHandle(handle); err != nil { - t.Fatalf("returned handle does not decode: %v", err) - } - - flavorOff := 8 + handlePadded - count := binary.BigEndian.Uint32(body[flavorOff : flavorOff+4]) - if count != 2 { - t.Errorf("flavor count=%d want 2 (NULL + UNIX)", count) - } - got := []uint32{ - binary.BigEndian.Uint32(body[flavorOff+4 : flavorOff+8]), - binary.BigEndian.Uint32(body[flavorOff+8 : flavorOff+12]), - } - if got[0] != authFlavorNull || got[1] != authFlavorUnix { - t.Errorf("flavors=%v want [%d %d]", got, authFlavorNull, authFlavorUnix) - } - _ = m -} - -func TestMountUDPMntAcceptsAnyPath(t *testing.T) { - const exportRoot = "/buckets/data" - - _, conn := newMountUDPTestServer(t, exportRoot) - target := conn.LocalAddr().(*net.UDPAddr) - - dirpaths := []string{ - "/", - "/buckets", - "/buckets/other", - "/wrong/path", - "", - "buckets/data", - exportRoot, - exportRoot + "/", - } - for i, dirpath := range dirpaths { - t.Run(dirpath, func(t *testing.T) { - xid := uint32(1000 + i) - reply := sendMountUDP(t, target, buildMountCallFrame(xid, mountProgram, 3, mountProcMnt, dirpath)) - _, astat, body := parseRPCReply(t, reply) - if astat != rpcAcceptSuccess { - t.Fatalf("accept_stat=%d want SUCCESS(0)", astat) - } - if 
len(body) < 4 { - t.Fatalf("body too short: %d bytes", len(body)) - } - if got := binary.BigEndian.Uint32(body[0:4]); got != mnt3StatOK { - t.Errorf("MNT(%q): mountstat3=%d want OK(0)", dirpath, got) - } - if len(body) <= 4 { - t.Errorf("MNT(%q) success body must include handle and flavors", dirpath) - } - }) - } -} - -func TestMountUDPSubexportMount(t *testing.T) { - const exportRoot = "/buckets" - - client := &fakeNFSFilerClient{ - entries: map[util.FullPath]*filer_pb.Entry{ - "/buckets": testEntry("buckets", true, 100, uint32(0755), nil), - "/buckets/data": testEntry("data", true, 101, uint32(0755), nil), - "/buckets/data/nested": testEntry("nested", true, 102, uint32(0755), nil), - "/buckets/file.txt": testEntry("file.txt", false, 103, uint32(0644), []byte("hi")), - }, - kv: map[string][]byte{ - string(filer.InodeIndexKey(100)): testIndexRecord(t, 100, 1, "/buckets"), - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/buckets/data"), - string(filer.InodeIndexKey(102)): testIndexRecord(t, 102, 1, "/buckets/data/nested"), - string(filer.InodeIndexKey(103)): testIndexRecord(t, 103, 1, "/buckets/file.txt"), - }, - } - - m, conn := newMountUDPTestServerWithClient(t, exportRoot, client) - target := conn.LocalAddr().(*net.UDPAddr) - - // Build a TCP Handler from the same Server so we can compare the - // raw FH bytes both transports produce for the same subdirectory. 
- tcpHandler, err := m.server.newHandler() - require.NoError(t, err) - - cases := []struct { - name string - dirpath string - wantStatus uint32 - wantInode uint64 - }{ - {name: "subdirectory_one_level", dirpath: "/buckets/data", wantStatus: mnt3StatOK, wantInode: 101}, - {name: "subdirectory_two_levels", dirpath: "/buckets/data/nested", wantStatus: mnt3StatOK, wantInode: 102}, - {name: "subdirectory_trailing_slash", dirpath: "/buckets/data/", wantStatus: mnt3StatOK, wantInode: 101}, - {name: "missing_under_export", dirpath: "/buckets/missing", wantStatus: mnt3ErrNoEnt}, - {name: "deep_missing_under_export", dirpath: "/buckets/data/no-such-thing", wantStatus: mnt3ErrNoEnt}, - {name: "regular_file_not_directory", dirpath: "/buckets/file.txt", wantStatus: mnt3ErrNotDir}, - } - for i, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - xid := uint32(2000 + i) - reply := sendMountUDP(t, target, buildMountCallFrame(xid, mountProgram, 3, mountProcMnt, tc.dirpath)) - _, astat, body := parseRPCReply(t, reply) - if astat != rpcAcceptSuccess { - t.Fatalf("accept_stat=%d want SUCCESS(0)", astat) - } - if len(body) < 4 { - t.Fatalf("body too short: %d bytes", len(body)) - } - got := binary.BigEndian.Uint32(body[0:4]) - if got != tc.wantStatus { - t.Fatalf("MNT(%q) status=%d want %d", tc.dirpath, got, tc.wantStatus) - } - if tc.wantStatus != mnt3StatOK { - if len(body) != 4 { - t.Errorf("MNT(%q) error body should carry only the status; got %d trailing bytes", tc.dirpath, len(body)-4) - } - return - } - if len(body) < 8 { - t.Fatalf("MNT(%q) success body missing handle length", tc.dirpath) - } - handleLen := binary.BigEndian.Uint32(body[4:8]) - if uint32(len(body)) < 8+handleLen { - t.Fatalf("MNT(%q) success body truncated", tc.dirpath) - } - udpHandleBytes := body[8 : 8+handleLen] - handle, err := DecodeFileHandle(udpHandleBytes) - if err != nil { - t.Fatalf("MNT(%q) handle decode: %v", tc.dirpath, err) - } - if handle.Inode != tc.wantInode { - t.Errorf("MNT(%q) FH 
inode=%d want %d", tc.dirpath, handle.Inode, tc.wantInode) - } - if handle.Kind != FileHandleKindDirectory { - t.Errorf("MNT(%q) FH kind=%d want directory", tc.dirpath, handle.Kind) - } - - // Transport parity: drive the TCP Handler with the same dirpath - // and confirm the bytes go-nfs's onMount would write match the - // UDP responder's bytes exactly. A regression that drifts the - // generation, exportID, or kind on one transport would fail here. - tcpStatus, tcpFS, _ := tcpHandler.Mount(context.Background(), nil, gonfs.MountRequest{Dirpath: []byte(tc.dirpath)}) - require.Equal(t, gonfs.MountStatusOk, tcpStatus, "TCP Mount(%q)", tc.dirpath) - tcpHandleBytes := tcpHandler.ToHandle(tcpFS, nil) - require.NotEmpty(t, tcpHandleBytes, "TCP Mount(%q) ToHandle returned empty", tc.dirpath) - assert.Equal(t, tcpHandleBytes, udpHandleBytes, "TCP/UDP FH bytes diverge for %q", tc.dirpath) - }) - } -} - -func TestMountUDPRejectsWrongVersion(t *testing.T) { - // Same defence-in-depth as the TCP version filter: don't speak v1/v4 - // MOUNT — return PROG_MISMATCH advertising 3..3 so the client knows - // to retry with v3. - _, conn := newMountUDPTestServer(t, "/exports") - target := conn.LocalAddr().(*net.UDPAddr) - - reply := sendMountUDP(t, target, buildMountCallFrame(1, mountProgram, 4, mountProcNull, "")) - _, astat, body := parseRPCReply(t, reply) - - if astat != rpcAcceptProgMismatch { - t.Fatalf("accept_stat=%d want PROG_MISMATCH(2)", astat) - } - if len(body) != 8 { - t.Fatalf("PROG_MISMATCH body=%d bytes want 8", len(body)) - } - low := binary.BigEndian.Uint32(body[0:4]) - high := binary.BigEndian.Uint32(body[4:8]) - if low != 3 || high != 3 { - t.Errorf("supported range=(%d,%d) want (3,3)", low, high) - } -} - -func TestMountUDPRejectsWrongProgram(t *testing.T) { - _, conn := newMountUDPTestServer(t, "/exports") - target := conn.LocalAddr().(*net.UDPAddr) - - // 100021 is NLM, which we don't run here. 
- reply := sendMountUDP(t, target, buildMountCallFrame(1, 100021, 4, mountProcNull, "")) - _, astat, _ := parseRPCReply(t, reply) - if astat != rpcAcceptProgUnavail { - t.Errorf("accept_stat=%d want PROG_UNAVAIL(1)", astat) - } -} - -func TestMountUDPUmntAcknowledges(t *testing.T) { - _, conn := newMountUDPTestServer(t, "/exports") - target := conn.LocalAddr().(*net.UDPAddr) - - // UMNT carries a dirpath but the server is stateless and ignores it. - reply := sendMountUDP(t, target, buildMountCallFrame(8, mountProgram, 3, mountProcUmnt, "/exports")) - _, astat, body := parseRPCReply(t, reply) - if astat != rpcAcceptSuccess { - t.Errorf("accept_stat=%d want SUCCESS(0)", astat) - } - if len(body) != 0 { - t.Errorf("UMNT reply body should be empty, got %d bytes", len(body)) - } -} - -func TestMountUDPRejectsTruncatedMntArgs(t *testing.T) { - _, conn := newMountUDPTestServer(t, "/exports") - target := conn.LocalAddr().(*net.UDPAddr) - - // Hand-craft an MNT call whose dirpath length field claims 16 bytes - // but no body follows. Using buildMountCallFrame would also emit a - // trailing length=0 from the empty-string default; we need exactly - // "length, no body" so the GARBAGE_ARGS path actually fires. - frame := make([]byte, 24+16+4) // header + auth + 4-byte length only - binary.BigEndian.PutUint32(frame[0:4], 1) // xid - binary.BigEndian.PutUint32(frame[4:8], rpcMsgCall) // msg_type - binary.BigEndian.PutUint32(frame[8:12], 2) // rpcvers - binary.BigEndian.PutUint32(frame[12:16], mountProgram) - binary.BigEndian.PutUint32(frame[16:20], 3) // mount vers - binary.BigEndian.PutUint32(frame[20:24], mountProcMnt) - // auth = two AUTH_NONE / length-0 stanzas (already zero from make). 
- binary.BigEndian.PutUint32(frame[40:44], 16) // dirpath length=16, no bytes follow - reply := sendMountUDP(t, target, frame) - _, astat, _ := parseRPCReply(t, reply) - if astat != rpcAcceptGarbageArgs { - t.Errorf("accept_stat=%d want GARBAGE_ARGS(4)", astat) - } -} - -func TestMountUDPCloseStopsServing(t *testing.T) { - m, conn := newMountUDPTestServer(t, "/exports") - target := conn.LocalAddr().(*net.UDPAddr) - - // Sanity: NULL works before close. - _ = sendMountUDP(t, target, buildMountCallFrame(1, mountProgram, 3, mountProcNull, "")) - - if err := m.Close(); err != nil { - t.Fatalf("Close: %v", err) - } - // After Close the socket is shut, so a fresh send should fail to - // read a reply within the deadline rather than producing a - // well-formed response. - c, err := net.DialUDP("udp", nil, target) - if err != nil { - // Some platforms refuse the dial outright after Close — that's - // also acceptable: the server is gone either way. - return - } - defer c.Close() - _, _ = c.Write(buildMountCallFrame(2, mountProgram, 3, mountProcNull, "")) - _ = c.SetReadDeadline(time.Now().Add(200 * time.Millisecond)) - buf := make([]byte, 1024) - if _, err := c.Read(buf); err == nil { - t.Error("Close should have stopped the responder, but a reply still arrived") - } -} diff --git a/weed/server/nfs/portmap.go b/weed/server/nfs/portmap.go deleted file mode 100644 index 45c715fef..000000000 --- a/weed/server/nfs/portmap.go +++ /dev/null @@ -1,447 +0,0 @@ -package nfs - -import ( - "encoding/binary" - "errors" - "fmt" - "io" - "net" - "sync" - "time" - - "github.com/seaweedfs/seaweedfs/weed/glog" -) - -// Minimal PORTMAP v2 responder. -// -// The upstream willscott/go-nfs library serves NFSv3 and MOUNT on a single TCP -// port and deliberately does not register with portmap (RPC program 100000). 
-// Linux mount.nfs, however, queries portmap on port 111 before sending the -// MOUNT RPC, so the plain `mount -t nfs host:/export /mnt` command fails -// against a default `weed nfs` deployment. -// -// When enabled, this responder binds the privileged port 111 (RFC 1833) on -// both TCP and UDP and answers the subset of PORTMAP v2 calls that standard -// Linux clients make: PMAP_NULL, PMAP_GETPORT and PMAP_DUMP. It refuses -// registration from third parties (PMAP_SET / PMAP_UNSET return false) and -// only exposes the programs that weed itself serves. -// -// References: RFC 1833 (Portmap v2), RFC 5531 (RPC). -const ( - portmapProgram = 100000 - portmapVersion = 2 - portmapPort = 111 - - pmapProcNull = 0 - pmapProcSet = 1 - pmapProcUnset = 2 - pmapProcGetPort = 3 - pmapProcDump = 4 - - ipProtoTCP = 6 - ipProtoUDP = 17 - - nfsProgram = 100003 - mountProgram = 100005 - - // RPC - rpcMsgCall = 0 - rpcMsgReply = 1 - - rpcMsgAccepted = 0 - - rpcAcceptSuccess = 0 - rpcAcceptProgUnavail = 1 - rpcAcceptProgMismatch = 2 - rpcAcceptProcUnavail = 3 - rpcAcceptGarbageArgs = 4 - - rpcAuthNone = 0 - - // Defensive limits. Portmap messages are tiny in practice; these caps - // protect the responder from large or slow reads. - portmapMaxRecord = 64 * 1024 - - // Per-connection read/write deadlines on the TCP listener. The idle - // timeout bounds how long we wait for the next request on an otherwise - // quiet connection; the IO timeout bounds a single read or write once - // one is in flight. Both guard against slowloris-style stalls on the - // privileged port 111. - portmapTCPIdleTimeout = 30 * time.Second - portmapTCPIOTimeout = 10 * time.Second - - // Back-off applied before retrying after a non-fatal listener error - // (e.g. EMFILE on TCP Accept, or a transient UDP read failure) so we - // don't busy-loop when the host is under pressure. 
- portmapRetryBackoff = 50 * time.Millisecond -) - -type portmapEntry struct { - Program uint32 - Version uint32 - Protocol uint32 - Port uint32 -} - -type portmapServer struct { - bindIP string - port int - entries []portmapEntry - - tcpListener net.Listener - udpConn *net.UDPConn - - // mu guards closed and conns. It is held only for bookkeeping, never - // across network IO. - mu sync.Mutex - closed bool - conns map[net.Conn]struct{} - // done is closed exactly once by Close() so that background loops can - // interrupt a retry-backoff sleep instead of waiting it out. - done chan struct{} - wg sync.WaitGroup -} - -// newPortmapServer builds a responder advertising the NFS services the caller -// runs on nfsPort. NFS itself is TCP-only here (the upstream go-nfs library -// doesn't speak NFS UDP). MOUNT, however, is served over both TCP (via -// go-nfs) and UDP (via mountUDPServer in mount_udp.go), so we advertise -// both — that's what makes plain `mount -t nfs : /mnt` work -// against Linux clients whose default mountproto is UDP without needing -// mountproto=tcp / mountport=2049 mount options. 
-func newPortmapServer(bindIP string, port int, nfsPort uint32) *portmapServer { - if port <= 0 { - port = portmapPort - } - return &portmapServer{ - bindIP: bindIP, - port: port, - done: make(chan struct{}), - entries: []portmapEntry{ - {Program: nfsProgram, Version: 3, Protocol: ipProtoTCP, Port: nfsPort}, - {Program: mountProgram, Version: 3, Protocol: ipProtoTCP, Port: nfsPort}, - {Program: mountProgram, Version: 3, Protocol: ipProtoUDP, Port: nfsPort}, - }, - } -} - -func (ps *portmapServer) Start() error { - addr := net.JoinHostPort(ps.bindIP, fmt.Sprintf("%d", ps.port)) - - tcpLn, err := net.Listen("tcp", addr) - if err != nil { - return fmt.Errorf("portmap tcp listen %s: %w", addr, err) - } - udpAddr, err := net.ResolveUDPAddr("udp", addr) - if err != nil { - _ = tcpLn.Close() - return fmt.Errorf("portmap udp resolve %s: %w", addr, err) - } - udpConn, err := net.ListenUDP("udp", udpAddr) - if err != nil { - _ = tcpLn.Close() - return fmt.Errorf("portmap udp listen %s: %w", addr, err) - } - ps.tcpListener = tcpLn - ps.udpConn = udpConn - - ps.wg.Add(2) - go func() { - defer ps.wg.Done() - ps.serveTCP() - }() - go func() { - defer ps.wg.Done() - ps.serveUDP() - }() - return nil -} - -func (ps *portmapServer) Close() error { - ps.mu.Lock() - if ps.closed { - ps.mu.Unlock() - return nil - } - ps.closed = true - conns := ps.conns - ps.conns = nil - close(ps.done) - ps.mu.Unlock() - - var first error - if ps.tcpListener != nil { - if err := ps.tcpListener.Close(); err != nil { - first = err - } - } - if ps.udpConn != nil { - if err := ps.udpConn.Close(); err != nil && first == nil { - first = err - } - } - // Evict in-flight TCP handlers so Close() does not block on idle - // clients; their read goroutines will unwind on the closed conn. 
- for c := range conns { - _ = c.Close() - } - ps.wg.Wait() - return first -} - -func (ps *portmapServer) isClosed() bool { - ps.mu.Lock() - defer ps.mu.Unlock() - return ps.closed -} - -// addConn registers c for shutdown eviction. It returns false (and the -// caller must drop c) if the server has already started shutting down. -func (ps *portmapServer) addConn(c net.Conn) bool { - ps.mu.Lock() - defer ps.mu.Unlock() - if ps.closed { - return false - } - if ps.conns == nil { - ps.conns = make(map[net.Conn]struct{}) - } - ps.conns[c] = struct{}{} - return true -} - -func (ps *portmapServer) removeConn(c net.Conn) { - ps.mu.Lock() - defer ps.mu.Unlock() - delete(ps.conns, c) -} - -func (ps *portmapServer) serveTCP() { - for { - conn, err := ps.tcpListener.Accept() - if err != nil { - if ps.isClosed() { - return - } - // Non-fatal (e.g. EMFILE, EINTR): log and back off rather - // than tear the listener down on a transient resource blip. - // Wake early if Close() fires during the sleep. - glog.V(1).Infof("portmap tcp accept: %v", err) - select { - case <-ps.done: - return - case <-time.After(portmapRetryBackoff): - continue - } - } - if !ps.addConn(conn) { - _ = conn.Close() - continue - } - ps.wg.Add(1) - go func(c net.Conn) { - defer ps.wg.Done() - defer ps.removeConn(c) - ps.handleTCPConn(c) - }(conn) - } -} - -func (ps *portmapServer) handleTCPConn(conn net.Conn) { - defer conn.Close() - hdr := make([]byte, 4) - for { - _ = conn.SetReadDeadline(time.Now().Add(portmapTCPIdleTimeout)) - if _, err := io.ReadFull(conn, hdr); err != nil { - return - } - mark := binary.BigEndian.Uint32(hdr) - // Bit 31: last-fragment flag. Portmap messages are always single - // fragment in practice; drop the connection if we see otherwise. 
- if mark&(1<<31) == 0 { - return - } - recLen := mark &^ (1 << 31) - if recLen == 0 || recLen > portmapMaxRecord { - return - } - buf := make([]byte, recLen) - _ = conn.SetReadDeadline(time.Now().Add(portmapTCPIOTimeout)) - if _, err := io.ReadFull(conn, buf); err != nil { - return - } - reply := ps.handleCall(buf) - if reply == nil { - continue - } - out := make([]byte, 4+len(reply)) - binary.BigEndian.PutUint32(out[0:4], uint32(len(reply))|(1<<31)) - copy(out[4:], reply) - _ = conn.SetWriteDeadline(time.Now().Add(portmapTCPIOTimeout)) - if _, err := conn.Write(out); err != nil { - return - } - } -} - -func (ps *portmapServer) serveUDP() { - buf := make([]byte, portmapMaxRecord) - for { - n, addr, err := ps.udpConn.ReadFromUDP(buf) - if err != nil { - if ps.isClosed() { - return - } - // Transient read failure: log, back off, and keep the - // responder alive instead of taking UDP portmap down. - // Wake early if Close() fires during the sleep. - glog.V(1).Infof("portmap udp read: %v", err) - select { - case <-ps.done: - return - case <-time.After(portmapRetryBackoff): - continue - } - } - reply := ps.handleCall(buf[:n]) - if reply == nil { - continue - } - if _, err := ps.udpConn.WriteToUDP(reply, addr); err != nil { - glog.V(1).Infof("portmap udp write to %s: %v", addr, err) - } - } -} - -// handleCall parses one RPC CALL message and returns the encoded reply, or nil -// if the call is malformed enough that we should drop it silently. -func (ps *portmapServer) handleCall(callBuf []byte) []byte { - xid, prog, vers, proc, args, err := parseRPCCall(callBuf) - if err != nil { - return nil - } - if prog != portmapProgram { - return encodeAcceptedReply(xid, rpcAcceptProgUnavail, nil) - } - if vers != portmapVersion { - // Program-version mismatch: RFC 5531 says we should return the - // accepted range; keep it simple and report 2..2. 
- body := make([]byte, 8) - binary.BigEndian.PutUint32(body[0:4], portmapVersion) - binary.BigEndian.PutUint32(body[4:8], portmapVersion) - return encodeAcceptedReply(xid, rpcAcceptProgMismatch, body) - } - switch proc { - case pmapProcNull: - return encodeAcceptedReply(xid, rpcAcceptSuccess, nil) - case pmapProcGetPort: - if len(args) < 16 { - return encodeAcceptedReply(xid, rpcAcceptGarbageArgs, nil) - } - q := portmapEntry{ - Program: binary.BigEndian.Uint32(args[0:4]), - Version: binary.BigEndian.Uint32(args[4:8]), - Protocol: binary.BigEndian.Uint32(args[8:12]), - } - port := uint32(0) - for _, e := range ps.entries { - if e.Program == q.Program && e.Version == q.Version && e.Protocol == q.Protocol { - port = e.Port - break - } - } - body := make([]byte, 4) - binary.BigEndian.PutUint32(body, port) - return encodeAcceptedReply(xid, rpcAcceptSuccess, body) - case pmapProcDump: - // Each entry is 4-byte value_follows + 16-byte mapping = 20 bytes, - // plus a 4-byte terminator value_follows=FALSE. - body := make([]byte, 0, 20*len(ps.entries)+4) - for _, e := range ps.entries { - chunk := make([]byte, 20) - binary.BigEndian.PutUint32(chunk[0:4], 1) // value_follows = TRUE - binary.BigEndian.PutUint32(chunk[4:8], e.Program) - binary.BigEndian.PutUint32(chunk[8:12], e.Version) - binary.BigEndian.PutUint32(chunk[12:16], e.Protocol) - binary.BigEndian.PutUint32(chunk[16:20], e.Port) - body = append(body, chunk...) - } - end := make([]byte, 4) // value_follows = FALSE - body = append(body, end...) - return encodeAcceptedReply(xid, rpcAcceptSuccess, body) - case pmapProcSet, pmapProcUnset: - // Don't accept third-party registrations. bool=FALSE. - body := make([]byte, 4) - return encodeAcceptedReply(xid, rpcAcceptSuccess, body) - default: - return encodeAcceptedReply(xid, rpcAcceptProcUnavail, nil) - } -} - -// parseRPCCall parses the fixed portion of an RPC CALL header and returns the -// remaining procedure arguments. 
It skips both opaque_auth fields (cred and -// verf) so callers get a buffer starting at the procedure arguments. -func parseRPCCall(buf []byte) (xid, prog, vers, proc uint32, args []byte, err error) { - // Minimum header: xid + msg_type + rpcvers + prog + vers + proc + 2x - // (flavor + len) = 6*4 + 2*8 = 40 bytes. - const minHeader = 40 - if len(buf) < minHeader { - err = fmt.Errorf("rpc call too short: %d bytes", len(buf)) - return - } - xid = binary.BigEndian.Uint32(buf[0:4]) - if msgType := binary.BigEndian.Uint32(buf[4:8]); msgType != rpcMsgCall { - err = fmt.Errorf("not an rpc call: msg_type=%d", msgType) - return - } - if rpcvers := binary.BigEndian.Uint32(buf[8:12]); rpcvers != 2 { - err = fmt.Errorf("unsupported rpc version %d", rpcvers) - return - } - prog = binary.BigEndian.Uint32(buf[12:16]) - vers = binary.BigEndian.Uint32(buf[16:20]) - proc = binary.BigEndian.Uint32(buf[20:24]) - - p := 24 - for i := 0; i < 2; i++ { - if len(buf) < p+8 { - err = fmt.Errorf("truncated opaque_auth at offset %d", p) - return - } - authLen := binary.BigEndian.Uint32(buf[p+4 : p+8]) - // Validate before applying the XDR 4-byte padding so that - // lengths near uint32 max can't wrap to a tiny padded value. - if authLen > uint32(portmapMaxRecord) { - err = errors.New("opaque_auth length exceeds limit") - return - } - padded := (authLen + 3) &^ 3 - end := uint64(p) + 8 + uint64(padded) - if end > uint64(len(buf)) { - err = fmt.Errorf("truncated opaque_auth body at offset %d (len=%d)", p, authLen) - return - } - p = int(end) - } - args = buf[p:] - return -} - -// encodeAcceptedReply builds a MSG_ACCEPTED reply with the given accept_stat. -// body is the already-XDR-encoded data that follows accept_stat in the reply. 
-// For SUCCESS it is the procedure result; it is nil for most error -// accept_stat values (PROG_UNAVAIL, PROC_UNAVAIL, GARBAGE_ARGS) but is -// non-nil for PROG_MISMATCH, which carries a struct { uint32 low; uint32 -// high; } mismatch_info range per RFC 5531 §9. -func encodeAcceptedReply(xid, acceptStat uint32, body []byte) []byte { - out := make([]byte, 24+len(body)) - binary.BigEndian.PutUint32(out[0:4], xid) - binary.BigEndian.PutUint32(out[4:8], rpcMsgReply) - binary.BigEndian.PutUint32(out[8:12], rpcMsgAccepted) - // verf: AUTH_NONE, zero-length opaque - binary.BigEndian.PutUint32(out[12:16], rpcAuthNone) - binary.BigEndian.PutUint32(out[16:20], 0) - binary.BigEndian.PutUint32(out[20:24], acceptStat) - copy(out[24:], body) - return out -} diff --git a/weed/server/nfs/portmap_test.go b/weed/server/nfs/portmap_test.go deleted file mode 100644 index cc3ec8df5..000000000 --- a/weed/server/nfs/portmap_test.go +++ /dev/null @@ -1,418 +0,0 @@ -package nfs - -import ( - "bytes" - "encoding/binary" - "io" - "net" - "strconv" - "testing" - "time" -) - -func buildRPCCall(t *testing.T, xid, prog, vers, proc uint32, credBody, verfBody, args []byte) []byte { - t.Helper() - pad := func(b []byte) []byte { - r := len(b) % 4 - if r == 0 { - return b - } - out := make([]byte, len(b)+4-r) - copy(out, b) - return out - } - buf := new(bytes.Buffer) - write := func(v uint32) { - var b [4]byte - binary.BigEndian.PutUint32(b[:], v) - buf.Write(b[:]) - } - write(xid) - write(rpcMsgCall) - write(2) // rpcvers - write(prog) - write(vers) - write(proc) - // cred - write(rpcAuthNone) - write(uint32(len(credBody))) - buf.Write(pad(credBody)) - // verf - write(rpcAuthNone) - write(uint32(len(verfBody))) - buf.Write(pad(verfBody)) - buf.Write(args) - return buf.Bytes() -} - -func parseAcceptedReply(t *testing.T, reply []byte) (xid, acceptStat uint32, body []byte) { - t.Helper() - if len(reply) < 24 { - t.Fatalf("reply too short: %d bytes", len(reply)) - } - xid = 
binary.BigEndian.Uint32(reply[0:4]) - if mt := binary.BigEndian.Uint32(reply[4:8]); mt != rpcMsgReply { - t.Fatalf("msg_type=%d, want REPLY", mt) - } - if rs := binary.BigEndian.Uint32(reply[8:12]); rs != rpcMsgAccepted { - t.Fatalf("reply_stat=%d, want ACCEPTED", rs) - } - // verf - verfLen := binary.BigEndian.Uint32(reply[16:20]) - if verfLen != 0 { - t.Fatalf("unexpected verf length %d", verfLen) - } - acceptStat = binary.BigEndian.Uint32(reply[20:24]) - body = reply[24:] - return -} - -func newTestPortmap() *portmapServer { - return newPortmapServer("127.0.0.1", portmapPort, 2049) -} - -func TestParseRPCCall_SkipsAuth(t *testing.T) { - cred := []byte("hello") // 5 bytes -> padded to 8 - verf := []byte{} - args := []byte{0x01, 0x02, 0x03, 0x04} - msg := buildRPCCall(t, 42, portmapProgram, portmapVersion, pmapProcNull, cred, verf, args) - - xid, prog, vers, proc, gotArgs, err := parseRPCCall(msg) - if err != nil { - t.Fatalf("parseRPCCall: %v", err) - } - if xid != 42 || prog != portmapProgram || vers != portmapVersion || proc != pmapProcNull { - t.Fatalf("header mismatch: xid=%d prog=%d vers=%d proc=%d", xid, prog, vers, proc) - } - if !bytes.Equal(gotArgs, args) { - t.Fatalf("args mismatch: got %x want %x", gotArgs, args) - } -} - -func TestParseRPCCall_RejectsReply(t *testing.T) { - buf := make([]byte, 40) - binary.BigEndian.PutUint32(buf[4:8], rpcMsgReply) - if _, _, _, _, _, err := parseRPCCall(buf); err == nil { - t.Fatal("expected error on reply-typed message") - } -} - -func TestParseRPCCall_TruncatedAuth(t *testing.T) { - // Claim huge cred length but provide no body. 
- buf := make([]byte, 40) - binary.BigEndian.PutUint32(buf[4:8], rpcMsgCall) - binary.BigEndian.PutUint32(buf[8:12], 2) - binary.BigEndian.PutUint32(buf[28:32], 1000) // cred len - if _, _, _, _, _, err := parseRPCCall(buf); err == nil { - t.Fatal("expected error on truncated auth") - } -} - -func TestHandleCall_Null(t *testing.T) { - ps := newTestPortmap() - msg := buildRPCCall(t, 7, portmapProgram, portmapVersion, pmapProcNull, nil, nil, nil) - reply := ps.handleCall(msg) - xid, acc, body := parseAcceptedReply(t, reply) - if xid != 7 || acc != rpcAcceptSuccess || len(body) != 0 { - t.Fatalf("null reply xid=%d acc=%d body=%x", xid, acc, body) - } -} - -func TestHandleCall_GetPort_HitAndMiss(t *testing.T) { - ps := newTestPortmap() - - buildQuery := func(prog, vers, prot uint32) []byte { - args := make([]byte, 16) - binary.BigEndian.PutUint32(args[0:4], prog) - binary.BigEndian.PutUint32(args[4:8], vers) - binary.BigEndian.PutUint32(args[8:12], prot) - // port field is ignored by the server; leave zero - return args - } - - cases := []struct { - name string - prog, vers, prot uint32 - wantPort uint32 - }{ - {"nfs-v3-tcp-hit", nfsProgram, 3, ipProtoTCP, 2049}, - {"mount-v3-tcp-hit", mountProgram, 3, ipProtoTCP, 2049}, - {"mount-v3-udp-hit", mountProgram, 3, ipProtoUDP, 2049}, - {"mount-v1-tcp-miss", mountProgram, 1, ipProtoTCP, 0}, - {"nfs-v3-udp-miss", nfsProgram, 3, ipProtoUDP, 0}, - {"nlm-miss", 100021, 4, ipProtoTCP, 0}, - } - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - msg := buildRPCCall(t, 11, portmapProgram, portmapVersion, pmapProcGetPort, nil, nil, buildQuery(tc.prog, tc.vers, tc.prot)) - reply := ps.handleCall(msg) - xid, acc, body := parseAcceptedReply(t, reply) - if xid != 11 { - t.Fatalf("xid=%d want 11", xid) - } - if acc != rpcAcceptSuccess { - t.Fatalf("acc=%d want SUCCESS", acc) - } - if len(body) != 4 { - t.Fatalf("getport body len=%d want 4", len(body)) - } - got := binary.BigEndian.Uint32(body) - if got != tc.wantPort { - 
t.Fatalf("port=%d want %d", got, tc.wantPort) - } - }) - } -} - -func TestHandleCall_Dump(t *testing.T) { - ps := newTestPortmap() - msg := buildRPCCall(t, 13, portmapProgram, portmapVersion, pmapProcDump, nil, nil, nil) - reply := ps.handleCall(msg) - _, acc, body := parseAcceptedReply(t, reply) - if acc != rpcAcceptSuccess { - t.Fatalf("acc=%d", acc) - } - var entries []portmapEntry - p := 0 - for p+4 <= len(body) { - vf := binary.BigEndian.Uint32(body[p : p+4]) - p += 4 - if vf == 0 { - break - } - if p+16 > len(body) { - t.Fatalf("truncated entry at %d", p) - } - entries = append(entries, portmapEntry{ - Program: binary.BigEndian.Uint32(body[p : p+4]), - Version: binary.BigEndian.Uint32(body[p+4 : p+8]), - Protocol: binary.BigEndian.Uint32(body[p+8 : p+12]), - Port: binary.BigEndian.Uint32(body[p+12 : p+16]), - }) - p += 16 - } - if len(entries) != 3 { - t.Fatalf("got %d dump entries, want 3: %+v", len(entries), entries) - } - wantSet := map[portmapEntry]bool{ - {Program: nfsProgram, Version: 3, Protocol: ipProtoTCP, Port: 2049}: false, - {Program: mountProgram, Version: 3, Protocol: ipProtoTCP, Port: 2049}: false, - {Program: mountProgram, Version: 3, Protocol: ipProtoUDP, Port: 2049}: false, - } - for _, e := range entries { - if _, ok := wantSet[e]; !ok { - t.Fatalf("unexpected dump entry %+v", e) - } - wantSet[e] = true - } - for e, seen := range wantSet { - if !seen { - t.Fatalf("missing dump entry %+v", e) - } - } -} - -func TestHandleCall_UnknownProg(t *testing.T) { - ps := newTestPortmap() - msg := buildRPCCall(t, 1, 999999, 1, 0, nil, nil, nil) - reply := ps.handleCall(msg) - _, acc, _ := parseAcceptedReply(t, reply) - if acc != rpcAcceptProgUnavail { - t.Fatalf("acc=%d want PROG_UNAVAIL", acc) - } -} - -func TestHandleCall_VersionMismatch(t *testing.T) { - ps := newTestPortmap() - msg := buildRPCCall(t, 1, portmapProgram, 42, pmapProcNull, nil, nil, nil) - reply := ps.handleCall(msg) - _, acc, body := parseAcceptedReply(t, reply) - if acc != 
rpcAcceptProgMismatch { - t.Fatalf("acc=%d want PROG_MISMATCH", acc) - } - if len(body) != 8 { - t.Fatalf("mismatch body len=%d want 8", len(body)) - } - lo := binary.BigEndian.Uint32(body[0:4]) - hi := binary.BigEndian.Uint32(body[4:8]) - if lo != portmapVersion || hi != portmapVersion { - t.Fatalf("mismatch range lo=%d hi=%d", lo, hi) - } -} - -func TestHandleCall_UnknownProc(t *testing.T) { - ps := newTestPortmap() - msg := buildRPCCall(t, 1, portmapProgram, portmapVersion, 42, nil, nil, nil) - reply := ps.handleCall(msg) - _, acc, _ := parseAcceptedReply(t, reply) - if acc != rpcAcceptProcUnavail { - t.Fatalf("acc=%d want PROC_UNAVAIL", acc) - } -} - -func TestHandleCall_SetRefused(t *testing.T) { - ps := newTestPortmap() - args := make([]byte, 16) // mapping struct - msg := buildRPCCall(t, 1, portmapProgram, portmapVersion, pmapProcSet, nil, nil, args) - reply := ps.handleCall(msg) - _, acc, body := parseAcceptedReply(t, reply) - if acc != rpcAcceptSuccess { - t.Fatalf("acc=%d", acc) - } - if len(body) != 4 || binary.BigEndian.Uint32(body) != 0 { - t.Fatalf("PMAP_SET must return FALSE, got %x", body) - } -} - -// pickFreePort asks the OS for an unused high port by opening and closing a -// listener on it. Used so the end-to-end tests can run in parallel without -// stepping on the privileged default port 111. 
-func pickFreePort(t *testing.T) int { - t.Helper() - ln, err := net.Listen("tcp", "127.0.0.1:0") - if err != nil { - t.Fatalf("listen: %v", err) - } - defer ln.Close() - return ln.Addr().(*net.TCPAddr).Port -} - -func TestPortmapServer_UDPGetPort(t *testing.T) { - port := pickFreePort(t) - ps := newPortmapServer("127.0.0.1", port, 2049) - if err := ps.Start(); err != nil { - t.Fatalf("start: %v", err) - } - t.Cleanup(func() { _ = ps.Close() }) - - args := make([]byte, 16) - binary.BigEndian.PutUint32(args[0:4], nfsProgram) - binary.BigEndian.PutUint32(args[4:8], 3) - binary.BigEndian.PutUint32(args[8:12], ipProtoTCP) - msg := buildRPCCall(t, 99, portmapProgram, portmapVersion, pmapProcGetPort, nil, nil, args) - - conn, err := net.Dial("udp", net.JoinHostPort("127.0.0.1", strconv.Itoa(port))) - if err != nil { - t.Fatalf("dial udp: %v", err) - } - defer conn.Close() - _ = conn.SetDeadline(time.Now().Add(2 * time.Second)) - if _, err := conn.Write(msg); err != nil { - t.Fatalf("write: %v", err) - } - buf := make([]byte, 4096) - n, err := conn.Read(buf) - if err != nil { - t.Fatalf("read: %v", err) - } - xid, acc, body := parseAcceptedReply(t, buf[:n]) - if xid != 99 || acc != rpcAcceptSuccess || len(body) != 4 { - t.Fatalf("bad reply xid=%d acc=%d body=%x", xid, acc, body) - } - if got := binary.BigEndian.Uint32(body); got != 2049 { - t.Fatalf("udp getport port=%d want 2049", got) - } -} - -func TestPortmapServer_CloseEvictsIdleTCPConn(t *testing.T) { - port := pickFreePort(t) - ps := newPortmapServer("127.0.0.1", port, 2049) - if err := ps.Start(); err != nil { - t.Fatalf("start: %v", err) - } - - conn, err := net.Dial("tcp", net.JoinHostPort("127.0.0.1", strconv.Itoa(port))) - if err != nil { - _ = ps.Close() - t.Fatalf("dial: %v", err) - } - defer conn.Close() - - // Issue one call and read its reply so the server-side connection is - // definitely registered before we trigger shutdown. 
- msg := buildRPCCall(t, 1, portmapProgram, portmapVersion, pmapProcNull, nil, nil, nil) - var mark [4]byte - binary.BigEndian.PutUint32(mark[:], uint32(len(msg))|(1<<31)) - _ = conn.SetDeadline(time.Now().Add(2 * time.Second)) - if _, err := conn.Write(mark[:]); err != nil { - t.Fatalf("write mark: %v", err) - } - if _, err := conn.Write(msg); err != nil { - t.Fatalf("write msg: %v", err) - } - if _, err := io.ReadFull(conn, mark[:]); err != nil { - t.Fatalf("read mark: %v", err) - } - rlen := binary.BigEndian.Uint32(mark[:]) &^ (1 << 31) - if _, err := io.ReadFull(conn, make([]byte, rlen)); err != nil { - t.Fatalf("read body: %v", err) - } - - // Close must return long before the TCP idle deadline (30s) — in - // other words, the server must actively close the idle conn rather - // than wait for the deadline or for the client to disconnect. - done := make(chan error, 1) - go func() { done <- ps.Close() }() - select { - case err := <-done: - if err != nil { - t.Fatalf("Close: %v", err) - } - case <-time.After(2 * time.Second): - t.Fatal("Close did not return within 2s; in-flight conn not evicted") - } - - _ = conn.SetReadDeadline(time.Now().Add(1 * time.Second)) - if _, err := conn.Read(make([]byte, 4)); err == nil { - t.Fatal("expected read error on client conn after server Close") - } -} - -func TestPortmapServer_TCPGetPort(t *testing.T) { - port := pickFreePort(t) - ps := newPortmapServer("127.0.0.1", port, 2049) - if err := ps.Start(); err != nil { - t.Fatalf("start: %v", err) - } - t.Cleanup(func() { _ = ps.Close() }) - - args := make([]byte, 16) - binary.BigEndian.PutUint32(args[0:4], mountProgram) - binary.BigEndian.PutUint32(args[4:8], 3) - binary.BigEndian.PutUint32(args[8:12], ipProtoTCP) - msg := buildRPCCall(t, 123, portmapProgram, portmapVersion, pmapProcGetPort, nil, nil, args) - - conn, err := net.Dial("tcp", net.JoinHostPort("127.0.0.1", strconv.Itoa(port))) - if err != nil { - t.Fatalf("dial tcp: %v", err) - } - defer conn.Close() - _ = 
conn.SetDeadline(time.Now().Add(2 * time.Second)) - - // record mark: last-fragment bit + length - var mark [4]byte - binary.BigEndian.PutUint32(mark[:], uint32(len(msg))|(1<<31)) - if _, err := conn.Write(mark[:]); err != nil { - t.Fatalf("write mark: %v", err) - } - if _, err := conn.Write(msg); err != nil { - t.Fatalf("write msg: %v", err) - } - - var rmark [4]byte - if _, err := io.ReadFull(conn, rmark[:]); err != nil { - t.Fatalf("read mark: %v", err) - } - rlen := binary.BigEndian.Uint32(rmark[:]) &^ (1 << 31) - buf := make([]byte, rlen) - if _, err := io.ReadFull(conn, buf); err != nil { - t.Fatalf("read body: %v", err) - } - xid, acc, body := parseAcceptedReply(t, buf) - if xid != 123 || acc != rpcAcceptSuccess || len(body) != 4 { - t.Fatalf("bad reply xid=%d acc=%d body=%x", xid, acc, body) - } - if got := binary.BigEndian.Uint32(body); got != 2049 { - t.Fatalf("tcp getport port=%d want 2049", got) - } -} diff --git a/weed/server/nfs/rpc_version_filter.go b/weed/server/nfs/rpc_version_filter.go deleted file mode 100644 index f9136aa93..000000000 --- a/weed/server/nfs/rpc_version_filter.go +++ /dev/null @@ -1,377 +0,0 @@ -package nfs - -import ( - "bufio" - "encoding/binary" - "errors" - "io" - "net" - "sync" - "time" - - "github.com/seaweedfs/seaweedfs/weed/glog" -) - -// The upstream willscott/go-nfs library dispatches RPC calls by (program, -// procedure) only — it does not validate the RPC program version. That means -// a Linux client speaking NFSv4 (program 100003 vers 4) lands on the same -// handler map as NFSv3: proc=1 routes to NFSv3 SETATTR, which parses the -// NFSv4 COMPOUND args as if they were SETATTR3args and writes a malformed -// reply. The client cannot decode that reply, the kernel returns -// EPROTONOSUPPORT, and mount.nfs prints "requested NFS version or transport -// protocol is not supported" without ever falling back to v3. 
-// -// The default Linux mount.nfs path is to try NFSv4 first, so this affects -// every plain `mount -t nfs : /mnt` against a `weed nfs` -// deployment. To make the v4→v3 fallback work, we wrap the listener so the -// first RPC frame on each new TCP connection is inspected: if the program is -// NFS or MOUNT and the version is not 3, we synthesize a PROG_MISMATCH reply -// (with the supported version range 3..3) directly to the socket and close -// the connection. The client then retries with v3 and proceeds normally. -// -// Clients keep the same program/version for the lifetime of a TCP connection -// in practice, so we only need to check the first frame; subsequent frames -// flow through to go-nfs unchanged. This avoids vendoring go-nfs while still -// producing protocol-correct rejections. - -// RPC numeric constants used here (rpcMsgCall, rpcMsgReply, rpcMsgAccepted, -// rpcAcceptProgMismatch, rpcAuthNone, nfsProgram, mountProgram) are defined -// alongside the portmap responder in portmap.go to keep one source of truth -// per package. -const ( - // rpcVersionFilterPeekTimeout bounds how long we wait for the first frame - // header on a new connection before giving up and letting go-nfs handle - // the (possibly half-open) socket. - rpcVersionFilterPeekTimeout = 10 * time.Second - - // peeked length: 4-byte fragment marker + 24 bytes of fixed RPC header - // (xid + msg_type + rpcvers + prog + vers + proc). - rpcVersionFilterPeekLen = 28 - - // rpcVersionFilterAcceptBackoff is how long the accept loop sleeps - // after a transient Accept() error (EMFILE, EAGAIN, ECONNABORTED, - // etc.) before retrying. Mirrors portmapRetryBackoff in portmap.go so - // both NFS-listening goroutines back off identically under host - // resource pressure. - rpcVersionFilterAcceptBackoff = 50 * time.Millisecond - - supportedNFSVer = 3 -) - -// versionFilterListener moves the per-connection RPC peek off the -// Listener.Accept() critical path. 
Peeking inline would let one slow or idle -// client (or a TCP three-way handshake without any RPC payload) hold -// rpcVersionFilterPeekTimeout — i.e. up to 10 seconds — of head-of-line -// blocking against every other connect, since gonfs.Serve only calls Accept -// serially. Instead, a background goroutine runs the inner Accept() loop and -// hands each raw conn to its own short-lived goroutine that does the peek; -// validated conns are sent on acceptCh and the wrapper's Accept() reads from -// that channel. Rejected conns never reach the channel — PROG_MISMATCH is -// already on the wire by the time the per-conn goroutine returns. -type versionFilterListener struct { - inner net.Listener - acceptCh chan net.Conn - - // closed is signalled either by Close() or by the accept loop after the - // inner listener returns a terminal error. After it fires Accept() will - // stop blocking and return acceptErr (or net.ErrClosed if none). - closed chan struct{} - closeOnce sync.Once - - mu sync.Mutex - acceptErr error - // inFlight tracks raw (pre-peek) conns that are currently in - // handleConn so Close() can break their Peek() deadline by closing - // them, instead of waiting up to rpcVersionFilterPeekTimeout per - // idle client for the timeout to fire on its own. - inFlight map[net.Conn]struct{} - - startOnce sync.Once - wg sync.WaitGroup -} - -func newVersionFilterListener(inner net.Listener) net.Listener { - return &versionFilterListener{ - inner: inner, - acceptCh: make(chan net.Conn), - closed: make(chan struct{}), - } -} - -// start lazily kicks off the background accept loop the first time someone -// calls Accept(). This matches the behaviour of the embedded-listener form we -// replaced — no goroutines spawn just from constructing the wrapper. 
-func (l *versionFilterListener) start() { - l.startOnce.Do(func() { - l.wg.Add(1) - go l.acceptLoop() - }) -} - -func (l *versionFilterListener) Accept() (net.Conn, error) { - l.start() - select { - case c := <-l.acceptCh: - return c, nil - case <-l.closed: - return nil, l.terminalErr() - } -} - -func (l *versionFilterListener) Close() error { - l.signalClose() - err := l.inner.Close() - // Eagerly close any raw conns currently blocked in filterFirstRPCFrame's - // Peek so handleConn returns promptly. Without this, an idle client - // (TCP handshake without any RPC payload) holds Close() up to - // rpcVersionFilterPeekTimeout — 10s of stop-the-world per such conn. - l.evictInFlight() - l.wg.Wait() - return err -} - -func (l *versionFilterListener) Addr() net.Addr { - return l.inner.Addr() -} - -func (l *versionFilterListener) signalClose() { - l.closeOnce.Do(func() { - close(l.closed) - }) -} - -func (l *versionFilterListener) terminalErr() error { - l.mu.Lock() - defer l.mu.Unlock() - if l.acceptErr != nil { - return l.acceptErr - } - return net.ErrClosed -} - -// trackInFlight records a raw conn that's about to be peeked, so Close() -// can break its Peek() deadline by closing it. Returns false if shutdown -// has already started; the caller must close the conn and bail. -func (l *versionFilterListener) trackInFlight(c net.Conn) bool { - l.mu.Lock() - defer l.mu.Unlock() - select { - case <-l.closed: - return false - default: - } - if l.inFlight == nil { - l.inFlight = make(map[net.Conn]struct{}) - } - l.inFlight[c] = struct{}{} - return true -} - -func (l *versionFilterListener) untrackInFlight(c net.Conn) { - l.mu.Lock() - defer l.mu.Unlock() - delete(l.inFlight, c) -} - -// evictInFlight closes every conn currently in handleConn so their -// in-flight Peek() returns immediately. delete(nil-map, k) is a no-op, -// so handleConn's deferred untrackInFlight is safe even after we've -// nilled the map here. 
-func (l *versionFilterListener) evictInFlight() { - l.mu.Lock() - conns := l.inFlight - l.inFlight = nil - l.mu.Unlock() - for c := range conns { - _ = c.Close() - } -} - -func (l *versionFilterListener) acceptLoop() { - defer l.wg.Done() - defer l.signalClose() - for { - conn, err := l.inner.Accept() - if err != nil { - // Permanent: the inner listener has been closed (Close(), - // shutdown, or an unrecoverable error from the OS). Surface - // the error to Accept() and stop. - if errors.Is(err, net.ErrClosed) { - l.mu.Lock() - if l.acceptErr == nil { - l.acceptErr = err - } - l.mu.Unlock() - return - } - // Transient (EMFILE, EAGAIN, ECONNABORTED on accept, - // timeouts if a deadline is ever set): treating these as - // terminal would tear the whole NFS server down on a - // resource blip. Back off briefly and retry, mirroring the - // pattern in portmap.go's serveTCP. - glog.V(1).Infof("nfs version filter: transient accept error: %v", err) - select { - case <-l.closed: - return - case <-time.After(rpcVersionFilterAcceptBackoff): - continue - } - } - l.wg.Add(1) - go l.handleConn(conn) - } -} - -// handleConn runs the version peek for a single accepted conn. Because each -// conn has its own goroutine, a slow client only blocks itself; concurrent -// peeks proceed in parallel up to whatever the runtime can schedule. If -// Close() fires before the peek completes we drop the validated conn so we -// don't leak a socket past shutdown. -func (l *versionFilterListener) handleConn(conn net.Conn) { - defer l.wg.Done() - if !l.trackInFlight(conn) { - // Shutdown beat us: don't start the Peek that we'd then - // have to break, just close the raw conn. - _ = conn.Close() - return - } - defer l.untrackInFlight(conn) - - wrapped, accepted := filterFirstRPCFrame(conn) - if !accepted { - // Already replied with PROG_MISMATCH and closed conn. 
- return - } - select { - case l.acceptCh <- wrapped: - case <-l.closed: - _ = wrapped.Close() - } -} - -// peekedConn returns the bytes that filterFirstRPCFrame already buffered when -// it peeked the first RPC header, then transparently reads from the -// underlying connection. Writes go straight to the socket; the bufio reader -// only buffers the read side. -type peekedConn struct { - net.Conn - reader io.Reader -} - -func (c *peekedConn) Read(p []byte) (int, error) { - return c.reader.Read(p) -} - -// filterFirstRPCFrame inspects the first RPC frame on conn and decides whether -// to pass it through to go-nfs. Returns (wrappedConn, true) if the frame is -// for a supported (program, version) — including programs we don't recognize, -// since go-nfs handles its own PROG_UNAVAIL response. Returns (nil, false) if -// we already replied with PROG_MISMATCH and closed conn. -// -// On peek failure (early close, deadline) we pass the connection through: -// returning an error here would silently drop legitimate clients on a flaky -// link, and go-nfs has its own per-frame error handling. -func filterFirstRPCFrame(conn net.Conn) (net.Conn, bool) { - r := bufio.NewReader(conn) - - deadlineErr := conn.SetReadDeadline(time.Now().Add(rpcVersionFilterPeekTimeout)) - - hdr, peekErr := r.Peek(rpcVersionFilterPeekLen) - - // Always clear the deadline before returning to go-nfs; failing to do so - // would make every subsequent Read() time out at the same instant. - if deadlineErr == nil { - _ = conn.SetReadDeadline(time.Time{}) - } - - if peekErr != nil { - return &peekedConn{Conn: conn, reader: r}, true - } - - fragMark := binary.BigEndian.Uint32(hdr[0:4]) - if fragMark&(1<<31) == 0 { - // Multi-fragment record: portmap-style filtering of the first frame - // would need reassembly. Fall through to go-nfs which handles this. 
- return &peekedConn{Conn: conn, reader: r}, true - } - - // Peek(28) can read across record boundaries — the first fragment may - // be shorter than the fixed RPC CALL header (24 bytes after the marker) - // with the remaining bytes belonging to the *next* RPC. Indexing into - // hdr[16:24] without first checking the fragment length would parse - // fields from a different RPC and either spuriously reject or pass it. - // Pass through if the first fragment can't possibly hold a full header - // and let go-nfs surface the framing error. - if fragLen := fragMark &^ uint32(1<<31); fragLen < 24 { - return &peekedConn{Conn: conn, reader: r}, true - } - - xid := binary.BigEndian.Uint32(hdr[4:8]) - if msgType := binary.BigEndian.Uint32(hdr[8:12]); msgType != rpcMsgCall { - // Not a CALL — odd, but pass through. - return &peekedConn{Conn: conn, reader: r}, true - } - if rpcVers := binary.BigEndian.Uint32(hdr[12:16]); rpcVers != 2 { - // ONC RPC v2 is the only version we and go-nfs speak; if the - // rpcvers field is anything else the rest of the header is - // untrusted (could be a non-RPC protocol that happens to share - // the port, or simply garbled traffic). Don't synthesize a - // PROG_MISMATCH that lies about supporting NFS — pass it - // through and let go-nfs / RFC 5531 §9 RPC_MISMATCH handling - // in the upstream library do the right thing. - return &peekedConn{Conn: conn, reader: r}, true - } - - prog := binary.BigEndian.Uint32(hdr[16:20]) - vers := binary.BigEndian.Uint32(hdr[20:24]) - - switch prog { - case nfsProgram, mountProgram: - default: - // Unknown program: let go-nfs reply PROG_UNAVAIL itself. 
- return &peekedConn{Conn: conn, reader: r}, true - } - - if vers == supportedNFSVer { - return &peekedConn{Conn: conn, reader: r}, true - } - - glog.V(1).Infof("nfs: rejecting client %s with PROG_MISMATCH: prog=%d vers=%d (supported=%d)", - conn.RemoteAddr(), prog, vers, supportedNFSVer) - - if err := writeProgMismatchTCP(conn, xid, supportedNFSVer, supportedNFSVer); err != nil { - glog.V(1).Infof("nfs: write PROG_MISMATCH to %s: %v", conn.RemoteAddr(), err) - } - _ = conn.Close() - return nil, false -} - -// writeProgMismatchTCP encodes a single-frame TCP RPC reply carrying -// MSG_ACCEPTED + PROG_MISMATCH along with the supported version range, per -// RFC 5531 section 9. The frame layout is: -// -// uint32 fragment_header (last-fragment | length) -// uint32 xid -// uint32 msg_type=REPLY(1) -// uint32 reply_stat=MSG_ACCEPTED(0) -// uint32 verf_flavor=AUTH_NONE(0) -// uint32 verf_len=0 -// uint32 accept_stat=PROG_MISMATCH(2) -// uint32 low -// uint32 high -const progMismatchBodyLen = 32 - -func writeProgMismatchTCP(w io.Writer, xid, low, high uint32) error { - out := make([]byte, 4+progMismatchBodyLen) - binary.BigEndian.PutUint32(out[0:4], uint32(progMismatchBodyLen)|(1<<31)) - binary.BigEndian.PutUint32(out[4:8], xid) - binary.BigEndian.PutUint32(out[8:12], rpcMsgReply) - binary.BigEndian.PutUint32(out[12:16], rpcMsgAccepted) - binary.BigEndian.PutUint32(out[16:20], rpcAuthNone) - binary.BigEndian.PutUint32(out[20:24], 0) // verf opaque length (always zero for AUTH_NONE) - binary.BigEndian.PutUint32(out[24:28], rpcAcceptProgMismatch) - binary.BigEndian.PutUint32(out[28:32], low) - binary.BigEndian.PutUint32(out[32:36], high) - _, err := w.Write(out) - return err -} diff --git a/weed/server/nfs/rpc_version_filter_test.go b/weed/server/nfs/rpc_version_filter_test.go deleted file mode 100644 index 77b7cae21..000000000 --- a/weed/server/nfs/rpc_version_filter_test.go +++ /dev/null @@ -1,561 +0,0 @@ -package nfs - -import ( - "encoding/binary" - "errors" - "io" - 
"net" - "sync" - "testing" - "time" -) - -// buildRPCCallFrame constructs a single TCP-framed RPC CALL header without -// procedure arguments — enough for the version filter to decide whether to -// reject the connection. The frame layout matches RFC 5531 (Open Network -// Computing RPC v2): a 4-byte fragment marker (last-fragment bit set on a -// 40-byte body) followed by xid + msg_type=CALL + rpcvers=2 + prog + vers + -// proc + two empty AUTH_NONE opaque_auth structs. -func buildRPCCallFrame(xid, prog, vers, proc uint32) []byte { - const bodyLen = 40 - frame := make([]byte, 4+bodyLen) - binary.BigEndian.PutUint32(frame[0:4], uint32(bodyLen)|(1<<31)) - binary.BigEndian.PutUint32(frame[4:8], xid) - binary.BigEndian.PutUint32(frame[8:12], 0) // msg_type CALL - binary.BigEndian.PutUint32(frame[12:16], 2) - binary.BigEndian.PutUint32(frame[16:20], prog) - binary.BigEndian.PutUint32(frame[20:24], vers) - binary.BigEndian.PutUint32(frame[24:28], proc) - // cred + verf both AUTH_NONE / length 0 - return frame -} - -// readPROGMismatchReply parses a TCP-framed PROG_MISMATCH reply produced by -// writeProgMismatchTCP and returns the xid plus the supported (low, high) -// version range advertised by the server. 
-func readPROGMismatchReply(t *testing.T, conn net.Conn) (xid, low, high uint32) { - t.Helper() - _ = conn.SetReadDeadline(time.Now().Add(2 * time.Second)) - buf := make([]byte, 4+progMismatchBodyLen) - n, err := io.ReadFull(conn, buf) - if err != nil { - t.Fatalf("read reply: %v (got %d bytes)", err, n) - } - frag := binary.BigEndian.Uint32(buf[0:4]) - if frag&(1<<31) == 0 { - t.Fatalf("reply frame missing last-fragment bit: %x", frag) - } - if got := frag &^ (1 << 31); got != progMismatchBodyLen { - t.Fatalf("reply body length=%d want %d", got, progMismatchBodyLen) - } - xid = binary.BigEndian.Uint32(buf[4:8]) - if mt := binary.BigEndian.Uint32(buf[8:12]); mt != 1 { - t.Fatalf("reply msg_type=%d want REPLY(1)", mt) - } - if rs := binary.BigEndian.Uint32(buf[12:16]); rs != 0 { - t.Fatalf("reply reply_stat=%d want MSG_ACCEPTED(0)", rs) - } - if as := binary.BigEndian.Uint32(buf[24:28]); as != 2 { - t.Fatalf("reply accept_stat=%d want PROG_MISMATCH(2)", as) - } - low = binary.BigEndian.Uint32(buf[28:32]) - high = binary.BigEndian.Uint32(buf[32:36]) - return -} - -func TestVersionFilterRejectsNFSv4WithProgMismatch(t *testing.T) { - innerListener, err := net.Listen("tcp", "127.0.0.1:0") - if err != nil { - t.Fatal(err) - } - defer innerListener.Close() - - listener := newVersionFilterListener(innerListener) - - // In a real server, accepted conns are passed to go-nfs. We just need - // to drive Accept() so the filter runs; the test never sees a wrapped - // conn because the v4 frame is rejected. - accepted := make(chan net.Conn, 1) - go func() { - for { - c, aerr := listener.Accept() - if aerr != nil { - return - } - accepted <- c - } - }() - - conn, err := net.Dial("tcp", innerListener.Addr().String()) - if err != nil { - t.Fatalf("dial: %v", err) - } - defer conn.Close() - - // NFSv4 NULL: the first probe Linux mount.nfs sends when trying v4. 
- if _, err := conn.Write(buildRPCCallFrame(0xdeadbeef, nfsProgram, 4, 0)); err != nil { - t.Fatalf("write: %v", err) - } - - xid, low, high := readPROGMismatchReply(t, conn) - const wantXID uint32 = 0xdeadbeef - if xid != wantXID { - t.Errorf("xid=%x want %x", xid, wantXID) - } - if low != supportedNFSVer || high != supportedNFSVer { - t.Errorf("supported range=(%d,%d) want (%d,%d)", low, high, supportedNFSVer, supportedNFSVer) - } - - // Filter must close the connection after replying so the client knows - // not to send another RPC on this socket. Insist on io.EOF specifically: - // "any error" would let a stuck (but still-open) connection pass this - // check via a deadline timeout, which is exactly the regression we want - // to catch. - _ = conn.SetReadDeadline(time.Now().Add(time.Second)) - one := make([]byte, 1) - n, err := conn.Read(one) - switch { - case err == nil: - t.Errorf("expected EOF after PROG_MISMATCH but read returned %d bytes", n) - case !errors.Is(err, io.EOF): - t.Errorf("expected io.EOF after PROG_MISMATCH, got %v (likely a regression where the filter replies but does not close)", err) - } - - select { - case c := <-accepted: - c.Close() - t.Error("rejected connection should not be returned to caller") - case <-time.After(100 * time.Millisecond): - } -} - -func TestVersionFilterRejectsMOUNTv4WithProgMismatch(t *testing.T) { - innerListener, err := net.Listen("tcp", "127.0.0.1:0") - if err != nil { - t.Fatal(err) - } - defer innerListener.Close() - - listener := newVersionFilterListener(innerListener) - go func() { - for { - c, aerr := listener.Accept() - if aerr != nil { - return - } - c.Close() - } - }() - - conn, err := net.Dial("tcp", innerListener.Addr().String()) - if err != nil { - t.Fatal(err) - } - defer conn.Close() - - if _, err := conn.Write(buildRPCCallFrame(42, mountProgram, 4, 0)); err != nil { - t.Fatal(err) - } - - xid, low, high := readPROGMismatchReply(t, conn) - if xid != 42 { - t.Errorf("xid=%d want 42", xid) - } - if low 
!= supportedNFSVer || high != supportedNFSVer { - t.Errorf("supported range=(%d,%d) want (3,3)", low, high) - } -} - -func TestVersionFilterPassesThroughNFSv3(t *testing.T) { - innerListener, err := net.Listen("tcp", "127.0.0.1:0") - if err != nil { - t.Fatal(err) - } - defer innerListener.Close() - - listener := newVersionFilterListener(innerListener) - got := make(chan []byte, 1) - go func() { - c, aerr := listener.Accept() - if aerr != nil { - return - } - defer c.Close() - buf := make([]byte, 44) - _, rerr := io.ReadFull(c, buf) - if rerr != nil { - return - } - got <- buf - }() - - conn, err := net.Dial("tcp", innerListener.Addr().String()) - if err != nil { - t.Fatal(err) - } - defer conn.Close() - - frame := buildRPCCallFrame(7, nfsProgram, 3, 0) - if _, err := conn.Write(frame); err != nil { - t.Fatal(err) - } - - select { - case received := <-got: - if string(received) != string(frame) { - t.Error("v3 frame was modified or partially consumed by filter") - } - case <-time.After(2 * time.Second): - t.Fatal("v3 frame not delivered to inner accept handler") - } -} - -func TestVersionFilterPassesThroughUnknownProgram(t *testing.T) { - // The filter should only police NFS / MOUNT versions; other programs - // reach go-nfs which already responds PROG_UNAVAIL itself. Otherwise - // adding a new program (e.g. NLM) here would require updating the - // filter, which would defeat the point of using it as a thin shim. 
- innerListener, err := net.Listen("tcp", "127.0.0.1:0") - if err != nil { - t.Fatal(err) - } - defer innerListener.Close() - - listener := newVersionFilterListener(innerListener) - delivered := make(chan struct{}, 1) - go func() { - c, aerr := listener.Accept() - if aerr != nil { - return - } - defer c.Close() - buf := make([]byte, 44) - if _, rerr := io.ReadFull(c, buf); rerr == nil { - delivered <- struct{}{} - } - }() - - conn, err := net.Dial("tcp", innerListener.Addr().String()) - if err != nil { - t.Fatal(err) - } - defer conn.Close() - - // Program 100021 is NLM, which weed nfs doesn't run; let go-nfs handle - // the unsupported-program reply. - if _, err := conn.Write(buildRPCCallFrame(99, 100021, 4, 0)); err != nil { - t.Fatal(err) - } - - select { - case <-delivered: - case <-time.After(2 * time.Second): - t.Fatal("unknown-program frame should pass through filter") - } -} - -// transientErrListener wraps a real net.Listener but injects a configurable -// number of transient Accept() errors before delegating. It exists only to -// regression-test the version filter's transient-retry behaviour without -// having to provoke real EMFILE conditions on the host. 
-type transientErrListener struct { - inner net.Listener - mu sync.Mutex - remaining int -} - -type fakeAcceptError struct{} - -func (fakeAcceptError) Error() string { return "fake transient accept error" } - -func (l *transientErrListener) Accept() (net.Conn, error) { - l.mu.Lock() - if l.remaining > 0 { - l.remaining-- - l.mu.Unlock() - return nil, fakeAcceptError{} - } - l.mu.Unlock() - return l.inner.Accept() -} - -func (l *transientErrListener) Close() error { return l.inner.Close() } -func (l *transientErrListener) Addr() net.Addr { return l.inner.Addr() } - -func TestVersionFilterRetriesTransientAcceptErrors(t *testing.T) { - // Regression test: previously the accept loop exited on any error - // from the inner listener, which meant a single transient EMFILE / - // EAGAIN under host resource pressure would tear the entire NFS - // server down. Inject a few fake transient errors and assert the - // filter still delivers the next real connection. - innerListener, err := net.Listen("tcp", "127.0.0.1:0") - if err != nil { - t.Fatal(err) - } - defer innerListener.Close() - - injected := &transientErrListener{inner: innerListener, remaining: 3} - listener := newVersionFilterListener(injected) - - delivered := make(chan struct{}, 1) - go func() { - c, aerr := listener.Accept() - if aerr != nil { - return - } - defer c.Close() - buf := make([]byte, 44) - if _, rerr := io.ReadFull(c, buf); rerr == nil { - delivered <- struct{}{} - } - }() - - conn, err := net.Dial("tcp", innerListener.Addr().String()) - if err != nil { - t.Fatal(err) - } - defer conn.Close() - - if _, err := conn.Write(buildRPCCallFrame(1, nfsProgram, 3, 0)); err != nil { - t.Fatal(err) - } - - // 3 transient errors × ~50ms backoff plus normal accept latency. Allow - // a generous bound so flakes on slow CI don't surface here, but still - // tight enough to catch a regression to "any error is terminal". 
- select { - case <-delivered: - case <-time.After(2 * time.Second): - t.Fatal("filter did not retry transient Accept() errors and recover") - } -} - -func TestVersionFilterCloseReturnsPromptlyWithIdlePeekConns(t *testing.T) { - // Regression test: Close() used to wait on every handleConn goroutine - // via wg.Wait, but those goroutines could be stuck in - // filterFirstRPCFrame's Peek() until rpcVersionFilterPeekTimeout (10s) - // fired. An idle client that completed a TCP handshake but never sent - // a byte would stretch shutdown by up to that timeout per conn. - // Close() now eagerly closes any tracked in-flight raw conns, which - // forces Peek() to return immediately and lets handleConn finish. - // - // Black-box test: only observes Close() latency. With the regression - // in place Close() would block ~10s; with the fix it returns in well - // under a second. - innerListener, err := net.Listen("tcp", "127.0.0.1:0") - if err != nil { - t.Fatal(err) - } - - listener := newVersionFilterListener(innerListener) - // Drive Accept once so the background accept loop is running. - go func() { _, _ = listener.Accept() }() - - const idleConns = 4 - dialed := make([]net.Conn, 0, idleConns) - defer func() { - for _, c := range dialed { - _ = c.Close() - } - }() - for i := 0; i < idleConns; i++ { - c, err := net.Dial("tcp", innerListener.Addr().String()) - if err != nil { - t.Fatal(err) - } - dialed = append(dialed, c) - } - - // Give handleConn time to invoke Peek for each idle conn — without - // this the test could race ahead and Close() while no goroutine has - // actually started peeking yet, masking the regression. - time.Sleep(100 * time.Millisecond) - - // Close() must finish in well under rpcVersionFilterPeekTimeout (10s). - // 2s is a generous bound that still clearly distinguishes "broke the - // peek by closing the conn" from "waited for the peek deadline". 
- start := time.Now() - if err := listener.Close(); err != nil { - t.Errorf("Close: %v", err) - } - elapsed := time.Since(start) - if elapsed > 2*time.Second { - t.Errorf("Close took %v with %d idle pre-peek conns; should be sub-second once they're forcibly closed", elapsed, idleConns) - } -} - -func TestVersionFilterPassesThroughNonV2RPC(t *testing.T) { - // Anything that isn't ONC RPC v2 isn't ours to classify — even if the - // bytes at hdr[16:24] happen to look like nfsProgram + vers=4, we - // shouldn't synthesize a PROG_MISMATCH advertising NFSv3 support for - // what could be a completely different protocol sharing the port. - innerListener, err := net.Listen("tcp", "127.0.0.1:0") - if err != nil { - t.Fatal(err) - } - defer innerListener.Close() - - listener := newVersionFilterListener(innerListener) - delivered := make(chan struct{}, 1) - go func() { - c, aerr := listener.Accept() - if aerr != nil { - return - } - defer c.Close() - buf := make([]byte, 44) - if _, rerr := io.ReadFull(c, buf); rerr == nil { - delivered <- struct{}{} - } - }() - - conn, err := net.Dial("tcp", innerListener.Addr().String()) - if err != nil { - t.Fatal(err) - } - defer conn.Close() - - // Build a CALL frame, then overwrite the rpcvers field with 99. - // Without the rpcvers guard the filter would still parse prog=NFS, - // vers=4 from the same buffer and reject with PROG_MISMATCH. - frame := buildRPCCallFrame(0xfeedbeef, nfsProgram, 4, 0) - binary.BigEndian.PutUint32(frame[12:16], 99) // bogus rpcvers - if _, err := conn.Write(frame); err != nil { - t.Fatal(err) - } - - // Try to read a PROG_MISMATCH reply with a short deadline — none - // should arrive because the filter shouldn't pretend to know what - // this protocol is. 
- _ = conn.SetReadDeadline(time.Now().Add(500 * time.Millisecond)) - hdr := make([]byte, 4) - if n, err := io.ReadFull(conn, hdr); err == nil && n == 4 { - if got := binary.BigEndian.Uint32(hdr); got == uint32(progMismatchBodyLen)|(1<<31) { - t.Fatal("filter sent PROG_MISMATCH for a non-v2 RPC frame") - } - } - - // And the connection should reach the inner accept handler. - select { - case <-delivered: - case <-time.After(2 * time.Second): - t.Fatal("non-v2 RPC frame should pass through filter to inner accept") - } -} - -func TestVersionFilterIgnoresShortFirstFragment(t *testing.T) { - // Peek(28) can read past the first fragment's body when the body is - // shorter than the 24-byte fixed RPC CALL header. Without a length - // check, the prog/vers fields would be sourced from bytes belonging to - // the *next* RPC (or a syntactic accident), and the filter could - // spuriously reject the connection. Send a 12-byte first fragment whose - // trailing peek-region bytes look like an NFSv4 CALL header, and assert - // the filter does NOT emit a PROG_MISMATCH reply. - innerListener, err := net.Listen("tcp", "127.0.0.1:0") - if err != nil { - t.Fatal(err) - } - defer innerListener.Close() - - listener := newVersionFilterListener(innerListener) - go func() { - for { - c, aerr := listener.Accept() - if aerr != nil { - return - } - c.Close() - } - }() - - conn, err := net.Dial("tcp", innerListener.Addr().String()) - if err != nil { - t.Fatal(err) - } - defer conn.Close() - - const shortBody = 12 - payload := make([]byte, 4+24) - binary.BigEndian.PutUint32(payload[0:4], shortBody|(1<<31)) // last-fragment, body=12 - // Bytes 4..16 are the actual fragment body (12 bytes — too short for a - // CALL header; the filter must not look at them as one). - // Bytes 16..28 sit past the fragment in the peek window. If we were to - // (incorrectly) read prog/vers from hdr[16:24], we'd see NFS+v4 here. 
- binary.BigEndian.PutUint32(payload[16:20], nfsProgram) - binary.BigEndian.PutUint32(payload[20:24], 4) - - if _, err := conn.Write(payload); err != nil { - t.Fatal(err) - } - - // If the filter erroneously rejected, it would send a 36-byte TCP RPC - // reply (4-byte frag marker + 32-byte PROG_MISMATCH body) within ms. - // Wait briefly and assert nothing PROG_MISMATCH-shaped came back. - _ = conn.SetReadDeadline(time.Now().Add(500 * time.Millisecond)) - hdr := make([]byte, 4) - n, err := io.ReadFull(conn, hdr) - if err == nil && n == 4 { - if got := binary.BigEndian.Uint32(hdr); got == uint32(progMismatchBodyLen)|(1<<31) { - t.Fatal("filter sent PROG_MISMATCH on a short fragment whose trailing peek bytes only superficially resembled a v4 call") - } - } - // Anything else (timeout, EOF, or unrelated bytes) is fine — we only - // care that the filter did NOT misclassify the short fragment. -} - -func TestVersionFilterDoesNotHeadOfLineBlockOnSlowConn(t *testing.T) { - // Regression test: the previous implementation peeked the first RPC - // frame inline in Accept(), so an idle TCP-only connect would block - // every later Accept() call for up to rpcVersionFilterPeekTimeout. - // The peek now runs in a per-conn goroutine; a fast follow-up connect - // must reach the inner accept handler well before the slow conn's - // peek deadline. - innerListener, err := net.Listen("tcp", "127.0.0.1:0") - if err != nil { - t.Fatal(err) - } - defer innerListener.Close() - - listener := newVersionFilterListener(innerListener) - - delivered := make(chan struct{}, 1) - go func() { - c, aerr := listener.Accept() - if aerr != nil { - return - } - defer c.Close() - buf := make([]byte, 44) - if _, rerr := io.ReadFull(c, buf); rerr == nil { - delivered <- struct{}{} - } - }() - - // Slow client: connect, never write. Holds a goroutine inside the - // filter peeking until the deadline, but must not block the next conn. 
- slowConn, err := net.Dial("tcp", innerListener.Addr().String()) - if err != nil { - t.Fatal(err) - } - defer slowConn.Close() - - // Fast client: send a valid v3 frame straight away; this conn must be - // delivered to the inner accept handler without waiting for slowConn. - fastConn, err := net.Dial("tcp", innerListener.Addr().String()) - if err != nil { - t.Fatal(err) - } - defer fastConn.Close() - - if _, err := fastConn.Write(buildRPCCallFrame(11, nfsProgram, 3, 0)); err != nil { - t.Fatal(err) - } - - // Bound the wait well below rpcVersionFilterPeekTimeout (10s) so a - // regression to inline peeking would clearly time out here. - select { - case <-delivered: - case <-time.After(2 * time.Second): - t.Fatal("fast conn should not be head-of-line blocked by slow conn's peek") - } -} diff --git a/weed/server/nfs/server.go b/weed/server/nfs/server.go deleted file mode 100644 index 7e21f3450..000000000 --- a/weed/server/nfs/server.go +++ /dev/null @@ -1,250 +0,0 @@ -package nfs - -import ( - "context" - "errors" - "fmt" - "net" - "sync" - - "github.com/seaweedfs/seaweedfs/weed/filer" - "github.com/seaweedfs/seaweedfs/weed/glog" - "github.com/seaweedfs/seaweedfs/weed/pb" - "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" - "github.com/seaweedfs/seaweedfs/weed/util" - "github.com/seaweedfs/seaweedfs/weed/wdclient" - gonfs "github.com/willscott/go-nfs" - "google.golang.org/grpc" - "google.golang.org/grpc/credentials/insecure" -) - -type Option struct { - Filer pb.ServerAddress - BindIp string - Port int - FilerRootPath string - ReadOnly bool - AllowedClients []string - VolumeServerAccess string - GrpcDialOption grpc.DialOption - // PortmapBind, when non-empty, enables a built-in portmap v2 responder - // on :111 advertising the NFS v3 and MOUNT v3 services at - // Port. Empty (the default) disables portmap; clients must then bypass - // portmap with mount -o port=,mountport=,proto=tcp,mountproto=tcp. 
- PortmapBind string -} - -type Server struct { - option *Option - exportRoot util.FullPath - exportID uint32 - signature int32 - handleLimit int - clientAuthorizer *clientAuthorizer - sharedReaderCache *filer.ReaderCache - chunkInvalidator chunkInvalidator - filerClient *wdclient.FilerClient - newUploader func() (chunkUploader, error) - withFilerClient filerClientExecutor - withInternalClient internalClientExecutor - - rootFSOnce sync.Once - rootFS *seaweedFileSystem -} - -func NewServer(option *Option) (*Server, error) { - if option == nil { - return nil, errors.New("nfs option is required") - } - if option.Port <= 0 { - return nil, fmt.Errorf("nfs port must be positive: %d", option.Port) - } - if option.FilerRootPath == "" { - option.FilerRootPath = "/" - } - if option.VolumeServerAccess == "" { - option.VolumeServerAccess = "direct" - } - if option.GrpcDialOption == nil { - option.GrpcDialOption = grpc.WithTransportCredentials(insecure.NewCredentials()) - } - clientAuthorizer, err := newClientAuthorizer(option.AllowedClients) - if err != nil { - return nil, err - } - var filerClient *wdclient.FilerClient - if option.VolumeServerAccess != "filerProxy" { - var opts *wdclient.FilerClientOption - if option.VolumeServerAccess == "publicUrl" { - opts = &wdclient.FilerClientOption{UrlPreference: wdclient.PreferPublicUrl} - } - filerClient = wdclient.NewFilerClient([]pb.ServerAddress{option.Filer}, option.GrpcDialOption, "", opts) - } - exportRoot := normalizeExportRoot(util.FullPath(option.FilerRootPath)) - signature := util.RandomInt32() - return &Server{ - option: option, - exportRoot: exportRoot, - exportID: exportIDForRoot(exportRoot), - signature: signature, - handleLimit: 1 << 20, - clientAuthorizer: clientAuthorizer, - filerClient: filerClient, - newUploader: newChunkUploader, - withFilerClient: newFilerClientExecutor(option, signature), - withInternalClient: newInternalClientExecutor(option, signature), - }, nil -} - -func (s *Server) Start() error { - 
listener, err := net.Listen("tcp", fmt.Sprintf("%s:%d", s.option.BindIp, s.option.Port)) - if err != nil { - return fmt.Errorf("listen nfs on %s:%d: %w", s.option.BindIp, s.option.Port, err) - } - - // MOUNT v3 over UDP runs alongside the TCP NFS listener on the same - // port. The kernel default for mountproto is UDP in many setups, so - // without this responder a plain `mount -t nfs : /mnt` - // gets EPROTONOSUPPORT during the MOUNT phase even though the TCP - // NFS path is fine. - mountUDP := newMountUDPServer(s.option.BindIp, s.option.Port, s) - if err := mountUDP.Start(); err != nil { - _ = listener.Close() - return fmt.Errorf("start mount udp: %w", err) - } - defer func() { - _ = mountUDP.Close() - }() - glog.V(0).Infof("MOUNT v3 UDP responder listening on %s:%d", s.option.BindIp, s.option.Port) - - var portmap *portmapServer - if s.option.PortmapBind != "" { - portmap = newPortmapServer(s.option.PortmapBind, portmapPort, uint32(s.option.Port)) - if pmErr := portmap.Start(); pmErr != nil { - _ = listener.Close() - return fmt.Errorf("start portmap: %w", pmErr) - } - glog.V(0).Infof("NFS portmap responder listening on %s:%d (NFS v3 tcp=%d, MOUNT v3 tcp=%d, MOUNT v3 udp=%d)", - s.option.PortmapBind, portmapPort, s.option.Port, s.option.Port, s.option.Port) - defer func() { - if portmap != nil { - _ = portmap.Close() - } - }() - } - - s.logMountHint() - return s.serve(listener) -} - -// logMountHint prints a copy-pasteable Linux mount command so operators can -// see at startup how to mount the export from a client. -// -// With -portmap.bind set, MOUNT is now answered over both TCP and UDP, so a -// plain `mount -t nfs host:/export /mnt` works — there is no longer any -// kernel-default mountproto path that fails. Without -portmap.bind the -// client still has to bypass portmap entirely via the explicit -// port=/mountport=/proto=/mountproto= options. 
-func (s *Server) logMountHint() { - exportPath := string(s.exportRoot) - if s.option.PortmapBind != "" { - glog.V(0).Infof("mount example: mount -t nfs -o nfsvers=3,nolock :%s ", exportPath) - glog.V(0).Infof("(MOUNT v3 is served over both TCP and UDP, so no mountproto override is needed.)") - return - } - glog.V(0).Infof("mount example (bypasses portmap): mount -t nfs -o nfsvers=3,nolock,noacl,port=%d,mountport=%d,proto=tcp,mountproto=tcp :%s ", - s.option.Port, s.option.Port, exportPath) - glog.V(0).Infof("tip: pass -portmap.bind to enable the built-in portmap responder on port 111 so plain `mount -t nfs host:%s /mnt` works.", exportPath) -} - -func (s *Server) serve(listener net.Listener) error { - if s.filerClient != nil { - defer s.filerClient.Close() - } - if s.clientAuthorizer != nil && s.clientAuthorizer.enabled { - listener = &allowlistListener{ - Listener: listener, - authorizer: s.clientAuthorizer, - } - } - listener = newVersionFilterListener(listener) - - handler, err := s.newHandler() - if err != nil { - _ = listener.Close() - return err - } - followCtx, followCancel := context.WithCancel(context.Background()) - defer followCancel() - followDone := make(chan struct{}) - go func() { - defer close(followDone) - s.runMetadataInvalidationLoop(followCtx) - }() - defer func() { - followCancel() - <-followDone - }() - - glog.V(0).Infof("Start Seaweed NFS Server filer=%s bind=%s export=%s exportId=%d readOnly=%t allowedClients=%d volumeServerAccess=%s", - s.option.Filer, - listener.Addr(), - s.exportRoot, - s.exportID, - s.option.ReadOnly, - len(s.option.AllowedClients), - s.option.VolumeServerAccess, - ) - - return gonfs.Serve(listener, handler) -} - -func (s *Server) newHandler() (*Handler, error) { - if s == nil { - return nil, errors.New("nfs server is not configured") - } - return &Handler{ - server: s, - rootFS: s.rootFilesystem(), - }, nil -} - -// rootFilesystem returns a single seaweedFileSystem rooted at the -// configured export, building it on 
first call. Both the TCP handler -// (via newHandler) and the UDP MOUNT path use the same instance so -// they share the chunk reader cache and don't reconstruct a wrapper -// per request. -func (s *Server) rootFilesystem() *seaweedFileSystem { - s.rootFSOnce.Do(func() { - s.rootFS = newSeaweedFileSystem(s, s.exportRoot, s.sharedReaderCache) - if s.sharedReaderCache == nil { - s.sharedReaderCache = s.rootFS.readerCache - } - if s.chunkInvalidator == nil { - s.chunkInvalidator = s.sharedReaderCache - } - }) - return s.rootFS -} - -func (s *Server) WithFilerClient(streamingMode bool, fn func(filer_pb.SeaweedFilerClient) error) error { - if s == nil || s.withFilerClient == nil { - return errors.New("nfs filer client is not configured") - } - return s.withFilerClient(streamingMode, fn) -} - -func (s *Server) LookupFn() wdclient.LookupFileIdFunctionType { - if s == nil { - return nil - } - if s.option != nil && s.option.VolumeServerAccess == "filerProxy" { - return func(ctx context.Context, fileID string) ([]string, error) { - return []string{fmt.Sprintf("http://%s/?proxyChunkId=%s", s.option.Filer.ToHttpAddress(), fileID)}, nil - } - } - if s.filerClient != nil { - return s.filerClient.GetLookupFileIdFunction() - } - return nil -} diff --git a/weed/server/nfs/server_test.go b/weed/server/nfs/server_test.go deleted file mode 100644 index 36318d9ab..000000000 --- a/weed/server/nfs/server_test.go +++ /dev/null @@ -1,1083 +0,0 @@ -package nfs - -import ( - "bytes" - "context" - "errors" - "io" - "net" - "net/http" - "net/http/httptest" - "os" - "strings" - "testing" - "time" - - billy "github.com/go-git/go-billy/v5" - "github.com/seaweedfs/seaweedfs/weed/filer" - "github.com/seaweedfs/seaweedfs/weed/operation" - "github.com/seaweedfs/seaweedfs/weed/pb" - "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" - "github.com/seaweedfs/seaweedfs/weed/util" - util_http "github.com/seaweedfs/seaweedfs/weed/util/http" - "github.com/stretchr/testify/assert" - 
"github.com/stretchr/testify/require" - gonfs "github.com/willscott/go-nfs" - gonfsfile "github.com/willscott/go-nfs/file" - "google.golang.org/grpc" - "google.golang.org/protobuf/proto" -) - -type fakeListEntriesClient struct { - responses []*filer_pb.ListEntriesResponse - index int -} - -type fakeSubscribeMetadataClient struct { - responses []*filer_pb.SubscribeMetadataResponse - index int - err error -} - -func (c *fakeListEntriesClient) Recv() (*filer_pb.ListEntriesResponse, error) { - if c.index >= len(c.responses) { - return nil, io.EOF - } - resp := c.responses[c.index] - c.index++ - return resp, nil -} - -func (c *fakeSubscribeMetadataClient) Recv() (*filer_pb.SubscribeMetadataResponse, error) { - if c.err != nil { - return nil, c.err - } - if c.index >= len(c.responses) { - return nil, io.EOF - } - resp := c.responses[c.index] - c.index++ - return resp, nil -} - -type fakeNFSFilerClient struct { - kv map[string][]byte - entries map[util.FullPath]*filer_pb.Entry - updateResult map[util.FullPath]*filer_pb.Entry - statistics *filer_pb.StatisticsResponse - creates []*filer_pb.CreateEntryRequest - updates []*filer_pb.UpdateEntryRequest - deletes []*filer_pb.DeleteEntryRequest - renames []*filer_pb.AtomicRenameEntryRequest - subscribeRequests []*filer_pb.SubscribeMetadataRequest - subscribeResponses []*filer_pb.SubscribeMetadataResponse - subscribeErr error - nextInode uint64 -} - -type fakeChunkUploadCall struct { - assignRequest *filer_pb.AssignVolumeRequest - uploadOption *operation.UploadOption - uploadURL string - data []byte -} - -type fakeChunkUploader struct { - fileID string - result *operation.UploadResult - err error - calls []fakeChunkUploadCall -} - -type recordingChunkInvalidator struct { - fileIDs []string -} - -type fakeRemoteConn struct { - remote net.Addr -} - -func (c *fakeRemoteConn) Read(_ []byte) (int, error) { return 0, io.EOF } -func (c *fakeRemoteConn) Write(p []byte) (int, error) { return len(p), nil } -func (c *fakeRemoteConn) Close() 
error { return nil } -func (c *fakeRemoteConn) LocalAddr() net.Addr { return &net.TCPAddr{} } -func (c *fakeRemoteConn) RemoteAddr() net.Addr { return c.remote } -func (c *fakeRemoteConn) SetDeadline(time.Time) error { return nil } -func (c *fakeRemoteConn) SetReadDeadline(time.Time) error { return nil } -func (c *fakeRemoteConn) SetWriteDeadline(time.Time) error { return nil } - -func (i *recordingChunkInvalidator) UnCache(fileID string) { - i.fileIDs = append(i.fileIDs, fileID) -} - -func (f *fakeNFSFilerClient) KvGet(_ context.Context, in *filer_pb.KvGetRequest, _ ...grpc.CallOption) (*filer_pb.KvGetResponse, error) { - if value, found := f.kv[string(in.Key)]; found { - return &filer_pb.KvGetResponse{Value: value}, nil - } - return &filer_pb.KvGetResponse{}, nil -} - -func (f *fakeNFSFilerClient) LookupDirectoryEntry(_ context.Context, in *filer_pb.LookupDirectoryEntryRequest, _ ...grpc.CallOption) (*filer_pb.LookupDirectoryEntryResponse, error) { - fullPath := util.NewFullPath(in.Directory, in.Name) - if entry := f.materializeEntry(fullPath); entry != nil { - return &filer_pb.LookupDirectoryEntryResponse{Entry: entry}, nil - } - return nil, filer_pb.ErrNotFound -} - -func (f *fakeNFSFilerClient) ListEntries(_ context.Context, in *filer_pb.ListEntriesRequest, _ ...grpc.CallOption) (nfsListEntriesClient, error) { - requestedDir := util.FullPath(in.Directory) - var entries []*filer_pb.Entry - for fullPath, entry := range f.entries { - dir, _ := fullPath.DirAndName() - if util.FullPath(dir) != requestedDir { - continue - } - if materialized := f.materializeEntry(fullPath); materialized != nil { - entries = append(entries, materialized) - } else { - entries = append(entries, cloneEntry(entry)) - } - } - responses := make([]*filer_pb.ListEntriesResponse, 0, len(entries)) - for _, entry := range entries { - responses = append(responses, &filer_pb.ListEntriesResponse{Entry: entry}) - } - return &fakeListEntriesClient{responses: responses}, nil -} - -func (f 
*fakeNFSFilerClient) SubscribeMetadata(_ context.Context, in *filer_pb.SubscribeMetadataRequest, _ ...grpc.CallOption) (nfsSubscribeMetadataClient, error) { - f.subscribeRequests = append(f.subscribeRequests, proto.Clone(in).(*filer_pb.SubscribeMetadataRequest)) - return &fakeSubscribeMetadataClient{ - responses: f.subscribeResponses, - err: f.subscribeErr, - }, nil -} - -func (f *fakeNFSFilerClient) CreateEntry(_ context.Context, in *filer_pb.CreateEntryRequest, _ ...grpc.CallOption) (*filer_pb.CreateEntryResponse, error) { - f.creates = append(f.creates, in) - - fullPath := util.NewFullPath(in.Directory, in.Entry.Name) - if _, found := f.entries[fullPath]; found { - return &filer_pb.CreateEntryResponse{ - Error: "entry already exists", - ErrorCode: filer_pb.FilerError_ENTRY_ALREADY_EXISTS, - }, nil - } - - entry := cloneEntry(in.Entry) - storedEntry := f.persistEntry(fullPath, entry, false) - return &filer_pb.CreateEntryResponse{ - MetadataEvent: &filer_pb.SubscribeMetadataResponse{ - EventNotification: &filer_pb.EventNotification{ - NewEntry: cloneEntry(storedEntry), - }, - }, - }, nil -} - -func (f *fakeNFSFilerClient) UpdateEntry(_ context.Context, in *filer_pb.UpdateEntryRequest, _ ...grpc.CallOption) (*filer_pb.UpdateEntryResponse, error) { - f.updates = append(f.updates, in) - - fullPath := util.NewFullPath(in.Directory, in.Entry.Name) - updatedEntry := f.updateResult[fullPath] - if updatedEntry == nil { - updatedEntry = cloneEntry(in.Entry) - } - storedEntry := f.persistEntry(fullPath, updatedEntry, false) - - return &filer_pb.UpdateEntryResponse{ - MetadataEvent: &filer_pb.SubscribeMetadataResponse{ - EventNotification: &filer_pb.EventNotification{ - NewEntry: cloneEntry(storedEntry), - }, - }, - }, nil -} - -func (f *fakeNFSFilerClient) DeleteEntry(_ context.Context, in *filer_pb.DeleteEntryRequest, _ ...grpc.CallOption) (*filer_pb.DeleteEntryResponse, error) { - f.deletes = append(f.deletes, in) - - fullPath := util.NewFullPath(in.Directory, in.Name) - 
entry, found := f.entries[fullPath] - if !found { - return &filer_pb.DeleteEntryResponse{Error: filer_pb.ErrNotFound.Error()}, nil - } - - if len(entry.GetHardLinkId()) > 0 { - f.decrementHardLink(entry.GetHardLinkId()) - } - if inode := entry.GetAttributes().GetInode(); inode != 0 { - f.removeInodeIndexPath(fullPath, inode) - } - delete(f.entries, fullPath) - return &filer_pb.DeleteEntryResponse{}, nil -} - -func (f *fakeNFSFilerClient) AtomicRenameEntry(_ context.Context, in *filer_pb.AtomicRenameEntryRequest, _ ...grpc.CallOption) (*filer_pb.AtomicRenameEntryResponse, error) { - f.renames = append(f.renames, in) - - oldPath := util.NewFullPath(in.OldDirectory, in.OldName) - entry, found := f.entries[oldPath] - if !found { - return nil, filer_pb.ErrNotFound - } - delete(f.entries, oldPath) - if inode := entry.GetAttributes().GetInode(); inode != 0 { - f.removeInodeIndexPath(oldPath, inode) - } - - newPath := util.NewFullPath(in.NewDirectory, in.NewName) - renamed := cloneEntry(entry) - renamed.Name = in.NewName - renamed = f.persistEntry(newPath, renamed, true) - - return &filer_pb.AtomicRenameEntryResponse{}, nil -} - -func (f *fakeNFSFilerClient) Statistics(_ context.Context, _ *filer_pb.StatisticsRequest, _ ...grpc.CallOption) (*filer_pb.StatisticsResponse, error) { - return f.statistics, nil -} - -func (f *fakeNFSFilerClient) persistEntry(fullPath util.FullPath, entry *filer_pb.Entry, preserveZeroInode bool) *filer_pb.Entry { - if f.entries == nil { - f.entries = make(map[util.FullPath]*filer_pb.Entry) - } - if f.kv == nil { - f.kv = make(map[string][]byte) - } - - cloned := cloneEntry(entry) - if cloned.Attributes == nil { - cloned.Attributes = &filer_pb.FuseAttributes{} - } - if !preserveZeroInode && cloned.Attributes.Inode == 0 { - cloned.Attributes.Inode = f.allocateInode() - } - cloned.Name = fullPath.Name() - f.entries[fullPath] = cloned - - if cloned.Attributes.Inode != 0 { - f.addInodeIndexPath(fullPath, cloned.Attributes.Inode) - } - if 
len(cloned.GetHardLinkId()) > 0 { - f.storeHardLinkBlob(fullPath, cloned) - } - return cloned -} - -func (f *fakeNFSFilerClient) materializeEntry(fullPath util.FullPath) *filer_pb.Entry { - entry, found := f.entries[fullPath] - if !found || entry == nil { - return nil - } - cloned := cloneEntry(entry) - if len(cloned.GetHardLinkId()) == 0 { - return cloned - } - - value, found := f.kv[string(cloned.GetHardLinkId())] - if !found { - return cloned - } - - dir, _ := fullPath.DirAndName() - fsEntry := filer.FromPbEntry(dir, cloned) - if err := fsEntry.DecodeAttributesAndChunks(value); err != nil { - return cloned - } - fsEntry.FullPath = fullPath - return fsEntry.ToProtoEntry() -} - -func (f *fakeNFSFilerClient) addInodeIndexPath(fullPath util.FullPath, inode uint64) { - if inode == 0 { - return - } - - record := &filer.InodeIndexRecord{Generation: filer.InodeIndexInitialGeneration} - if value, found := f.kv[string(filer.InodeIndexKey(inode))]; found { - if decoded, err := filer.DecodeInodeIndexRecord(value); err == nil { - record = decoded - } - } - record.Paths = append(record.Paths, string(fullPath)) - value, err := record.Encode() - if err == nil { - f.kv[string(filer.InodeIndexKey(inode))] = value - } -} - -func (f *fakeNFSFilerClient) removeInodeIndexPath(fullPath util.FullPath, inode uint64) { - if inode == 0 { - return - } - - key := string(filer.InodeIndexKey(inode)) - value, found := f.kv[key] - if !found { - return - } - record, err := filer.DecodeInodeIndexRecord(value) - if err != nil { - delete(f.kv, key) - return - } - var kept []string - for _, path := range record.Paths { - if util.FullPath(path) != fullPath { - kept = append(kept, path) - } - } - record.Paths = kept - if len(record.Paths) == 0 { - delete(f.kv, key) - return - } - value, err = record.Encode() - if err == nil { - f.kv[key] = value - } -} - -func (f *fakeNFSFilerClient) storeHardLinkBlob(fullPath util.FullPath, entry *filer_pb.Entry) { - dir, _ := fullPath.DirAndName() - fsEntry := 
filer.FromPbEntry(dir, cloneEntry(entry)) - fsEntry.FullPath = fullPath - value, err := fsEntry.EncodeAttributesAndChunks() - if err == nil { - f.kv[string(entry.GetHardLinkId())] = value - } -} - -func (f *fakeNFSFilerClient) decrementHardLink(hardLinkID []byte) { - value, found := f.kv[string(hardLinkID)] - if !found { - return - } - - fsEntry := &filer.Entry{} - if err := fsEntry.DecodeAttributesAndChunks(value); err != nil { - return - } - fsEntry.HardLinkCounter-- - if fsEntry.HardLinkCounter <= 0 { - delete(f.kv, string(hardLinkID)) - return - } - value, err := fsEntry.EncodeAttributesAndChunks() - if err == nil { - f.kv[string(hardLinkID)] = value - } -} - -func (f *fakeNFSFilerClient) allocateInode() uint64 { - if f.nextInode == 0 { - f.nextInode = 1000 - } - f.nextInode++ - return f.nextInode -} - -func (u *fakeChunkUploader) UploadWithRetry(_ filer_pb.FilerClient, assignRequest *filer_pb.AssignVolumeRequest, uploadOption *operation.UploadOption, genFileUrlFn func(host, fileId string) string, reader io.Reader) (string, *operation.UploadResult, error, []byte) { - data, err := io.ReadAll(reader) - if err != nil { - return "", nil, err, nil - } - - fileID := u.fileID - if fileID == "" { - fileID = "7,abc" - } - result := u.result - if result == nil { - result = &operation.UploadResult{ - Size: uint32(len(data)), - ContentMd5: "etag", - } - } - - var assignClone *filer_pb.AssignVolumeRequest - if assignRequest != nil { - assignClone, _ = proto.Clone(assignRequest).(*filer_pb.AssignVolumeRequest) - } - var optionClone *operation.UploadOption - if uploadOption != nil { - copied := *uploadOption - optionClone = &copied - } - - u.calls = append(u.calls, fakeChunkUploadCall{ - assignRequest: assignClone, - uploadOption: optionClone, - uploadURL: genFileUrlFn("volume.example:8080", fileID), - data: bytes.Clone(data), - }) - return fileID, result, u.err, data -} - -func cloneEntry(entry *filer_pb.Entry) *filer_pb.Entry { - if entry == nil { - return nil - } - cloned, 
_ := proto.Clone(entry).(*filer_pb.Entry) - return cloned -} - -func testEntry(name string, isDirectory bool, inode uint64, mode uint32, content []byte) *filer_pb.Entry { - return &filer_pb.Entry{ - Name: name, - IsDirectory: isDirectory, - Content: content, - Attributes: &filer_pb.FuseAttributes{ - Inode: inode, - FileMode: mode, - FileSize: uint64(len(content)), - }, - } -} - -func testIndexRecord(t *testing.T, inode uint64, generation uint64, path util.FullPath) []byte { - t.Helper() - record := &filer.InodeIndexRecord{ - Generation: generation, - Paths: []string{string(path)}, - } - value, err := record.Encode() - require.NoError(t, err) - return value -} - -func newTestServer(t *testing.T, exportRoot string, client *fakeNFSFilerClient) *Server { - t.Helper() - - server, err := NewServer(&Option{ - Filer: pb.ServerAddress("test-filer:8888"), - FilerRootPath: exportRoot, - Port: 2049, - }) - require.NoError(t, err) - - server.withInternalClient = func(_ bool, fn func(nfsFilerClient) error) error { - return fn(client) - } - server.withFilerClient = func(_ bool, fn func(filer_pb.SeaweedFilerClient) error) error { - return errors.New("test does not provide full filer client") - } - - return server -} - -func TestNewServerRejectsInvalidAllowedClientCIDR(t *testing.T) { - _, err := NewServer(&Option{ - FilerRootPath: "/exports", - Port: 2049, - AllowedClients: []string{"10.0.0.0/not-a-cidr"}, - }) - require.Error(t, err) -} - -func TestHandlerMountAndFileHandleRoundTrip(t *testing.T) { - client := &fakeNFSFilerClient{ - kv: map[string][]byte{ - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 5, "/exports"), - string(filer.InodeIndexKey(202)): testIndexRecord(t, 202, 9, "/exports/demo.txt"), - }, - entries: map[util.FullPath]*filer_pb.Entry{ - "/exports": testEntry("exports", true, 101, uint32(0755), nil), - "/exports/demo.txt": testEntry("demo.txt", false, 202, uint32(0644), []byte("hello")), - }, - } - - server := newTestServer(t, "/exports", client) - 
handler, err := server.newHandler() - require.NoError(t, err) - - status, filesystem, authFlavors := handler.Mount(context.Background(), nil, gonfs.MountRequest{Dirpath: []byte("/exports")}) - require.Equal(t, gonfs.MountStatusOk, status) - require.NotNil(t, filesystem) - assert.Equal(t, []gonfs.AuthFlavor{gonfs.AuthFlavorNull, gonfs.AuthFlavorUnix}, authFlavors) - - handle := handler.ToHandle(filesystem, []string{"demo.txt"}) - require.NotEmpty(t, handle) - - resolvedFS, path, err := handler.FromHandle(handle) - require.NoError(t, err) - assert.Same(t, handler.rootFS, resolvedFS) - assert.Equal(t, []string{"demo.txt"}, path) -} - -func TestHandlerAcceptsAnyMountPath(t *testing.T) { - const exportRoot = "/buckets/data" - - client := &fakeNFSFilerClient{ - entries: map[util.FullPath]*filer_pb.Entry{ - "/buckets": testEntry("buckets", true, 100, uint32(0755), nil), - "/buckets/data": testEntry("data", true, 101, uint32(0755), nil), - }, - kv: map[string][]byte{ - string(filer.InodeIndexKey(100)): testIndexRecord(t, 100, 1, "/buckets"), - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/buckets/data"), - }, - } - - server := newTestServer(t, exportRoot, client) - handler, err := server.newHandler() - require.NoError(t, err) - - dirpaths := []string{ - "/", - "/buckets", - "/buckets/other", - "/wrong/path", - "", - "buckets/data", - exportRoot, - exportRoot + "/", - } - for _, dirpath := range dirpaths { - t.Run(dirpath, func(t *testing.T) { - status, fs, _ := handler.Mount(context.Background(), nil, gonfs.MountRequest{Dirpath: []byte(dirpath)}) - assert.Equal(t, gonfs.MountStatusOk, status, "Mount(%q)", dirpath) - assert.NotNil(t, fs, "Mount(%q)", dirpath) - }) - } -} - -func TestHandlerSubexportMount(t *testing.T) { - const exportRoot = "/buckets" - - client := &fakeNFSFilerClient{ - entries: map[util.FullPath]*filer_pb.Entry{ - "/buckets": testEntry("buckets", true, 100, uint32(0755), nil), - "/buckets/data": testEntry("data", true, 101, uint32(0755), 
nil), - "/buckets/data/nested": testEntry("nested", true, 102, uint32(0755), nil), - "/buckets/file.txt": testEntry("file.txt", false, 103, uint32(0644), []byte("hi")), - }, - kv: map[string][]byte{ - string(filer.InodeIndexKey(100)): testIndexRecord(t, 100, 1, "/buckets"), - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/buckets/data"), - string(filer.InodeIndexKey(102)): testIndexRecord(t, 102, 1, "/buckets/data/nested"), - string(filer.InodeIndexKey(103)): testIndexRecord(t, 103, 1, "/buckets/file.txt"), - }, - } - - server := newTestServer(t, exportRoot, client) - handler, err := server.newHandler() - require.NoError(t, err) - - cases := []struct { - name string - dirpath string - wantStatus gonfs.MountStatus - wantSub util.FullPath - }{ - {name: "subdirectory_one_level", dirpath: "/buckets/data", wantStatus: gonfs.MountStatusOk, wantSub: "/buckets/data"}, - {name: "subdirectory_two_levels", dirpath: "/buckets/data/nested", wantStatus: gonfs.MountStatusOk, wantSub: "/buckets/data/nested"}, - {name: "subdirectory_trailing_slash", dirpath: "/buckets/data/", wantStatus: gonfs.MountStatusOk, wantSub: "/buckets/data"}, - {name: "missing_under_export", dirpath: "/buckets/missing", wantStatus: gonfs.MountStatusErrNoEnt}, - {name: "deep_missing_under_export", dirpath: "/buckets/data/no-such-thing", wantStatus: gonfs.MountStatusErrNoEnt}, - {name: "regular_file_not_directory", dirpath: "/buckets/file.txt", wantStatus: gonfs.MountStatusErrNotDir}, - } - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - status, fs, _ := handler.Mount(context.Background(), nil, gonfs.MountRequest{Dirpath: []byte(tc.dirpath)}) - assert.Equal(t, tc.wantStatus, status, "Mount(%q)", tc.dirpath) - if tc.wantStatus != gonfs.MountStatusOk { - assert.Nil(t, fs) - return - } - require.NotNil(t, fs) - subFS, ok := fs.(*seaweedFileSystem) - require.True(t, ok) - assert.Equal(t, tc.wantSub, subFS.actualRoot) - }) - } -} - -func 
TestHandlerRejectsMountFromUnauthorizedClient(t *testing.T) { - client := &fakeNFSFilerClient{ - entries: map[util.FullPath]*filer_pb.Entry{ - "/exports": testEntry("exports", true, 101, uint32(0755), nil), - }, - kv: map[string][]byte{ - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"), - }, - } - - server := newTestServer(t, "/exports", client) - server.option.AllowedClients = []string{"10.0.0.0/8"} - authorizer, err := newClientAuthorizer(server.option.AllowedClients) - require.NoError(t, err) - server.clientAuthorizer = authorizer - - handler, err := server.newHandler() - require.NoError(t, err) - - req := gonfs.MountRequest{Dirpath: []byte("/exports")} - - deniedConn := &fakeRemoteConn{remote: &net.TCPAddr{IP: net.ParseIP("127.0.0.1"), Port: 12345}} - status, filesystem, _ := handler.Mount(context.Background(), deniedConn, req) - assert.Equal(t, gonfs.MountStatusErrAcces, status) - assert.Nil(t, filesystem) - - allowedConn := &fakeRemoteConn{remote: &net.TCPAddr{IP: net.ParseIP("10.2.3.4"), Port: 12345}} - status, filesystem, _ = handler.Mount(context.Background(), allowedConn, req) - assert.Equal(t, gonfs.MountStatusOk, status) - assert.NotNil(t, filesystem) -} - -func TestSeaweedFileSystemReadOnlyDisablesMutations(t *testing.T) { - client := &fakeNFSFilerClient{ - kv: map[string][]byte{ - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"), - }, - entries: map[util.FullPath]*filer_pb.Entry{ - "/exports": testEntry("exports", true, 101, uint32(0755), nil), - }, - } - - server := newTestServer(t, "/exports", client) - server.option.ReadOnly = true - - handler, err := server.newHandler() - require.NoError(t, err) - - assert.False(t, billy.CapabilityCheck(handler.rootFS, billy.WriteCapability)) - assert.False(t, billy.CapabilityCheck(handler.rootFS, billy.TruncateCapability)) - assert.Nil(t, handler.Change(handler.rootFS)) - - _, err = handler.rootFS.OpenFile("/new.txt", os.O_CREATE|os.O_RDWR, 0o644) - 
require.ErrorIs(t, err, billy.ErrReadOnly) - - err = handler.rootFS.MkdirAll("/docs", 0o755) - require.ErrorIs(t, err, billy.ErrReadOnly) -} - -func TestSeaweedFileSystemStatAndOpenFollowSymlinks(t *testing.T) { - client := &fakeNFSFilerClient{ - kv: map[string][]byte{ - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"), - string(filer.InodeIndexKey(202)): testIndexRecord(t, 202, 2, "/exports/target.txt"), - string(filer.InodeIndexKey(303)): testIndexRecord(t, 303, 3, "/exports/link.txt"), - }, - entries: map[util.FullPath]*filer_pb.Entry{ - "/exports": testEntry("exports", true, 101, uint32(0755), nil), - "/exports/target.txt": testEntry("target.txt", false, 202, uint32(0644), []byte("hello")), - "/exports/link.txt": { - Name: "link.txt", - Attributes: &filer_pb.FuseAttributes{ - Inode: 303, - FileMode: uint32(0o777), - SymlinkTarget: "target.txt", - }, - }, - }, - } - - server := newTestServer(t, "/exports", client) - handler, err := server.newHandler() - require.NoError(t, err) - - linkInfo, err := handler.rootFS.Lstat("/link.txt") - require.NoError(t, err) - assert.NotZero(t, linkInfo.Mode()&os.ModeSymlink) - - targetInfo, err := handler.rootFS.Stat("/link.txt") - require.NoError(t, err) - assert.Zero(t, targetInfo.Mode()&os.ModeSymlink) - assert.Equal(t, int64(5), targetInfo.Size()) - - file, err := handler.rootFS.Open("/link.txt") - require.NoError(t, err) - defer file.Close() - - data, err := io.ReadAll(file) - require.NoError(t, err) - assert.Equal(t, "hello", string(data)) -} - -func TestSeaweedFileSystemAppendModeAndUnsupportedLocks(t *testing.T) { - client := &fakeNFSFilerClient{ - kv: map[string][]byte{ - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"), - string(filer.InodeIndexKey(202)): testIndexRecord(t, 202, 2, "/exports/demo.txt"), - }, - entries: map[util.FullPath]*filer_pb.Entry{ - "/exports": testEntry("exports", true, 101, uint32(0755), nil), - "/exports/demo.txt": testEntry("demo.txt", false, 
202, uint32(0644), []byte("hello")), - }, - } - - server := newTestServer(t, "/exports", client) - handler, err := server.newHandler() - require.NoError(t, err) - - assert.False(t, billy.CapabilityCheck(handler.rootFS, billy.LockCapability)) - - file, err := handler.rootFS.OpenFile("/demo.txt", os.O_WRONLY|os.O_APPEND, 0) - require.NoError(t, err) - - // POSIX allows Seek on an O_APPEND fd — it only restricts Write. A - // Seek to the beginning should succeed, but the subsequent Write must - // still land at the end of file. - newOffset, err := file.Seek(0, io.SeekStart) - require.NoError(t, err) - require.Equal(t, int64(0), newOffset) - require.ErrorIs(t, file.Lock(), billy.ErrNotSupported) - require.ErrorIs(t, file.Unlock(), billy.ErrNotSupported) - - _, err = file.Write([]byte("!")) - require.NoError(t, err) - require.NoError(t, file.Close()) - - updated, err := handler.rootFS.Open("/demo.txt") - require.NoError(t, err) - defer updated.Close() - - data, err := io.ReadAll(updated) - require.NoError(t, err) - assert.Equal(t, "hello!", string(data)) -} - -func TestServerApplyMetadataInvalidationResponseUncachesExportChunks(t *testing.T) { - invalidator := &recordingChunkInvalidator{} - server := &Server{ - exportRoot: "/exports", - chunkInvalidator: invalidator, - } - - server.applyMetadataInvalidationResponse(&filer_pb.SubscribeMetadataResponse{ - Directory: "/exports", - EventNotification: &filer_pb.EventNotification{ - OldEntry: &filer_pb.Entry{ - Name: "old.txt", - Chunks: []*filer_pb.FileChunk{ - {FileId: "1,old"}, - }, - }, - NewEntry: &filer_pb.Entry{ - Name: "new.txt", - Chunks: []*filer_pb.FileChunk{ - {FileId: "2,new"}, - {FileId: "1,old"}, - }, - }, - NewParentPath: "/exports/renamed", - }, - Events: []*filer_pb.SubscribeMetadataResponse{ - { - Directory: "/outside", - EventNotification: &filer_pb.EventNotification{ - NewEntry: &filer_pb.Entry{ - Name: "skip.txt", - Chunks: []*filer_pb.FileChunk{ - {FileId: "9,skip"}, - }, - }, - }, - }, - { - Directory: 
"/exports", - EventNotification: &filer_pb.EventNotification{ - NewEntry: &filer_pb.Entry{ - Name: "nested.txt", - Chunks: []*filer_pb.FileChunk{ - {FileId: "3,nested"}, - }, - }, - }, - }, - }, - }) - - assert.Equal(t, []string{"1,old", "2,new", "3,nested"}, invalidator.fileIDs) -} - -func TestServerFollowMetadataStreamSubscribesAndInvalidates(t *testing.T) { - client := &fakeNFSFilerClient{ - subscribeResponses: []*filer_pb.SubscribeMetadataResponse{ - { - Directory: "/exports", - EventNotification: &filer_pb.EventNotification{ - NewEntry: &filer_pb.Entry{ - Name: "demo.txt", - Chunks: []*filer_pb.FileChunk{ - {FileId: "7,abc"}, - }, - }, - }, - }, - }, - } - invalidator := &recordingChunkInvalidator{} - - server := newTestServer(t, "/exports", client) - server.chunkInvalidator = invalidator - - err := server.followMetadataStream(context.Background()) - require.NoError(t, err) - require.Len(t, client.subscribeRequests, 1) - assert.Equal(t, "/exports", client.subscribeRequests[0].GetPathPrefix()) - assert.Equal(t, "nfs", client.subscribeRequests[0].GetClientName()) - assert.Equal(t, []string{"7,abc"}, invalidator.fileIDs) -} - -func TestSeaweedFileSystemBackfillsLegacyInodeOnStat(t *testing.T) { - client := &fakeNFSFilerClient{ - kv: map[string][]byte{ - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"), - string(filer.InodeIndexKey(303)): testIndexRecord(t, 303, 7, "/exports/legacy.txt"), - }, - entries: map[util.FullPath]*filer_pb.Entry{ - "/exports": testEntry("exports", true, 101, uint32(0755), nil), - "/exports/legacy.txt": testEntry("legacy.txt", false, 0, uint32(0644), []byte("abc")), - }, - updateResult: map[util.FullPath]*filer_pb.Entry{ - "/exports/legacy.txt": testEntry("legacy.txt", false, 303, uint32(0644), []byte("abc")), - }, - } - - server := newTestServer(t, "/exports", client) - handler, err := server.newHandler() - require.NoError(t, err) - - info, err := handler.rootFS.Lstat("/legacy.txt") - require.NoError(t, err) - 
require.Len(t, client.updates, 1) - assert.Equal(t, int64(3), info.Size()) - - nfsInfo, ok := info.Sys().(*gonfsfile.FileInfo) - require.True(t, ok) - assert.Equal(t, uint64(303), nfsInfo.Fileid) -} - -func TestSeaweedFileSystemReadsInlineContent(t *testing.T) { - client := &fakeNFSFilerClient{ - kv: map[string][]byte{ - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"), - string(filer.InodeIndexKey(202)): testIndexRecord(t, 202, 3, "/exports/demo.txt"), - }, - entries: map[util.FullPath]*filer_pb.Entry{ - "/exports": testEntry("exports", true, 101, uint32(0755), nil), - "/exports/demo.txt": testEntry("demo.txt", false, 202, uint32(0644), []byte("hello")), - }, - } - - server := newTestServer(t, "/exports", client) - handler, err := server.newHandler() - require.NoError(t, err) - - file, err := handler.rootFS.Open("/demo.txt") - require.NoError(t, err) - defer file.Close() - - buf := make([]byte, 5) - n, err := file.Read(buf) - require.NoError(t, err) - assert.Equal(t, 5, n) - assert.Equal(t, "hello", string(buf)) -} - -func TestSeaweedFileSystemReadsChunkThroughFilerProxy(t *testing.T) { - initIntegrationHTTPClient.Do(util_http.InitGlobalHttpClient) - - payload := []byte("hello via filer proxy") - proxyRequests := 0 - proxyServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - proxyRequests++ - assert.Equal(t, "7,proxy", r.URL.Query().Get("proxyChunkId")) - _, _ = w.Write(payload) - })) - defer proxyServer.Close() - - client := &fakeNFSFilerClient{ - kv: map[string][]byte{ - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"), - string(filer.InodeIndexKey(202)): testIndexRecord(t, 202, 3, "/exports/proxy.txt"), - }, - entries: map[util.FullPath]*filer_pb.Entry{ - "/exports": { - Name: "exports", - IsDirectory: true, - Attributes: &filer_pb.FuseAttributes{ - Inode: 101, - FileMode: uint32(0755), - }, - }, - "/exports/proxy.txt": { - Name: "proxy.txt", - Chunks: 
[]*filer_pb.FileChunk{ - {FileId: "7,proxy", Size: uint64(len(payload))}, - }, - Attributes: &filer_pb.FuseAttributes{ - Inode: 202, - FileMode: uint32(0644), - FileSize: uint64(len(payload)), - }, - }, - }, - } - - server := newTestServer(t, "/exports", client) - server.option.VolumeServerAccess = "filerProxy" - server.option.Filer = pb.ServerAddress(strings.TrimPrefix(proxyServer.URL, "http://")) - - handler, err := server.newHandler() - require.NoError(t, err) - - file, err := handler.rootFS.Open("/proxy.txt") - require.NoError(t, err) - defer file.Close() - - data, err := io.ReadAll(file) - require.NoError(t, err) - assert.Equal(t, payload, data) - assert.Equal(t, 1, proxyRequests) -} - -func TestSeaweedFileSystemReadDirAndFSStat(t *testing.T) { - client := &fakeNFSFilerClient{ - kv: map[string][]byte{ - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"), - string(filer.InodeIndexKey(202)): testIndexRecord(t, 202, 2, "/exports/b.txt"), - string(filer.InodeIndexKey(303)): testIndexRecord(t, 303, 3, "/exports/a.txt"), - }, - entries: map[util.FullPath]*filer_pb.Entry{ - "/exports": testEntry("exports", true, 101, uint32(0755), nil), - "/exports/b.txt": testEntry("b.txt", false, 202, uint32(0644), []byte("b")), - "/exports/a.txt": testEntry("a.txt", false, 303, uint32(0644), []byte("aa")), - }, - statistics: &filer_pb.StatisticsResponse{ - TotalSize: 100, - UsedSize: 40, - FileCount: 3, - }, - } - - server := newTestServer(t, "/exports", client) - handler, err := server.newHandler() - require.NoError(t, err) - - entries, err := handler.rootFS.ReadDir("/") - require.NoError(t, err) - require.Len(t, entries, 2) - assert.Equal(t, "a.txt", entries[0].Name()) - assert.Equal(t, "b.txt", entries[1].Name()) - - var stat gonfs.FSStat - err = handler.FSStat(context.Background(), handler.rootFS, &stat) - require.NoError(t, err) - assert.Equal(t, uint64(100), stat.TotalSize) - assert.Equal(t, uint64(60), stat.FreeSize) - assert.Equal(t, uint64(60), 
stat.AvailableSize) - assert.Equal(t, uint64(3), stat.TotalFiles) -} - -func TestSeaweedFileSystemSupportsNamespaceMutations(t *testing.T) { - client := &fakeNFSFilerClient{ - kv: map[string][]byte{ - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"), - }, - entries: map[util.FullPath]*filer_pb.Entry{ - "/exports": testEntry("exports", true, 101, uint32(0755), nil), - }, - } - - server := newTestServer(t, "/exports", client) - handler, err := server.newHandler() - require.NoError(t, err) - - err = handler.rootFS.MkdirAll("/docs", 0o755) - require.NoError(t, err) - - file, err := handler.rootFS.Create("/docs/note.txt") - require.NoError(t, err) - _, err = file.Write([]byte("hello")) - require.NoError(t, err) - require.NoError(t, file.Close()) - - err = handler.rootFS.Chmod("/docs/note.txt", 0o600) - require.NoError(t, err) - - err = handler.rootFS.Rename("/docs/note.txt", "/docs/final.txt") - require.NoError(t, err) - - truncateFile, err := handler.rootFS.OpenFile("/docs/final.txt", os.O_WRONLY|os.O_EXCL, 0) - require.NoError(t, err) - require.NoError(t, truncateFile.Truncate(2)) - require.NoError(t, truncateFile.Close()) - - readFile, err := handler.rootFS.Open("/docs/final.txt") - require.NoError(t, err) - defer readFile.Close() - - buf := make([]byte, 2) - n, err := readFile.Read(buf) - require.NoError(t, err) - assert.Equal(t, 2, n) - assert.Equal(t, "he", string(buf)) - - info, err := handler.rootFS.Stat("/docs/final.txt") - require.NoError(t, err) - assert.Equal(t, os.FileMode(0o600), info.Mode().Perm()) - assert.Equal(t, int64(2), info.Size()) - - err = handler.rootFS.Remove("/docs/final.txt") - require.NoError(t, err) - _, err = handler.rootFS.Stat("/docs/final.txt") - require.ErrorIs(t, err, os.ErrNotExist) - - require.Len(t, client.creates, 2) - require.Len(t, client.updates, 3) - require.Len(t, client.renames, 1) - require.Len(t, client.deletes, 1) -} - -func TestSeaweedFileSystemUploadsLargeWritesAsChunks(t *testing.T) { - client 
:= &fakeNFSFilerClient{ - kv: map[string][]byte{ - string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"), - }, - entries: map[util.FullPath]*filer_pb.Entry{ - "/exports": testEntry("exports", true, 101, uint32(0755), nil), - }, - } - uploader := &fakeChunkUploader{fileID: "9,xyz"} - - server := newTestServer(t, "/exports", client) - server.option.VolumeServerAccess = "filerProxy" - server.newUploader = func() (chunkUploader, error) { - return uploader, nil - } - - handler, err := server.newHandler() - require.NoError(t, err) - - require.NoError(t, handler.rootFS.MkdirAll("/docs", 0o755)) - - file, err := handler.rootFS.Create("/docs/big.bin") - require.NoError(t, err) - - payload := bytes.Repeat([]byte("a"), maxInlineWriteSize+1) - n, err := file.Write(payload) - require.NoError(t, err) - require.Equal(t, len(payload), n) - require.NoError(t, file.Close()) - - require.Len(t, uploader.calls, 1) - call := uploader.calls[0] - require.NotNil(t, call.assignRequest) - require.NotNil(t, call.uploadOption) - assert.Equal(t, "/exports/docs/big.bin", call.assignRequest.Path) - assert.Equal(t, "big.bin", call.uploadOption.Filename) - assert.Equal(t, "http://test-filer:8888/?proxyChunkId=9,xyz", call.uploadURL) - assert.Equal(t, payload, call.data) - - entry := client.entries["/exports/docs/big.bin"] - require.NotNil(t, entry) - require.Len(t, entry.GetChunks(), 1) - assert.Nil(t, entry.Content) - assert.Equal(t, uint64(len(payload)), entry.GetAttributes().GetFileSize()) - assert.Equal(t, "9,xyz", entry.GetChunks()[0].GetFileId()) -} diff --git a/weed/server/nfs/uploader.go b/weed/server/nfs/uploader.go deleted file mode 100644 index 7d309499e..000000000 --- a/weed/server/nfs/uploader.go +++ /dev/null @@ -1,40 +0,0 @@ -package nfs - -import ( - "io" - - "github.com/seaweedfs/seaweedfs/weed/operation" - "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" -) - -type chunkUploader interface { - UploadWithRetry( - filerClient filer_pb.FilerClient, - assignRequest 
*filer_pb.AssignVolumeRequest, - uploadOption *operation.UploadOption, - genFileUrlFn func(host, fileId string) string, - reader io.Reader, - ) (fileId string, uploadResult *operation.UploadResult, err error, data []byte) -} - -type operationChunkUploader struct { - uploader *operation.Uploader -} - -func (u operationChunkUploader) UploadWithRetry( - filerClient filer_pb.FilerClient, - assignRequest *filer_pb.AssignVolumeRequest, - uploadOption *operation.UploadOption, - genFileUrlFn func(host, fileId string) string, - reader io.Reader, -) (string, *operation.UploadResult, error, []byte) { - return u.uploader.UploadWithRetry(filerClient, assignRequest, uploadOption, genFileUrlFn, reader) -} - -func newChunkUploader() (chunkUploader, error) { - uploader, err := operation.NewUploader() - if err != nil { - return nil, err - } - return operationChunkUploader{uploader: uploader}, nil -}