refactor(filer): remove the inode->path index and the NFS gateway (#9724)

* fix(filer): derive inodes by hash instead of a snowflake sequencer

Compute the same inode the FUSE mount would: non-hard-linked entries hash path + crtime, hard links hash their shared HardLinkId so every link resolves to one inode. Removes the snowflake inodeSequencer and the SEAWEEDFS_FILER_SNOWFLAKE_ID knob; inodes are now deterministic across filers.

* chore: remove the experimental NFS gateway

The NFS frontend ('weed nfs') was the only consumer of the inode->path index. Remove the weed/server/nfs package, the command and its registration, the integration test harness, and the CI workflow; go mod tidy drops the willscott/go-nfs and go-nfs-client dependencies.

* refactor(filer): drop the inode->path index

With the NFS gateway gone, nothing reads it. A regular file's inode is a pure hash of its path and a hard link's is a hash of its shared HardLinkId -- both derivable on demand -- so the secondary KV index and its write/remove hooks are dead. Removes filer_inode_index.go and the recordInodeIndex hooks from the store wrapper.
This commit is contained in:
Chris Lu
2026-05-28 15:00:18 -07:00
committed by GitHub
parent 3537312045
commit dfd05d14cb
37 changed files with 64 additions and 9247 deletions

View File

@@ -1,137 +0,0 @@
name: "NFS Integration Tests"
on:
push:
branches: [ master, main ]
paths:
- 'weed/server/nfs/**'
- 'weed/command/nfs.go'
- 'weed/filer/filer_inode.go'
- 'weed/filer/filer_inode_index.go'
- 'weed/filer/filerstore_wrapper.go'
- 'weed/server/filer_grpc_server_rename.go'
- 'test/nfs/**'
- '.github/workflows/nfs-tests.yml'
pull_request:
branches: [ master, main ]
paths:
- 'weed/server/nfs/**'
- 'weed/command/nfs.go'
- 'weed/filer/filer_inode.go'
- 'weed/filer/filer_inode_index.go'
- 'weed/filer/filerstore_wrapper.go'
- 'weed/server/filer_grpc_server_rename.go'
- 'test/nfs/**'
- '.github/workflows/nfs-tests.yml'
# Cancel superseded runs of this workflow. github.head_ref is only populated
# for pull_request events, so fall back to github.ref on push; otherwise every
# push-triggered run would share the single group "/nfs-tests" and runs on
# different branches would cancel each other.
concurrency:
  group: ${{ github.head_ref || github.ref }}/nfs-tests
  cancel-in-progress: true
permissions:
contents: read
env:
TEST_TIMEOUT: '15m'
jobs:
nfs-integration:
name: NFS Integration Testing
runs-on: ubuntu-22.04
timeout-minutes: 20
steps:
- name: Checkout code
uses: actions/checkout@v6
- name: Set up Go
uses: actions/setup-go@v6
with:
go-version-file: 'go.mod'
- name: Build SeaweedFS
run: |
cd weed
go build -o weed .
chmod +x weed
./weed version
- name: Run NFS Integration Tests
run: |
cd test/nfs
echo "Running NFS integration tests..."
echo "============================================"
# Install test dependencies
go mod download
# Run the protocol-layer tests. The kernel-mount tests require root
# for mount(2) and are exercised in their own privileged step below;
# skip them here so a "skipped because not root" line doesn't show
# up as noise on every CI run.
go test -v -timeout=${{ env.TEST_TIMEOUT }} -skip '^TestKernelMount' ./...
echo "============================================"
echo "NFS integration tests completed"
- name: Install kernel NFS client
run: |
# nfs-common provides mount.nfs; netbase provides /etc/protocols
# which mount.nfs's protocol-name lookups (`tcp`, `udp`) need.
sudo apt-get update
sudo apt-get install -y nfs-common netbase
- name: Run kernel-mount E2E tests
run: |
cd test/nfs
echo "Running kernel-mount end-to-end tests..."
echo "These mount the running 'weed nfs' subprocess via the actual"
echo "Linux NFS client to catch protocol regressions invisible to"
echo "the go-nfs-client-based tests above."
echo "============================================"
# mount(2) is privileged. Preserve PATH so 'go' (and the weed
# binary that test/nfs/framework.go locates via $PATH) resolve
# correctly under sudo, and pass through the Go module/cache dirs
# so we don't redownload modules under root.
sudo env "PATH=$PATH" \
GOMODCACHE="$(go env GOMODCACHE)" \
GOCACHE="$(go env GOCACHE)" \
go test -v -timeout=${{ env.TEST_TIMEOUT }} -run '^TestKernelMount' ./...
echo "============================================"
echo "Kernel-mount E2E tests completed"
- name: Test Summary
if: always()
run: |
echo "## NFS Integration Test Summary" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "### Test Coverage" >> $GITHUB_STEP_SUMMARY
echo "- **Read/Write Round Trip**: Basic file create + read" >> $GITHUB_STEP_SUMMARY
echo "- **Directory Operations**: Mkdir, ReadDirPlus, RmDir" >> $GITHUB_STEP_SUMMARY
echo "- **Nested Directories**: Deep tree creation and leaf I/O" >> $GITHUB_STEP_SUMMARY
echo "- **Rename**: Content preserved across rename" >> $GITHUB_STEP_SUMMARY
echo "- **Overwrite + Truncate**: Setattr(size=0) + shorter write" >> $GITHUB_STEP_SUMMARY
echo "- **Large Files**: 3 MiB binary round trip" >> $GITHUB_STEP_SUMMARY
echo "- **Edge Payloads**: All 256 byte values + empty files" >> $GITHUB_STEP_SUMMARY
echo "- **Symlinks**: Symlink + Lookup" >> $GITHUB_STEP_SUMMARY
echo "- **Missing Path**: Remove on missing entry errors cleanly" >> $GITHUB_STEP_SUMMARY
echo "- **FSINFO**: Non-zero rtpref/wtpref advertised" >> $GITHUB_STEP_SUMMARY
echo "- **Sequential Append**: Two-part concatenation" >> $GITHUB_STEP_SUMMARY
echo "- **ReadDir After Remove**: Meta cache does not serve stale entries" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "### Kernel-Mount E2E Coverage" >> $GITHUB_STEP_SUMMARY
echo "- **V3 over TCP**: baseline NFSv3 mount + readdir" >> $GITHUB_STEP_SUMMARY
echo "- **V3 with mountproto=udp**: regression test for UDP MOUNT v3 responder" >> $GITHUB_STEP_SUMMARY
echo "- **V4 rejects cleanly**: regression test for the v4 PROG_MISMATCH path (#9262)" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "### Harness" >> $GITHUB_STEP_SUMMARY
echo "Most tests boot their own master + volume + filer + nfs subprocess" >> $GITHUB_STEP_SUMMARY
echo "stack on loopback and drive it via the NFSv3 RPC protocol using" >> $GITHUB_STEP_SUMMARY
echo "go-nfs-client. The kernel-mount E2E tests reuse the same harness" >> $GITHUB_STEP_SUMMARY
echo "but mount the export through the in-tree Linux NFS client to" >> $GITHUB_STEP_SUMMARY
echo "catch protocol regressions a Go-only client can't see; they run" >> $GITHUB_STEP_SUMMARY
echo "in a separate privileged step (mount(2) requires root)." >> $GITHUB_STEP_SUMMARY

5
go.mod
View File

@@ -129,7 +129,6 @@ require (
github.com/cognusion/imaging v1.0.3
github.com/fluent/fluent-logger-golang v1.10.1
github.com/getsentry/sentry-go v0.44.1
github.com/go-git/go-billy/v5 v5.9.0
github.com/go-ldap/ldap/v3 v3.4.13
github.com/golang-jwt/jwt/v5 v5.3.1
github.com/google/flatbuffers/go v0.0.0-20230108230133-3b8644d32c50
@@ -152,8 +151,6 @@ require (
github.com/tarantool/go-tarantool/v2 v2.4.2
github.com/testcontainers/testcontainers-go v0.40.0
github.com/tikv/client-go/v2 v2.0.7
github.com/willscott/go-nfs v0.0.4
github.com/willscott/go-nfs-client v0.0.0-20251022144359-801f10d98886
github.com/xeipuuv/gojsonschema v1.2.0
github.com/ydb-platform/ydb-go-sdk-auth-environ v0.5.1
github.com/ydb-platform/ydb-go-sdk/v3 v3.134.2
@@ -211,6 +208,7 @@ require (
github.com/dromara/dongle v1.0.1 // indirect
github.com/gin-gonic/gin v1.11.0 // indirect
github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 // indirect
github.com/go-git/go-billy/v5 v5.9.0 // indirect
github.com/goccy/go-yaml v1.18.0 // indirect
github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 // indirect
github.com/google/go-cmp v0.7.0 // indirect
@@ -257,7 +255,6 @@ require (
github.com/pquerna/otp v1.5.0 // indirect
github.com/pterm/pterm v0.12.82 // indirect
github.com/quic-go/qpack v0.6.0 // indirect
github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93 // indirect
github.com/rclone/Proton-API-Bridge v1.0.3 // indirect
github.com/rclone/go-proton-api v1.0.2 // indirect
github.com/rogpeppe/go-internal v1.14.1 // indirect

6
go.sum
View File

@@ -1783,8 +1783,6 @@ github.com/quic-go/quic-go v0.59.0 h1:OLJkp1Mlm/aS7dpKgTc6cnpynnD2Xg7C1pwL6vy/SA
github.com/quic-go/quic-go v0.59.0/go.mod h1:upnsH4Ju1YkqpLXC305eW3yDZ4NfnNbmQRCMWS58IKU=
github.com/rabbitmq/amqp091-go v1.11.0 h1:HxIctVm9Gid/Vtn706necmZ7Wj6pgGI2eqplRbEY8O8=
github.com/rabbitmq/amqp091-go v1.11.0/go.mod h1:Hy4jKW5kQART1u+JkDTF9YYOQUHXqMuhrgxOEeS7G4o=
github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93 h1:UVArwN/wkKjMVhh2EQGC0tEc1+FqiLlvYXY5mQ2f8Wg=
github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93/go.mod h1:Nfe4efndBz4TibWycNE+lqyJZiMX4ycx+QKV8Ta0f/o=
github.com/rclone/Proton-API-Bridge v1.0.3 h1:Bs7RC4xCFSN0BPIYVda/BNxp0qo3NV0gB2VZqx2KIew=
github.com/rclone/Proton-API-Bridge v1.0.3/go.mod h1:26RAest751Ofk+F/d8xtl4UyWXrZvMQwn39U8rm/WKM=
github.com/rclone/go-proton-api v1.0.2 h1:cJtJUab0MGJ3C6q5kiEJs3pbyhSLnOKMyYOQehA0PBc=
@@ -2028,10 +2026,6 @@ github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IU
github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok=
github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g=
github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds=
github.com/willscott/go-nfs v0.0.4 h1:1vpOPAdECmoT2KmZ8u+ukO/jfvDjMEUNYhA2F1jGJtI=
github.com/willscott/go-nfs v0.0.4/go.mod h1:VhNccO67Oug787VNXcyx9JDI3ZoSpqoKMT/lWMhUIDg=
github.com/willscott/go-nfs-client v0.0.0-20251022144359-801f10d98886 h1:DtrBtkgTJk2XGt4T7eKdKVkd9A5NCevN2e4inLXtsqA=
github.com/willscott/go-nfs-client v0.0.0-20251022144359-801f10d98886/go.mod h1:Tq++Lr/FgiS3X48q5FETemXiSLGuYMQT2sPjYNPJSwA=
github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc=
github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw=
github.com/wsxiaoys/terminal v0.0.0-20160513160801-0940f3fc43a0 h1:3UeQBvD0TFrlVjOeLOBz+CPAI8dnbqNSVwUwRrkp7vQ=

View File

@@ -1,36 +0,0 @@
.PHONY: all build test test-verbose test-short test-debug clean deps tidy
all: build test
# Build the weed binary first
build:
cd ../../weed && go build -o weed .
# Install test dependencies
deps:
go mod download
# Run all tests
test: build deps
go test -timeout 5m ./...
# Run tests with verbose output
test-verbose: build deps
go test -v -timeout 5m ./...
# Skip long-running integration tests
test-short: deps
go test -short -v ./...
# Run tests with debug output from SeaweedFS, mirroring it into test.log.
# The pipeline runs under bash with pipefail so that a failing `go test`
# fails the target; with a plain `| tee` the recipe would report tee's
# (always successful) exit status instead.
test-debug: build deps
	bash -o pipefail -c 'go test -v -timeout 5m ./... 2>&1 | tee test.log'
# Clean up test artifacts
clean:
rm -f test.log
go clean -testcache
# Update go.sum
tidy:
go mod tidy

View File

@@ -1,92 +0,0 @@
# SeaweedFS NFS Integration Tests
End-to-end tests that boot a real SeaweedFS cluster (`master` + `volume` +
`filer`) plus the experimental `weed nfs` frontend and drive it through the
NFSv3 wire protocol. The tests talk to the server over TCP using
`github.com/willscott/go-nfs-client`, which means they do **not** need a
kernel NFS mount, privileged ports, or any platform-specific tooling.
## Prerequisites
1. Build the `weed` binary:
```bash
cd ../../weed
go build -o weed .
```
2. Go 1.24 or later.
## Running the tests
```bash
# Build weed and run everything
make test
# Verbose output, keeps the subprocess stdout
make test-verbose
# Skip integration tests — useful when iterating on the framework itself
make test-short
# Run a single test
go test -v -run TestNfsBasicReadWrite ./...
```
Every test starts its own cluster on random loopback ports, so runs are
isolated and can execute in parallel.
## Layout
- `framework.go` — launches `weed master`, `weed volume`, `weed filer`, and
`weed nfs` as subprocesses, waits for each to accept TCP, and exposes a
`Mount()` helper that returns an `nfsclient.Target`.
- `basic_test.go` — covers the most common NFS operations:
- Read/write round-trip (`TestNfsBasicReadWrite`)
- Mkdir / ReadDirPlus / RmDir (`TestNfsMkdirAndRmdir`)
- Nested directory + leaf file (`TestNfsNestedDirectories`)
- Rename preserves content (`TestNfsRenamePreservesContent`)
- Overwrite shrinks file size (`TestNfsOverwriteShrinksFile`)
- Large binary file round-trip (`TestNfsLargeFile`)
- Arbitrary binary and empty files (`TestNfsBinaryAndEmptyFiles`)
- Symlink + Lookup (`TestNfsSymlinkRoundTrip`)
- ReadDirPlus ordering sanity (`TestNfsReadDirPlusOrdering`)
- Remove on missing path errors cleanly (`TestNfsRemoveMissingFailsCleanly`)
- FSINFO advertises non-zero limits (`TestNfsFSInfoReturnsSaneLimits`)
- Sequential append writes concatenate (`TestNfsAppendIsSequential`)
- ReadDir after remove (`TestNfsReadDirAfterRemove`)
## Debugging a failing test
Keep the cluster temp dir for inspection:
```go
config := DefaultTestConfig()
config.SkipCleanup = true
```
Enable subprocess stdout/stderr:
```go
config := DefaultTestConfig()
config.EnableDebug = true
```
Or run with `-v`, which flips `EnableDebug` automatically via `testing.Verbose()`.
## Notes
- The NFS server binds to `127.0.0.1` with `-ip.bind=127.0.0.1` and exports
`/nfs_export`. The test framework pre-creates that directory via the
filer's HTTP API before starting the NFS server — the NFS server requires
its export root to exist in the filer's namespace with a real entry, and
the filer's synthetic `/` root does not match the `Name=="/"` check the
NFS server performs during `ensureIndexedEntry`.
- Ports are allocated dynamically. Each test run opens a short-lived
listener on `127.0.0.1:0`, reads back the assigned port, closes the
listener, and hands the port to `weed master/volume/filer/nfs`. There is
a tiny race window between close and reopen that has not been a problem
in practice but is worth remembering if you see a "bind: address already
in use" failure.
- All four `weed` components are started with explicit `-port.grpc=...`
flags. Without them, the default is `-port + 10000`, which overflows
`65535` whenever the HTTP port lands above `55535` — something ports
drawn from the kernel's ephemeral range on macOS routinely do.

View File

@@ -1,400 +0,0 @@
package nfs
import (
"bytes"
"fmt"
"io"
"os"
"path"
"strings"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
nfsclient "github.com/willscott/go-nfs-client/nfs"
)
// setupFramework boots a dedicated cluster for a single test and arranges
// for it to be torn down when the test finishes. Every test gets a fresh
// filer + volume pair so they cannot step on each other's namespace.
//
// The test is skipped in -short mode. Subprocess debug output follows
// testing.Verbose().
func setupFramework(t *testing.T) *NfsTestFramework {
	t.Helper()
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}
	config := DefaultTestConfig()
	config.EnableDebug = testing.Verbose()
	fw := NewNfsTestFramework(t, config)
	// Register teardown before Setup runs: if Setup fails part-way, whatever
	// it already started and the temp dir allocated by NewNfsTestFramework
	// must still be released. Cleanup skips processes that were never
	// started, so it is safe to run after a partial Setup.
	t.Cleanup(fw.Cleanup)
	require.NoError(t, fw.Setup(config), "framework setup")
	return fw
}
// writeAll stores payload at remotePath on the target using at most one
// Write call, then closes the file. An empty payload only opens and closes
// the file. Any failure, including a short write, aborts the test.
func writeAll(t *testing.T, target *nfsclient.Target, remotePath string, payload []byte) {
	t.Helper()

	handle, openErr := target.OpenFile(remotePath, 0o644)
	require.NoError(t, openErr, "open %s for write", remotePath)

	if want := len(payload); want > 0 {
		written, writeErr := handle.Write(payload)
		require.NoError(t, writeErr, "write %s", remotePath)
		require.Equal(t, want, written, "short write on %s", remotePath)
	}

	closeErr := handle.Close()
	require.NoError(t, closeErr, "close %s", remotePath)
}
// readAll returns the complete contents of remotePath as read through the
// target. Open or read failures abort the test.
func readAll(t *testing.T, target *nfsclient.Target, remotePath string) []byte {
	t.Helper()

	reader, openErr := target.Open(remotePath)
	require.NoError(t, openErr, "open %s for read", remotePath)
	defer reader.Close()

	data, readErr := io.ReadAll(reader)
	require.NoError(t, readErr, "read %s", remotePath)
	return data
}
// TestNfsBasicReadWrite writes a small file, reads it back, and checks both
// the round-tripped content and the size reported by Getattr.
func TestNfsBasicReadWrite(t *testing.T) {
	framework := setupFramework(t)
	mnt, unmount, err := framework.Mount()
	require.NoError(t, err)
	defer unmount()

	const remote = "/hello.txt"
	want := []byte("hello from seaweedfs nfs integration test")

	writeAll(t, mnt, remote, want)
	content := readAll(t, mnt, remote)
	assert.Equal(t, want, content, "round-tripped content must match")

	attrs, err := mnt.Getattr(remote)
	require.NoError(t, err)
	assert.Equal(t, int64(len(want)), int64(attrs.Filesize))
}
// TestNfsMkdirAndRmdir creates a directory under the export root, checks
// that ReadDirPlus lists it as a directory, removes it with RmDir, and
// checks that a second listing no longer contains it.
func TestNfsMkdirAndRmdir(t *testing.T) {
	framework := setupFramework(t)
	mnt, unmount, err := framework.Mount()
	require.NoError(t, err)
	defer unmount()

	_, err = mnt.Mkdir("/dir1", 0o755)
	require.NoError(t, err)

	listing, err := mnt.ReadDirPlus("/")
	require.NoError(t, err)
	sawDir := false
	for _, item := range listing {
		if item.Name() != "dir1" {
			continue
		}
		sawDir = true
		assert.True(t, item.IsDir(), "dir1 should be a directory")
	}
	assert.True(t, sawDir, "expected dir1 in readdir listing")

	require.NoError(t, mnt.RmDir("/dir1"))

	// Re-list the root: the removed directory must not show up any more.
	listing, err = mnt.ReadDirPlus("/")
	require.NoError(t, err)
	for _, item := range listing {
		assert.NotEqual(t, "dir1", item.Name(), "dir1 should be removed")
	}
}
// TestNfsNestedDirectories builds a three-level tree one Mkdir per segment,
// round-trips a file at the deepest level, then removes the file and the
// directories from the leaf upwards.
func TestNfsNestedDirectories(t *testing.T) {
	framework := setupFramework(t)
	mnt, unmount, err := framework.Mount()
	require.NoError(t, err)
	defer unmount()

	dirs := []string{"/a", "/a/b", "/a/b/c"}
	for _, dir := range dirs {
		_, mkErr := mnt.Mkdir(dir, 0o755)
		require.NoError(t, mkErr, "mkdir %s", dir)
	}

	const leaf = "/a/b/c/leaf.txt"
	want := []byte("deep path content")
	writeAll(t, mnt, leaf, want)
	assert.Equal(t, want, readAll(t, mnt, leaf))

	require.NoError(t, mnt.Remove(leaf))
	// Directories are removed deepest-first, i.e. in reverse creation order.
	for i := len(dirs) - 1; i >= 0; i-- {
		require.NoError(t, mnt.RmDir(dirs[i]))
	}
}
// TestNfsRenamePreservesContent renames a file and verifies that a Lookup
// of the old path fails while the new path serves the originally written
// bytes. Inode identity across the rename is deliberately not asserted
// here.
func TestNfsRenamePreservesContent(t *testing.T) {
	framework := setupFramework(t)
	mnt, unmount, err := framework.Mount()
	require.NoError(t, err)
	defer unmount()

	const (
		oldPath = "/src.txt"
		newPath = "/dst.txt"
	)
	want := []byte("rename me")
	writeAll(t, mnt, oldPath, want)

	require.NoError(t, mnt.Rename(oldPath, newPath))

	_, _, err = mnt.Lookup(oldPath)
	assert.Error(t, err, "source should be gone after rename")

	content := readAll(t, mnt, newPath)
	assert.Equal(t, want, content)

	require.NoError(t, mnt.Remove(newPath))
}
// TestNfsOverwriteShrinksFile replaces a file's content with a shorter
// payload and verifies that both Getattr and a full read reflect the
// smaller size. The file is truncated explicitly with Setattr(size=0)
// between the two writes; per the original author's note, go-nfs-client's
// OpenFile does not pass O_TRUNC, so the truncate has to be done by hand.
func TestNfsOverwriteShrinksFile(t *testing.T) {
	framework := setupFramework(t)
	mnt, unmount, err := framework.Mount()
	require.NoError(t, err)
	defer unmount()

	const remote = "/overwrite.txt"
	writeAll(t, mnt, remote, []byte("the quick brown fox"))

	truncate := nfsclient.Sattr3{
		Size: nfsclient.SetSize{SetIt: true, Size: 0},
	}
	require.NoError(t, mnt.Setattr(remote, truncate))
	writeAll(t, mnt, remote, []byte("short"))

	attrs, err := mnt.Getattr(remote)
	require.NoError(t, err)
	assert.Equal(t, int64(len("short")), int64(attrs.Filesize))

	content := readAll(t, mnt, remote)
	assert.Equal(t, []byte("short"), content)

	require.NoError(t, mnt.Remove(remote))
}
// TestNfsLargeFile round-trips a 3 MiB payload and checks the reported
// size and every byte of the content. The payload is meant to be large
// enough that the write path stores it as chunks rather than inline in
// the filer entry (NOTE(review): the exact inline cutoff is not visible
// here — confirm).
func TestNfsLargeFile(t *testing.T) {
	framework := setupFramework(t)
	mnt, unmount, err := framework.Mount()
	require.NoError(t, err)
	defer unmount()

	const (
		remote = "/big.bin"
		size   = 3 * 1024 * 1024 // 3 MiB
	)
	want := make([]byte, size)
	for offset := range want {
		// The pattern repeats every 251 bytes. 251 is prime, so the cycle
		// never lines up with power-of-two sizes and misplaced data is
		// likely to show up as a mismatch.
		want[offset] = byte(offset % 251)
	}
	writeAll(t, mnt, remote, want)

	attrs, err := mnt.Getattr(remote)
	require.NoError(t, err)
	assert.Equal(t, int64(size), int64(attrs.Filesize))

	content := readAll(t, mnt, remote)
	require.Equal(t, size, len(content))
	assert.True(t, bytes.Equal(want, content), "large file content must round-trip byte-for-byte")

	require.NoError(t, mnt.Remove(remote))
}
// TestNfsBinaryAndEmptyFiles exercises two edge-case payloads against one
// shared mount: a file holding every possible byte value once, and a
// zero-length file.
func TestNfsBinaryAndEmptyFiles(t *testing.T) {
	framework := setupFramework(t)
	mnt, unmount, err := framework.Mount()
	require.NoError(t, err)
	defer unmount()

	t.Run("AllByteValues", func(t *testing.T) {
		const remote = "/binary.bin"
		want := make([]byte, 256)
		for value := range want {
			want[value] = byte(value)
		}
		writeAll(t, mnt, remote, want)
		content := readAll(t, mnt, remote)
		assert.Equal(t, want, content)
		require.NoError(t, mnt.Remove(remote))
	})

	t.Run("EmptyFile", func(t *testing.T) {
		const remote = "/empty.txt"
		writeAll(t, mnt, remote, nil)
		attrs, statErr := mnt.Getattr(remote)
		require.NoError(t, statErr)
		assert.Equal(t, int64(0), int64(attrs.Filesize))
		require.NoError(t, mnt.Remove(remote))
	})
}
// TestNfsSymlinkRoundTrip creates a symlink through the NFS server and
// verifies via Lookup that the new entry reports symlink mode. The link
// target is never created, so the test also shows that a dangling symlink
// can be created, looked up, and removed. Readlink itself is not called.
func TestNfsSymlinkRoundTrip(t *testing.T) {
	framework := setupFramework(t)
	mnt, unmount, err := framework.Mount()
	require.NoError(t, err)
	defer unmount()

	const link = "/link.txt"
	// Symlink is its own RPC, separate from the open+create path the other
	// tests exercise.
	require.NoError(t, mnt.Symlink("/target.txt", link))

	entry, _, err := mnt.Lookup(link)
	require.NoError(t, err, "lookup symlink")
	mode := entry.Mode()
	assert.True(t, mode&os.ModeSymlink != 0, "expected symlink mode, got %s", mode)

	require.NoError(t, mnt.Remove(link))
}
// TestNfsReadDirPlusOrdering writes several distinctly named files into
// one directory and verifies that ReadDirPlus reports every one of them,
// guarding against truncated listings. Despite the name, the order of the
// returned entries is not asserted.
func TestNfsReadDirPlusOrdering(t *testing.T) {
	framework := setupFramework(t)
	mnt, unmount, err := framework.Mount()
	require.NoError(t, err)
	defer unmount()

	const dir = "/listing"
	_, err = mnt.Mkdir(dir, 0o755)
	require.NoError(t, err)

	names := []string{"alpha.txt", "beta.txt", "gamma.txt", "delta.txt", "epsilon.txt"}
	for _, name := range names {
		writeAll(t, mnt, path.Join(dir, name), []byte(name))
	}

	listing, err := mnt.ReadDirPlus(dir)
	require.NoError(t, err)

	// Collect the listed names, ignoring the self and parent entries.
	listed := make(map[string]struct{}, len(listing))
	for _, item := range listing {
		switch item.Name() {
		case ".", "..":
			continue
		}
		listed[item.Name()] = struct{}{}
	}

	for _, name := range names {
		_, present := listed[name]
		assert.True(t, present, "expected %s in directory listing", name)
	}

	for _, name := range names {
		require.NoError(t, mnt.Remove(path.Join(dir, name)))
	}
	require.NoError(t, mnt.RmDir(dir))
}
// TestNfsRemoveMissingFailsCleanly verifies that removing a path that does
// not exist returns an error rather than silently succeeding, and that the
// error text looks like a "no such entry" failure.
func TestNfsRemoveMissingFailsCleanly(t *testing.T) {
	framework := setupFramework(t)
	mnt, unmount, err := framework.Mount()
	require.NoError(t, err)
	defer unmount()

	err = mnt.Remove("/does_not_exist.txt")
	require.Error(t, err, "removing a missing file must error")

	// Match loosely on the message so the test does not depend on the
	// client library's exact wording for the not-found condition.
	message := strings.ToLower(err.Error())
	recognised := strings.Contains(message, "noent") ||
		strings.Contains(message, "not exist") ||
		strings.Contains(message, "no such")
	assert.True(t, recognised, "unexpected error shape: %v", err)
}
// TestNfsFSInfoReturnsSaneLimits calls FSINFO and verifies that the
// preferred read and write transfer sizes the server advertises are both
// greater than zero.
func TestNfsFSInfoReturnsSaneLimits(t *testing.T) {
	framework := setupFramework(t)
	mnt, unmount, err := framework.Mount()
	require.NoError(t, err)
	defer unmount()

	limits, err := mnt.FSInfo()
	require.NoError(t, err)
	require.NotNil(t, limits)

	const zero = uint32(0)
	assert.Greater(t, limits.RTPref, zero, "rtpref must be positive")
	assert.Greater(t, limits.WTPref, zero, "wtpref must be positive")
}
// TestNfsAppendIsSequential writes a file in two separate open/close
// cycles and verifies the result is the concatenation of both parts. No
// append flag is involved: the second cycle reopens the file, seeks to the
// end of the first part, and writes from there.
func TestNfsAppendIsSequential(t *testing.T) {
	framework := setupFramework(t)
	mnt, unmount, err := framework.Mount()
	require.NoError(t, err)
	defer unmount()

	const (
		remote = "/concat.txt"
		prefix = "part1-"
		suffix = "part2"
	)

	writeAll(t, mnt, remote, []byte(prefix))

	handle, err := mnt.OpenFile(remote, 0o644)
	require.NoError(t, err)
	// Position the write offset just past the first part so the second
	// write extends the file instead of overwriting its start.
	_, err = handle.Seek(int64(len(prefix)), io.SeekStart)
	require.NoError(t, err)
	_, err = handle.Write([]byte(suffix))
	require.NoError(t, err)
	require.NoError(t, handle.Close())

	content := readAll(t, mnt, remote)
	assert.Equal(t, prefix+suffix, string(content))

	require.NoError(t, mnt.Remove(remote))
}
// TestNfsReadDirAfterRemove is a regression test for stale directory
// listings: after one of five files is removed, ReadDirPlus must not
// report it any more.
func TestNfsReadDirAfterRemove(t *testing.T) {
	framework := setupFramework(t)
	mnt, unmount, err := framework.Mount()
	require.NoError(t, err)
	defer unmount()

	const (
		dir     = "/churn"
		count   = 5
		removed = 2
	)
	// fileAt returns the path of the i-th test file inside dir.
	fileAt := func(i int) string {
		return path.Join(dir, fmt.Sprintf("f%d.txt", i))
	}

	_, err = mnt.Mkdir(dir, 0o755)
	require.NoError(t, err)
	for i := 0; i < count; i++ {
		writeAll(t, mnt, fileAt(i), []byte{byte(i)})
	}

	// Delete the middle file, then list the directory again.
	require.NoError(t, mnt.Remove("/churn/f2.txt"))
	listing, err := mnt.ReadDirPlus(dir)
	require.NoError(t, err)
	for _, item := range listing {
		assert.NotEqual(t, "f2.txt", item.Name(), "removed file should not reappear in listing")
	}

	// Remove the surviving files, then the directory itself.
	for i := 0; i < count; i++ {
		if i != removed {
			require.NoError(t, mnt.Remove(fileAt(i)))
		}
	}
	require.NoError(t, mnt.RmDir(dir))
}

View File

@@ -1,423 +0,0 @@
package nfs
import (
"bytes"
"fmt"
"io"
"mime/multipart"
"net"
"net/http"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"syscall"
"testing"
"time"
"github.com/seaweedfs/seaweedfs/test/testutil"
"github.com/stretchr/testify/require"
nfsclient "github.com/willscott/go-nfs-client/nfs"
"github.com/willscott/go-nfs-client/nfs/rpc"
)
// NfsTestFramework holds the state of one test cluster: a SeaweedFS
// master, volume server, and filer plus the experimental `weed nfs`
// frontend, each running as a subprocess. It hands out NFSv3 RPC clients
// that talk to that frontend, so no kernel mount is needed.
type NfsTestFramework struct {
	// t is the test that owns this framework.
	t *testing.T

	// tempDir is the scratch root for the cluster; dataDir lives below it.
	tempDir, dataDir string

	// Handles of the started subprocesses; nil until the component starts.
	masterProcess, volumeProcess, filerProcess, nfsProcess *os.Process

	// Listen addresses ("host:port") and gRPC ports of each component.
	masterAddr string
	masterGrpc int
	volumeAddr string
	volumeGrpc int
	filerAddr  string
	filerGrpc  int

	// nfsAddr is the NFS listen address, exportRoot the exported filer
	// path, and weedBinary the weed executable used for the subprocesses.
	nfsAddr, exportRoot, weedBinary string

	// isSetup records that Setup completed; skipCleanup keeps tempDir on
	// Cleanup.
	isSetup, skipCleanup bool
}
// TestConfig carries the knobs that control how the framework boots a
// cluster.
type TestConfig struct {
	NumVolumes int

	// EnableDebug turns on debug output for the cluster.
	EnableDebug bool

	// SkipCleanup keeps the temp dir around (e.g. after a failure) so it
	// can be inspected.
	SkipCleanup bool

	// ExportRoot is the filer path the NFS server exports. When left
	// empty the framework falls back to "/" so tests can use any path,
	// with a single warning logged by the server.
	ExportRoot string
}
// DefaultTestConfig returns a fresh config with the defaults most tests
// use: three volumes, no debug output, cleanup enabled, and "/nfs_export"
// as the export root. A dedicated subtree is exported instead of "/"
// because, per the original author's note, the NFS server needs its export
// directory to exist in the filer namespace as a real entry.
func DefaultTestConfig() *TestConfig {
	cfg := new(TestConfig)
	cfg.NumVolumes = 3
	cfg.ExportRoot = "/nfs_export"
	// EnableDebug and SkipCleanup keep their zero value (false).
	return cfg
}
// NewNfsTestFramework builds a framework bound to t without starting any
// process; call Setup to boot the cluster. A nil config is replaced by
// DefaultTestConfig(), and an empty ExportRoot falls back to "/". The
// function creates the cluster's temp directory and reserves its ports,
// failing the test if the temp directory cannot be created.
func NewNfsTestFramework(t *testing.T, config *TestConfig) *NfsTestFramework {
	if config == nil {
		config = DefaultTestConfig()
	}

	scratch, err := os.MkdirTemp("", "seaweedfs_nfs_test_")
	require.NoError(t, err)

	// Seven ports are reserved in one batch: listen ports for master,
	// volume, filer, and nfs, plus gRPC ports for master, volume, and
	// filer (nfs has no gRPC endpoint). Reserving them together is meant
	// to avoid the close-then-reuse race of allocating one port at a time.
	ports := testutil.MustAllocatePorts(t, 7)
	loopback := func(port int) string {
		return fmt.Sprintf("127.0.0.1:%d", port)
	}

	root := config.ExportRoot
	if root == "" {
		root = "/"
	}

	fw := &NfsTestFramework{
		t:           t,
		tempDir:     scratch,
		dataDir:     filepath.Join(scratch, "data"),
		masterAddr:  loopback(ports[0]),
		masterGrpc:  ports[1],
		volumeAddr:  loopback(ports[2]),
		volumeGrpc:  ports[3],
		filerAddr:   loopback(ports[4]),
		filerGrpc:   ports[5],
		nfsAddr:     loopback(ports[6]),
		exportRoot:  root,
		weedBinary:  findWeedBinary(),
		isSetup:     false,
		skipCleanup: config.SkipCleanup,
	}
	return fw
}
// Setup starts the master, volume server, filer, and NFS frontend in that
// order, waiting for each component's port to accept connections before
// starting the next. It returns an error if the framework is already set
// up, or as soon as any step fails; underlying causes are wrapped so
// callers can inspect them with errors.Is / errors.As.
//
// Setup does not stop components it already started when a later step
// fails; callers should invoke Cleanup in that case.
func (f *NfsTestFramework) Setup(config *TestConfig) error {
	if f.isSetup {
		return fmt.Errorf("framework already setup")
	}

	dirs := []string{
		f.dataDir,
		filepath.Join(f.dataDir, "master"),
		filepath.Join(f.dataDir, "volume"),
	}
	for _, dir := range dirs {
		if err := os.MkdirAll(dir, 0755); err != nil {
			return fmt.Errorf("failed to create directory %s: %w", dir, err)
		}
	}

	if err := f.startMaster(config); err != nil {
		return fmt.Errorf("failed to start master: %w", err)
	}
	if !testutil.WaitForPort(portFromAddr(f.masterAddr), testutil.SeaweedMiniStartupTimeout) {
		return fmt.Errorf("master not ready at %s", f.masterAddr)
	}

	if err := f.startVolumeServer(config); err != nil {
		return fmt.Errorf("failed to start volume server: %w", err)
	}
	if !testutil.WaitForPort(portFromAddr(f.volumeAddr), testutil.SeaweedMiniStartupTimeout) {
		return fmt.Errorf("volume server not ready at %s", f.volumeAddr)
	}

	if err := f.startFiler(config); err != nil {
		return fmt.Errorf("failed to start filer: %w", err)
	}
	if !testutil.WaitForPort(portFromAddr(f.filerAddr), testutil.SeaweedMiniStartupTimeout) {
		return fmt.Errorf("filer not ready at %s", f.filerAddr)
	}

	// Pre-create the export root in the filer's namespace before the NFS
	// server starts, since the server expects its export directory to
	// exist already. The "/" root is skipped.
	if f.exportRoot != "/" {
		if err := f.ensureExportRootExists(); err != nil {
			return fmt.Errorf("failed to pre-create export root %s: %w", f.exportRoot, err)
		}
	}

	if err := f.startNfsServer(config); err != nil {
		return fmt.Errorf("failed to start NFS server: %w", err)
	}
	if !testutil.WaitForPort(portFromAddr(f.nfsAddr), testutil.SeaweedMiniStartupTimeout) {
		return fmt.Errorf("NFS server not ready at %s", f.nfsAddr)
	}

	// Grace period after the port opens: gives the NFS server time to
	// finish connecting to the filer before the first client call hits
	// MOUNT/LOOKUP.
	time.Sleep(500 * time.Millisecond)

	f.isSetup = true
	return nil
}
// Cleanup sends SIGTERM to every child process that was started and waits
// for it to exit, NFS server first and master last. The temp directory is
// removed afterwards unless the framework was told to skip cleanup.
func (f *NfsTestFramework) Cleanup() {
	for _, child := range []*os.Process{f.nfsProcess, f.filerProcess, f.volumeProcess, f.masterProcess} {
		if child == nil {
			// This component never started; nothing to stop.
			continue
		}
		// Best-effort shutdown: errors are ignored on purpose because the
		// process may already be gone.
		_ = child.Signal(syscall.SIGTERM)
		_, _ = child.Wait()
	}

	if f.skipCleanup {
		return
	}
	_ = os.RemoveAll(f.tempDir)
}
// NfsAddr reports the TCP address used for the NFS server.
func (f *NfsTestFramework) NfsAddr() string {
	return f.nfsAddr
}
// FilerAddr reports the TCP address used for the filer.
func (f *NfsTestFramework) FilerAddr() string {
	return f.filerAddr
}
// ExportRoot reports the filer path that the NFS server exports.
func (f *NfsTestFramework) ExportRoot() string {
	return f.exportRoot
}
// Mount dials the running NFS server, performs an NFSv3 MOUNT of the export
// root, and hands back a Target that tests can drive like a mini-VFS. The
// second return value unmounts and closes the TCP connection; the caller
// must invoke it when done.
func (f *NfsTestFramework) Mount() (*nfsclient.Target, func(), error) {
	const (
		maxDialAttempts = 20
		dialBackoff     = 25 * time.Millisecond
	)

	// The server's TCP listener may already accept connections by the time
	// the readiness probe succeeds, while RPC program registration can trail
	// it by a few milliseconds. Retrying the dial absorbs that window.
	var (
		conn    *rpc.Client
		dialErr error
	)
	for i := 0; i < maxDialAttempts; i++ {
		if conn, dialErr = rpc.DialTCP("tcp", f.nfsAddr, false); dialErr == nil {
			break
		}
		time.Sleep(dialBackoff)
	}
	if dialErr != nil {
		return nil, nil, fmt.Errorf("dial NFS: %w", dialErr)
	}

	// Note: Mount.Addr is deliberately left unset. When Addr is non-empty,
	// the go-nfs client re-dials via portmapper and appends `:111` to the
	// address, which yields "too many colons" for a raw `host:port` string.
	// Reusing the already-open RPC client sidesteps that path.
	m := &nfsclient.Mount{Client: conn}
	root, err := m.Mount(f.exportRoot, rpc.AuthNull)
	if err != nil {
		conn.Close()
		return nil, nil, fmt.Errorf("mount %s: %w", f.exportRoot, err)
	}

	release := func() {
		_ = m.Unmount()
		conn.Close()
	}
	return root, release, nil
}
// startMaster launches a single-node `weed master` on 127.0.0.1 using the
// port taken from f.masterAddr and the metadata directory under f.dataDir.
func (f *NfsTestFramework) startMaster(config *TestConfig) error {
	_, port := splitHostPort(f.masterAddr)
	metaDir := filepath.Join(f.dataDir, "master")

	return f.startProcess(&f.masterProcess, config, []string{
		"master",
		"-ip=127.0.0.1",
		fmt.Sprintf("-port=%d", port),
		fmt.Sprintf("-port.grpc=%d", f.masterGrpc),
		"-mdir=" + metaDir,
		"-raftBootstrap",
		"-peers=none",
	})
}
// startVolumeServer launches `weed volume` on 127.0.0.1, pointed at the
// master and storing data under f.dataDir/volume.
func (f *NfsTestFramework) startVolumeServer(config *TestConfig) error {
	_, port := splitHostPort(f.volumeAddr)

	// pb.ServerAddress encodes a non-default gRPC port as `host:port.grpc`
	// (see weed/pb/server_address.go): a dot, not a colon, separates the
	// HTTP port from the gRPC port.
	master := fmt.Sprintf("%s.%d", f.masterAddr, f.masterGrpc)
	storeDir := filepath.Join(f.dataDir, "volume")

	return f.startProcess(&f.volumeProcess, config, []string{
		"volume",
		"-master=" + master,
		"-ip=127.0.0.1",
		fmt.Sprintf("-port=%d", port),
		fmt.Sprintf("-port.grpc=%d", f.volumeGrpc),
		"-dir=" + storeDir,
		fmt.Sprintf("-max=%d", config.NumVolumes),
	})
}
// startFiler launches `weed filer` on 127.0.0.1, pointed at the master via
// the `host:port.grpc` address form.
func (f *NfsTestFramework) startFiler(config *TestConfig) error {
	_, port := splitHostPort(f.filerAddr)
	master := fmt.Sprintf("%s.%d", f.masterAddr, f.masterGrpc)

	return f.startProcess(&f.filerProcess, config, []string{
		"filer",
		"-master=" + master,
		"-ip=127.0.0.1",
		fmt.Sprintf("-port=%d", port),
		fmt.Sprintf("-port.grpc=%d", f.filerGrpc),
	})
}
// startNfsServer launches `weed nfs` bound to 127.0.0.1, exporting
// f.exportRoot from the filer.
func (f *NfsTestFramework) startNfsServer(config *TestConfig) error {
	_, port := splitHostPort(f.nfsAddr)

	// `host:port.grpc` encoding — see pb/server_address.go.
	filer := fmt.Sprintf("%s.%d", f.filerAddr, f.filerGrpc)

	return f.startProcess(&f.nfsProcess, config, []string{
		"nfs",
		"-filer=" + filer,
		"-ip.bind=127.0.0.1",
		fmt.Sprintf("-port=%d", port),
		"-filer.path=" + f.exportRoot,
	})
}
// startProcess runs the weed binary with args from the framework's temp
// directory and, once the child has started, stores its handle in *target.
// Child output is forwarded to this process's stdout/stderr only when
// config.EnableDebug is set. On a start failure *target is left untouched.
func (f *NfsTestFramework) startProcess(target **os.Process, config *TestConfig, args []string) error {
	child := exec.Command(f.weedBinary, args...)
	child.Dir = f.tempDir
	if config.EnableDebug {
		child.Stdout, child.Stderr = os.Stdout, os.Stderr
	}

	startErr := child.Start()
	if startErr == nil {
		*target = child.Process
	}
	return startErr
}
// portFromAddr extracts the numeric port from a `host:port` string, since
// testutil.WaitForPort wants an int port rather than a full address. The
// host half of the split is discarded.
func portFromAddr(addr string) int {
	_, portNumber := splitHostPort(addr)
	return portNumber
}
// ensureExportRootExists uploads an empty placeholder file under
// f.exportRoot through the filer's HTTP API and then deletes it again. The
// round trip implicitly creates the target directory so the NFS server has
// something to mount. Plain HTTP is used instead of weed/pb because it is
// simpler and needs no gRPC stubs.
//
// An export root that is empty after trimming trailing slashes is a no-op.
// A non-2xx POST is an error; for the DELETE, 404 is tolerated as well.
func (f *NfsTestFramework) ensureExportRootExists() error {
	root := strings.TrimRight(f.exportRoot, "/")
	if root == "" {
		return nil
	}

	const placeholderName = ".nfs_test_init"
	target := "http://" + f.filerAddr + root + "/" + placeholderName

	// Build a multipart form carrying a zero-length file.
	var form bytes.Buffer
	mw := multipart.NewWriter(&form)
	filePart, err := mw.CreateFormFile("file", placeholderName)
	if err != nil {
		return err
	}
	if _, err = io.WriteString(filePart, ""); err != nil {
		return err
	}
	if err = mw.Close(); err != nil {
		return err
	}

	client := &http.Client{Timeout: 10 * time.Second}

	// send performs one request, drains and closes the response body, and
	// reports the HTTP status code.
	send := func(req *http.Request) (int, error) {
		resp, doErr := client.Do(req)
		if doErr != nil {
			return 0, doErr
		}
		_, _ = io.Copy(io.Discard, resp.Body)
		resp.Body.Close()
		return resp.StatusCode, nil
	}

	upload, err := http.NewRequest(http.MethodPost, target, &form)
	if err != nil {
		return err
	}
	upload.Header.Set("Content-Type", mw.FormDataContentType())
	status, err := send(upload)
	if err != nil {
		return err
	}
	if status/100 != 2 {
		return fmt.Errorf("filer POST %s returned status %d", target, status)
	}

	// Delete the placeholder; the directory stays behind.
	remove, err := http.NewRequest(http.MethodDelete, target, nil)
	if err != nil {
		return err
	}
	status, err = send(remove)
	if err != nil {
		return err
	}
	if status/100 != 2 && status != http.StatusNotFound {
		return fmt.Errorf("filer DELETE %s returned status %d", target, status)
	}
	return nil
}
// splitHostPort breaks a `host:port` address into its host and numeric port.
// An address that net.SplitHostPort rejects yields ("", 0). A port string
// that does not begin with a decimal integer yields port 0 with the host
// still returned.
func splitHostPort(addr string) (string, int) {
	if host, rawPort, err := net.SplitHostPort(addr); err == nil {
		port := 0
		// The scan error is ignored on purpose: port stays 0 on failure.
		_, _ = fmt.Sscanf(rawPort, "%d", &port)
		return host, port
	}
	return "", 0
}
// findWeedBinary resolves the weed binary to run, favouring a local build
// inside the checkout so tests exercise the code under review rather than
// whatever happens to be on $PATH.
//
// Search order: paths relative to this source file, paths relative to the
// working directory, then $PATH. If nothing is found the bare name "weed"
// is returned.
func findWeedBinary() string {
	// firstExisting returns the absolute form of the first candidate under
	// base that os.Stat can see.
	firstExisting := func(base string, relative ...string) (string, bool) {
		for _, rel := range relative {
			candidate := filepath.Join(base, rel)
			if _, err := os.Stat(candidate); err != nil {
				continue
			}
			abs, _ := filepath.Abs(candidate)
			return abs, true
		}
		return "", false
	}

	if _, self, _, ok := runtime.Caller(0); ok {
		if bin, found := firstExisting(filepath.Dir(self), "../../weed/weed", "../weed/weed"); found {
			return bin
		}
	}

	wd, _ := os.Getwd()
	if bin, found := firstExisting(wd, "../../weed/weed", "../weed/weed", "./weed"); found {
		return bin
	}

	if onPath, err := exec.LookPath("weed"); err == nil {
		return onPath
	}
	return "weed"
}

View File

@@ -1,21 +0,0 @@
module seaweedfs-nfs-tests
go 1.25.0
// test/testutil lives inside the main seaweedfs module; pull it in via a
// local replace so this integration suite can reuse the shared port
// allocator and readiness helpers instead of reinventing them.
replace github.com/seaweedfs/seaweedfs => ../..
require (
github.com/seaweedfs/seaweedfs v0.0.0-00010101000000-000000000000
github.com/stretchr/testify v1.11.1
github.com/willscott/go-nfs-client v0.0.0-20251022144359-801f10d98886
)
require (
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

View File

@@ -1,14 +0,0 @@
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93 h1:UVArwN/wkKjMVhh2EQGC0tEc1+FqiLlvYXY5mQ2f8Wg=
github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93/go.mod h1:Nfe4efndBz4TibWycNE+lqyJZiMX4ycx+QKV8Ta0f/o=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/willscott/go-nfs-client v0.0.0-20251022144359-801f10d98886 h1:DtrBtkgTJk2XGt4T7eKdKVkd9A5NCevN2e4inLXtsqA=
github.com/willscott/go-nfs-client v0.0.0-20251022144359-801f10d98886/go.mod h1:Tq++Lr/FgiS3X48q5FETemXiSLGuYMQT2sPjYNPJSwA=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -1,193 +0,0 @@
//go:build linux
package nfs
// End-to-end mount tests that drive the real Linux NFS client (mount.nfs +
// in-tree kernel) against a running `weed nfs` subprocess. These exist to
// catch regressions that the existing framework can't see, because the
// framework drives the server with willscott/go-nfs-client — the same RPC
// library the server uses internally — so any bug shared between the two
// (XDR layout, version dispatch, RPC framing) round-trips invisibly.
//
// Two real bugs hit recently were exactly that shape:
// 1. NFSv4 mis-routed to the v3 SETATTR handler (#9262). The client
// library never sends NFSv4, so the test suite never noticed; the
// Linux kernel mount path did notice, with EIO.
// 2. UDP MOUNT v3 missing. Only TCP MOUNT was advertised; the kernel
// defaults mountproto=udp in many setups, so the in-tree client
// surfaced EPROTONOSUPPORT during MOUNT setup.
//
// These tests mount over the actual loopback interface using mount.nfs and
// shell out to /bin/mount and /bin/umount. They require root (mount(2) is
// privileged) and Linux (the in-tree NFS client is what's being exercised);
// they t.Skip cleanly when either prerequisite is missing.
//
// Run locally with:
//
// cd test/nfs
// sudo go test -v -run TestKernelMount ./...
//
// CI runs them via .github/workflows/nfs-tests.yml after installing
// nfs-common (mount.nfs + helpers).
import (
"errors"
"fmt"
"net"
"os"
"os/exec"
"strings"
"testing"
)
// kernelMountSkipIfUnsupported skips the calling test when the host cannot
// perform a real NFS mount: either the process is not running as root or the
// mount.nfs helper is not on $PATH. Keeping both checks here lets the
// kernel-mount tests stay focused on what they actually verify.
func kernelMountSkipIfUnsupported(t *testing.T) {
	t.Helper()

	if euid := os.Geteuid(); euid != 0 {
		t.Skip("kernel mount test requires root; mount(2) is privileged")
	}

	_, lookErr := exec.LookPath("mount.nfs")
	if lookErr != nil {
		t.Skipf("mount.nfs not installed: %v (CI installs the nfs-common package)", lookErr)
	}
}
// kernelMount shells out to `mount -t nfs` against the framework's running
// NFS server and returns the mountpoint together with a teardown closure.
// Every "{port}" in optsTemplate is replaced with the server's port, so
// callers can pass explicit port=/mountport= options and keep the kernel
// from querying portmap. That keeps the harness honest about what it tests
// (the NFS / MOUNT wire protocol) and avoids colliding with a system rpcbind
// on shared CI runners, where privileged port 111 is frequently taken.
//
// Any failure aborts the test via t.Fatalf; a failed mount also removes the
// temporary mountpoint first.
func kernelMount(t *testing.T, fw *NfsTestFramework, optsTemplate string) (string, func()) {
	t.Helper()

	addr := fw.NfsAddr()
	host, port, err := net.SplitHostPort(addr)
	if err != nil {
		t.Fatalf("split nfs addr %q: %v", addr, err)
	}

	dir, err := os.MkdirTemp("", "weed-nfs-kmount-")
	if err != nil {
		t.Fatalf("mkdtemp: %v", err)
	}

	mountOpts := strings.ReplaceAll(optsTemplate, "{port}", port)
	export := fmt.Sprintf("%s:%s", host, fw.ExportRoot())

	output, err := exec.Command("mount", "-t", "nfs", "-o", mountOpts, export, dir).CombinedOutput()
	if err != nil {
		_ = os.RemoveAll(dir)
		t.Fatalf("mount %s -o %s failed: %v\nmount output:\n%s", export, mountOpts, err, output)
	}

	return dir, func() {
		// -f to bail out faster if the server's already gone.
		_ = exec.Command("umount", "-f", dir).Run()
		_ = os.RemoveAll(dir)
	}
}
// newKernelMountFramework builds a framework from the default test config,
// brings the cluster up, and registers Cleanup with the test. A setup
// failure cleans up whatever did start and then aborts the test.
func newKernelMountFramework(t *testing.T) *NfsTestFramework {
	t.Helper()

	config := DefaultTestConfig()
	framework := NewNfsTestFramework(t, config)

	setupErr := framework.Setup(config)
	if setupErr != nil {
		framework.Cleanup()
		t.Fatalf("framework setup: %v", setupErr)
	}

	t.Cleanup(framework.Cleanup)
	return framework
}
// TestKernelMountV3TCP covers the most common mount form: NFSv3 with MOUNT
// v3, both over TCP. The go-nfs-client tests already cover this at the
// protocol layer; going through mount.nfs and the kernel confirms that the
// wire format decodes cleanly under a different XDR/RPC parser.
func TestKernelMountV3TCP(t *testing.T) {
	kernelMountSkipIfUnsupported(t)

	const opts = "nfsvers=3,nolock,port={port},mountport={port},proto=tcp,mountproto=tcp"
	dir, unmount := kernelMount(t, newKernelMountFramework(t), opts)
	defer unmount()

	_, statErr := os.Stat(dir)
	if statErr != nil {
		t.Errorf("stat mountpoint: %v", statErr)
	}

	_, listErr := os.ReadDir(dir)
	if listErr != nil {
		t.Errorf("readdir mountpoint: %v", listErr)
	}
}
// TestKernelMountV3MountProtoUDP guards the UDP MOUNT v3 responder.
// mountproto=udp forces the kernel to issue MOUNT over UDP only; before the
// responder existed MOUNT was advertised TCP-only, so the kernel found
// nothing and surfaced EPROTONOSUPPORT during mount setup.
func TestKernelMountV3MountProtoUDP(t *testing.T) {
	kernelMountSkipIfUnsupported(t)

	const opts = "nfsvers=3,nolock,port={port},mountport={port},proto=tcp,mountproto=udp"
	dir, unmount := kernelMount(t, newKernelMountFramework(t), opts)
	defer unmount()

	_, statErr := os.Stat(dir)
	if statErr != nil {
		t.Errorf("stat mountpoint: %v", statErr)
	}
}
// TestKernelMountV4RejectsCleanly guards the NFSv4 PROG_MISMATCH path
// (#9262). The server speaks NFSv3 only. Previously a v4 COMPOUND was
// mis-routed to the v3 SETATTR handler and garbage was written back, so the
// kernel surfaced EIO rather than a version-mismatch error and (depending on
// distro) did not fall back to v3. The version filter now answers
// PROG_MISMATCH so the kernel sees "v4 not supported" cleanly.
//
// Two things are asserted:
//  1. mount exits non-zero (no silent success against a v3 server);
//  2. the failure output mentions protocol/version/io, which is what the
//     kernel reports on PROG_MISMATCH. A pre-fix server yields only "mount
//     system call failed", so a regression trips this check.
func TestKernelMountV4RejectsCleanly(t *testing.T) {
	kernelMountSkipIfUnsupported(t)
	fw := newKernelMountFramework(t)

	host, port, splitErr := net.SplitHostPort(fw.NfsAddr())
	if splitErr != nil {
		t.Fatalf("split nfs addr: %v", splitErr)
	}

	dir, mkErr := os.MkdirTemp("", "weed-nfs-kmount-v4-")
	if mkErr != nil {
		t.Fatalf("mkdtemp: %v", mkErr)
	}
	defer os.RemoveAll(dir)

	export := fmt.Sprintf("%s:%s", host, fw.ExportRoot())
	output, mountErr := exec.Command("mount", "-t", "nfs", "-o", "vers=4,port="+port, export, dir).CombinedOutput()
	// Registered after RemoveAll so it runs first: unmount, then delete.
	defer exec.Command("umount", "-f", dir).Run()
	if mountErr == nil {
		t.Fatalf("v4 mount unexpectedly succeeded against v3-only server\nmount output:\n%s", output)
	}

	// The exact error string is not pinned because distros word it slightly
	// differently. What is required is some hint of a protocol-level
	// failure rather than a bare "mount system call failed".
	text := strings.ToLower(string(output))
	hinted := false
	for _, hint := range []string{"protocol", "version", "i/o"} {
		if strings.Contains(text, hint) {
			hinted = true
			break
		}
	}
	if !hinted {
		t.Errorf("v4 mount failure didn't mention protocol/version/io; output:\n%s", output)
	}

	// Also insist on a non-zero exit, so a future change that lets mount(2)
	// silently succeed (e.g. a relaxed version filter) is caught even if the
	// message phrasing changes.
	var exitErr *exec.ExitError
	if !errors.As(mountErr, &exitErr) {
		t.Errorf("expected mount to exit non-zero with ExitError, got %v", mountErr)
	}
}

View File

@@ -48,7 +48,6 @@ var Commands = []*Command{
cmdVolume,
cmdWebDav,
cmdSftp,
cmdNfs,
cmdWorker,
}

View File

@@ -1,127 +0,0 @@
package command
import (
"fmt"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/security"
weed_server_nfs "github.com/seaweedfs/seaweedfs/weed/server/nfs"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/seaweedfs/seaweedfs/weed/util/version"
)
var (
nfsStandaloneOptions NfsOptions
)
// NfsOptions holds the command-line flag values for the `weed nfs` command.
// Each field is a pointer populated by flag registration in init.
type NfsOptions struct {
// filer is the filer server address (-filer).
filer *string
// ipBind is the IP address the NFS server binds to (-ip.bind).
ipBind *string
// port is the NFS server listen port (-port).
port *int
// filerRootPath is the filer path to export (-filer.path).
filerRootPath *string
// readOnly exports the filer path as read only when true (-readOnly).
readOnly *bool
// allowedClients is a comma-separated list of client IPs, hostnames, or
// CIDRs allowed to connect (-allowedClients).
allowedClients *string
// volumeServerAccess selects how volume servers are reached:
// direct, publicUrl, or filerProxy (-volumeServerAccess).
volumeServerAccess *string
// portmapBind, when non-empty, is the IP on which the built-in portmap
// responder binds port 111 (-portmap.bind).
portmapBind *string
}
// init wires the command's Run hook and registers every `weed nfs` flag,
// storing the resulting value pointers in nfsStandaloneOptions.
func init() {
	cmdNfs.Run = runNfs // break init cycle

	opts := &nfsStandaloneOptions
	stringFlag, intFlag, boolFlag := cmdNfs.Flag.String, cmdNfs.Flag.Int, cmdNfs.Flag.Bool

	opts.filer = stringFlag("filer", "localhost:8888", "filer server address")
	opts.ipBind = stringFlag("ip.bind", "127.0.0.1", "ip address to bind to. Defaults to loopback; override explicitly to expose the experimental server to the network.")
	opts.port = intFlag("port", 2049, "NFS server listen port")
	opts.filerRootPath = stringFlag("filer.path", "", "remote path from filer server to export. Required: no default is provided so operators must opt in to exporting a namespace subtree.")
	opts.readOnly = boolFlag("readOnly", false, "export the filer path as read only")
	opts.allowedClients = stringFlag("allowedClients", "", "comma-separated client IPs, hostnames, or CIDRs allowed to connect")
	opts.volumeServerAccess = stringFlag("volumeServerAccess", "direct", "access volume servers by [direct|publicUrl|filerProxy]")
	opts.portmapBind = stringFlag("portmap.bind", "", "when set, bind a built-in portmap v2 responder on <ip>:111 so plain `mount -t nfs` works without client-side portmap bypass. Empty disables it. Binding port 111 requires root or CAP_NET_BIND_SERVICE and must not conflict with a system rpcbind.")
}
// cmdNfs describes the `weed nfs` command: its usage line, short summary,
// and the long help text shown to operators. Its Run hook is assigned in
// init rather than here, to break an initialization cycle.
var cmdNfs = &Command{
UsageLine: "nfs -port=2049 -filer=<ip:port> -filer.path=<exported subtree>",
Short: "start an experimental NFSv3 server backed by a filer",
Long: `start an experimental NFSv3 server backed by a filer.
This command serves an experimental filer-native NFSv3 frontend with
deterministic filehandles, filer-backed metadata operations, and direct
volume-server data access for chunk reads and buffered writes.
Safer defaults (since export ACLs are still not implemented):
- ip.bind defaults to 127.0.0.1, so the server is not reachable from
other hosts unless you override it explicitly.
- filer.path has no default; you must pick the subtree to export.
Override -ip.bind to a routable address only after you have reviewed
-allowedClients and the readiness of the rest of your deployment.
Mounting from a Linux client
----------------------------
The server does not run portmap/rpcbind by default. That means Linux
mount.nfs, which queries portmap on port 111 first, will fail with
"portmap query failed" against the plain form:
mount -t nfs -o nfsvers=3,nolock <host>:/export /mnt
Either tell the client to bypass portmap:
mount -t nfs -o nfsvers=3,nolock,port=2049,mountport=2049,\
proto=tcp,mountproto=tcp <host>:/export /mnt
or enable the built-in portmap responder on the server:
weed nfs ... -portmap.bind=0.0.0.0
With the responder enabled MOUNT v3 is answered over both TCP and UDP,
so the plain mount form above just works — no mountproto override is
required even on clients whose default mountproto is UDP.
Binding port 111 requires root or CAP_NET_BIND_SERVICE and must not
collide with a system rpcbind.
`,
}
// runNfs is the entry point for `weed nfs`. It loads the security
// configuration, validates the flags, builds the NFS server from them, and
// starts it. It returns false when -filer.path is missing or when the server
// cannot be constructed or started, and true once Start returns without
// error.
func runNfs(cmd *Command, args []string) bool {
	util.LoadSecurityConfiguration()

	opts := &nfsStandaloneOptions

	// An explicitly empty -ip.bind falls back to loopback.
	if *opts.ipBind == "" {
		*opts.ipBind = "127.0.0.1"
	}

	switch *opts.filerRootPath {
	case "":
		glog.Errorf("-filer.path is required: pick an explicit subtree to export; exporting \"/\" is not a default")
		return false
	case "/":
		glog.Warningf("-filer.path=/ exports the entire filer namespace; ensure -allowedClients or -ip.bind constrains access")
	}

	listenAddress := fmt.Sprintf("%s:%d", *opts.ipBind, *opts.port)
	glog.V(0).Infof("Starting Seaweed NFS Server %s at %s", version.Version(), listenAddress)

	grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client")
	serverOption := &weed_server_nfs.Option{
		Filer:              pb.ServerAddress(*opts.filer),
		BindIp:             *opts.ipBind,
		Port:               *opts.port,
		FilerRootPath:      *opts.filerRootPath,
		ReadOnly:           *opts.readOnly,
		AllowedClients:     util.StringSplit(*opts.allowedClients, ","),
		VolumeServerAccess: *opts.volumeServerAccess,
		GrpcDialOption:     grpcDialOption,
		PortmapBind:        *opts.portmapBind,
	}

	nfsServer, newErr := weed_server_nfs.NewServer(serverOption)
	if newErr != nil {
		glog.Errorf("NFS Server startup error: %v", newErr)
		return false
	}

	startErr := nfsServer.Start()
	if startErr != nil {
		glog.Errorf("NFS Server startup error: %v", startErr)
		return false
	}
	return true
}

View File

@@ -13,7 +13,6 @@ import (
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
"github.com/seaweedfs/seaweedfs/weed/filer/empty_folder_cleanup"
"github.com/seaweedfs/seaweedfs/weed/sequence"
"github.com/seaweedfs/seaweedfs/weed/cluster"
"github.com/seaweedfs/seaweedfs/weed/pb"
@@ -64,7 +63,6 @@ type Filer struct {
DeletionRetryQueue *DeletionRetryQueue
EmptyFolderCleaner *empty_folder_cleanup.EmptyFolderCleaner
EmptyFolderCleanupDelay time.Duration
inodeSequencer sequence.Sequencer
}
func NewFiler(masters pb.ServerDiscovery, grpcDialOption grpc.DialOption, filerHost pb.ServerAddress, filerGroup string, collection string, replication string, dataCenter string, maxFilenameLength uint32, notifyFn func()) *Filer {
@@ -79,7 +77,6 @@ func NewFiler(masters pb.ServerDiscovery, grpcDialOption grpc.DialOption, filerH
MaxFilenameLength: maxFilenameLength,
deletionQuit: make(chan struct{}),
DeletionRetryQueue: NewDeletionRetryQueue(),
inodeSequencer: newInodeSequencer(filerHost),
}
if f.UniqueFilerId < 0 {
f.UniqueFilerId = -f.UniqueFilerId

View File

@@ -1,51 +1,26 @@
package filer
import (
"os"
"strconv"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/sequence"
"github.com/seaweedfs/seaweedfs/weed/util"
)
// newInodeSequencer builds the Snowflake-based sequencer that assigns inode
// values to filer entries. The Snowflake node id is read from
// SEAWEEDFS_FILER_SNOWFLAKE_ID; operators running several filers should set
// it to a distinct value per filer, because the default derived from
// filerHost has only 1024 possible values and can collide.
//
// A construction failure is fatal by design: a process-local fallback
// allocator would hand out the same inode values again after a restart and
// break the stable object identity that consumers rely on.
func newInodeSequencer(filerHost pb.ServerAddress) sequence.Sequencer {
	nodeId := parseSnowflakeIdFromEnv()

	sequencer, err := sequence.NewSnowflakeSequencer(string(filerHost), nodeId)
	if err == nil {
		return sequencer
	}

	glog.Fatalf("initialize inode sequencer for filer %s (snowflakeId=%d): %v", filerHost, nodeId, err)
	return sequencer
}
// parseSnowflakeIdFromEnv reads SEAWEEDFS_FILER_SNOWFLAKE_ID and returns it
// as an int. An unset or empty variable yields 0. A value that is not an
// integer in [0, 1023] is reported through glog.Fatalf.
func parseSnowflakeIdFromEnv() int {
	value := os.Getenv("SEAWEEDFS_FILER_SNOWFLAKE_ID")
	if value == "" {
		return 0
	}

	parsed, convErr := strconv.Atoi(value)
	if inRange := convErr == nil && parsed >= 0 && parsed <= 0x3ff; !inRange {
		glog.Fatalf("SEAWEEDFS_FILER_SNOWFLAKE_ID must be an integer in [0,1023], got %q", value)
	}
	return parsed
}
// ensureEntryInode derives a stable inode the same way the FUSE mount does, so
// the filer-stored value matches what a mount would otherwise compute and no
// per-object reverse index is required. Hard links hash their shared
// HardLinkId, so every link resolves to one inode; other entries hash the path
// and creation time.
func (f *Filer) ensureEntryInode(entry *Entry) {
if entry == nil || entry.Attr.Inode != 0 {
return
}
entry.Attr.Inode = f.nextInode()
}
func (f *Filer) nextInode() uint64 {
return f.inodeSequencer.NextFileId(1)
if entry.Attr.Crtime.IsZero() {
entry.Attr.Crtime = time.Now()
}
if len(entry.HardLinkId) > 0 {
entry.Attr.Inode = uint64(util.HashStringToLong(string(entry.HardLinkId)))
return
}
entry.Attr.Inode = entry.FullPath.AsInode(entry.Attr.Crtime.Unix())
}

View File

@@ -1,300 +0,0 @@
package filer
import (
"context"
"encoding/binary"
"encoding/json"
"sort"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/util"
)
const inodeIndexKeyPrefix = "filer.inode.path."
const InodeIndexInitialGeneration uint64 = 1
// inodeIndexEntry pairs a filer path with the inode stored on that entry.
// It is the element type gathered while walking a directory tree.
type inodeIndexEntry struct {
// path is the full path of the entry.
path util.FullPath
// inode is the entry's Attr.Inode value.
inode uint64
}
// InodeIndexRecord is the JSON-encoded value stored in the KV store under an
// inode's index key. It lists every path recorded for that inode.
type InodeIndexRecord struct {
// Generation is set to InodeIndexInitialGeneration when it is zero and
// the record has paths; nothing visible here advances it further.
Generation uint64 `json:"generation,omitempty"`
// Paths holds the recorded paths; normalize keeps them sorted, free of
// empty strings, and de-duplicated.
Paths []string `json:"paths,omitempty"`
}
// InodeIndexKey builds the KV key for an inode: the fixed key prefix
// followed by the inode as an 8-byte big-endian integer.
func InodeIndexKey(inode uint64) []byte {
	key := make([]byte, 0, len(inodeIndexKeyPrefix)+8)
	key = append(key, inodeIndexKeyPrefix...)
	return binary.BigEndian.AppendUint64(key, inode)
}
// DecodeInodeIndexRecord parses a stored index value into a record.
//
// Three encodings are accepted:
//   - empty input yields an empty record;
//   - input that does not start with '{' is treated as the legacy format, in
//     which the value is a single raw path; it stays readable so existing
//     records are upgraded transparently on the next write;
//   - anything else is decoded as JSON and normalized.
//
// A JSON decoding failure is returned with a nil record.
func DecodeInodeIndexRecord(value []byte) (*InodeIndexRecord, error) {
	switch {
	case len(value) == 0:
		return &InodeIndexRecord{}, nil
	case value[0] != '{':
		legacy := &InodeIndexRecord{Generation: InodeIndexInitialGeneration}
		legacy.addPath(util.FullPath(value))
		return legacy, nil
	}

	decoded := new(InodeIndexRecord)
	if err := json.Unmarshal(value, decoded); err != nil {
		return nil, err
	}
	decoded.normalize()
	return decoded, nil
}
// Encode normalizes the record in place and returns its JSON encoding.
func (record *InodeIndexRecord) Encode() ([]byte, error) {
	record.normalize()
	encoded, err := json.Marshal(record)
	return encoded, err
}
// normalize brings the record into canonical form: empty paths dropped and
// the rest sorted and de-duplicated. A record with no paths is left
// untouched. Otherwise a zero Generation is bumped to the initial
// generation, and Paths becomes nil if only empty strings were present.
func (record *InodeIndexRecord) normalize() {
	if len(record.Paths) == 0 {
		return
	}
	if record.Generation == 0 {
		record.Generation = InodeIndexInitialGeneration
	}

	// Copy the non-empty paths into a fresh slice so the original backing
	// array is never reordered.
	kept := make([]string, 0, len(record.Paths))
	for _, p := range record.Paths {
		if p != "" {
			kept = append(kept, p)
		}
	}
	if len(kept) == 0 {
		record.Paths = nil
		return
	}

	sort.Strings(kept)

	// Compact adjacent duplicates in place; last indexes the final unique
	// element written so far.
	last := 0
	for i := 1; i < len(kept); i++ {
		if kept[i] != kept[last] {
			last++
			kept[last] = kept[i]
		}
	}
	record.Paths = kept[:last+1]
}
// addPath inserts path into the record's sorted path list and reports
// whether the list changed. An empty path or one already present returns
// false.
func (record *InodeIndexRecord) addPath(path util.FullPath) bool {
	if path == "" {
		return false
	}
	record.normalize()

	name := string(path)
	pos := sort.SearchStrings(record.Paths, name)
	if present := pos < len(record.Paths) && record.Paths[pos] == name; present {
		return false
	}

	// Grow by one slot, shift the tail right, and drop the new path in at
	// its sorted position.
	grown := append(record.Paths, "")
	copy(grown[pos+1:], grown[pos:])
	grown[pos] = name
	record.Paths = grown
	return true
}
// removePath deletes path from the record's sorted path list and reports
// whether it was present. Paths is reset to nil when the last path is
// removed.
func (record *InodeIndexRecord) removePath(path util.FullPath) bool {
	if path == "" || len(record.Paths) == 0 {
		return false
	}
	record.normalize()

	name := string(path)
	pos := sort.SearchStrings(record.Paths, name)
	if found := pos < len(record.Paths) && record.Paths[pos] == name; !found {
		return false
	}

	// Shift the tail left over the removed slot, then trim the last element.
	copy(record.Paths[pos:], record.Paths[pos+1:])
	record.Paths = record.Paths[:len(record.Paths)-1]
	if len(record.Paths) == 0 {
		record.Paths = nil
	}
	return true
}
// CanonicalPath returns the first path in sorted order after normalizing
// the record, or the empty path if the record has none.
func (record *InodeIndexRecord) CanonicalPath() util.FullPath {
	record.normalize()
	if len(record.Paths) > 0 {
		return util.FullPath(record.Paths[0])
	}
	return ""
}
// FullPaths returns every recorded path, in sorted order, converted to
// util.FullPath. It returns nil when the record has no paths.
func (record *InodeIndexRecord) FullPaths() []util.FullPath {
	record.normalize()

	count := len(record.Paths)
	if count == 0 {
		return nil
	}

	converted := make([]util.FullPath, count)
	for i, p := range record.Paths {
		converted[i] = util.FullPath(p)
	}
	return converted
}
// lookupInodeIndex fetches and decodes the index record stored for inode.
// Inode 0 is never indexed and yields ErrKvNotFound without touching the
// store; KV errors are returned unchanged.
func (fsw *FilerStoreWrapper) lookupInodeIndex(ctx context.Context, inode uint64) (*InodeIndexRecord, error) {
	if inode == 0 {
		return nil, ErrKvNotFound
	}

	raw, getErr := fsw.KvGet(ctx, InodeIndexKey(inode))
	if getErr != nil {
		return nil, getErr
	}
	return DecodeInodeIndexRecord(raw)
}
// storeInodeIndex adds path to the index record for inode, creating the
// record if none exists, and writes it back to the KV store. A zero inode or
// an empty path is a no-op.
func (fsw *FilerStoreWrapper) storeInodeIndex(ctx context.Context, path util.FullPath, inode uint64) error {
	if path == "" || inode == 0 {
		return nil
	}

	record, err := fsw.lookupInodeIndex(ctx, inode)
	switch {
	case err == nil:
		// An existing record was found; extend it below.
	case err == ErrKvNotFound:
		record = &InodeIndexRecord{Generation: InodeIndexInitialGeneration}
	default:
		return err
	}

	record.addPath(path)

	encoded, err := record.Encode()
	if err != nil {
		return err
	}
	return fsw.KvPut(ctx, InodeIndexKey(inode), encoded)
}
// lookupInodePath resolves inode to its canonical (first sorted) path. It
// returns ErrKvNotFound when the stored record has no paths.
func (fsw *FilerStoreWrapper) lookupInodePath(ctx context.Context, inode uint64) (util.FullPath, error) {
	record, err := fsw.lookupInodeIndex(ctx, inode)
	if err != nil {
		return "", err
	}

	if canonical := record.CanonicalPath(); canonical != "" {
		return canonical, nil
	}
	return "", ErrKvNotFound
}
// lookupInodePaths resolves inode to every path recorded for it. It returns
// ErrKvNotFound when the stored record has no paths.
func (fsw *FilerStoreWrapper) lookupInodePaths(ctx context.Context, inode uint64) ([]util.FullPath, error) {
	record, err := fsw.lookupInodeIndex(ctx, inode)
	if err != nil {
		return nil, err
	}

	if all := record.FullPaths(); len(all) > 0 {
		return all, nil
	}
	return nil, ErrKvNotFound
}
// removePathFromInodeIndex drops path from the index record for inode. The
// KV entry is deleted when no paths remain and rewritten otherwise. A zero
// inode, an empty path, a missing record, or a path that is not in the
// record are all no-ops.
func (fsw *FilerStoreWrapper) removePathFromInodeIndex(ctx context.Context, path util.FullPath, inode uint64) error {
	if path == "" || inode == 0 {
		return nil
	}

	record, err := fsw.lookupInodeIndex(ctx, inode)
	if err == ErrKvNotFound {
		return nil
	}
	if err != nil {
		return err
	}

	if removed := record.removePath(path); !removed {
		return nil
	}

	key := InodeIndexKey(inode)
	if len(record.Paths) == 0 {
		return fsw.KvDelete(ctx, key)
	}

	encoded, err := record.Encode()
	if err != nil {
		return err
	}
	return fsw.KvPut(ctx, key, encoded)
}
// collectInodeIndexEntries walks the tree under dirPath and returns a
// (path, inode) pair for every entry with a non-zero inode.
//
// ctx is forwarded to the walk so that a DeleteFolderChildren on a
// pathological directory does not keep gathering entries after the client
// has given up. If the walk fails, the partial result is dropped and only
// the error is returned; callers treat index cleanup as best-effort.
func (fsw *FilerStoreWrapper) collectInodeIndexEntries(ctx context.Context, dirPath util.FullPath) ([]inodeIndexEntry, error) {
	var found []inodeIndexEntry

	walkErr := fsw.collectInodeIndexEntriesRecursive(ctx, dirPath, &found)
	if walkErr != nil {
		return nil, walkErr
	}
	return found, nil
}
// collectInodeIndexEntriesRecursive lists dirPath one page at a time,
// appends a (path, inode) pair to *collected for every entry with a
// non-zero inode, and descends into each subdirectory.
//
// ctx is checked before every page is fetched, so a cancelled or expired
// caller stops the walk promptly instead of relying on the underlying store
// to notice. In that case ctx.Err() is returned and *collected may hold a
// partial result.
func (fsw *FilerStoreWrapper) collectInodeIndexEntriesRecursive(ctx context.Context, dirPath util.FullPath, collected *[]inodeIndexEntry) error {
	actualStore := fsw.getActualStore(dirPath + "/")
	lastFileName := ""
	includeStartFile := false

	for {
		// Honor caller cancellation between pages and between recursion
		// levels; each recursive call re-enters this loop.
		if err := ctx.Err(); err != nil {
			return err
		}

		// Buffer the page first so recursion happens after the listing call
		// has returned rather than inside the store's callback.
		page := make([]*Entry, 0, PaginationSize)
		nextLastFileName, err := actualStore.ListDirectoryEntries(ctx, dirPath, lastFileName, includeStartFile, PaginationSize, func(entry *Entry) (bool, error) {
			page = append(page, entry)
			return true, nil
		})
		if err != nil {
			return err
		}

		for _, entry := range page {
			if entry.Attr.Inode != 0 {
				*collected = append(*collected, inodeIndexEntry{path: entry.FullPath, inode: entry.Attr.Inode})
			}
			if entry.IsDirectory() {
				if err := fsw.collectInodeIndexEntriesRecursive(ctx, entry.FullPath, collected); err != nil {
					return err
				}
			}
		}

		// A short page means the directory has been fully listed.
		if len(page) < PaginationSize {
			return nil
		}
		lastFileName = nextLastFileName
		includeStartFile = false
	}
}
// recordInodeIndexWrite refreshes the inode→path secondary index once the
// primary store mutation has gone through. Index maintenance is best-effort:
// an error here is only logged (tagged with op) and never returned, because
// surfacing it would make a create/update that was actually persisted look
// failed, and a retry could not repair the index anyway (DeleteEntry exits
// early once the entry is gone). Later writes are left to rebuild the record.
// Calls with a zero inode or an empty path are ignored.
func (fsw *FilerStoreWrapper) recordInodeIndexWrite(ctx context.Context, op string, path util.FullPath, inode uint64) {
	if path == "" || inode == 0 {
		return
	}
	err := fsw.storeInodeIndex(ctx, path, inode)
	if err == nil {
		return
	}
	glog.WarningfCtx(ctx, "%s: update inode index for %s (inode %d): %v", op, path, inode, err)
}
// recordInodeIndexRemoval is the removal counterpart of
// recordInodeIndexWrite: it takes path out of the index record for inode and
// only logs (tagged with op) when that fails. Calls with a zero inode or an
// empty path are ignored.
func (fsw *FilerStoreWrapper) recordInodeIndexRemoval(ctx context.Context, op string, path util.FullPath, inode uint64) {
	if path == "" || inode == 0 {
		return
	}
	err := fsw.removePathFromInodeIndex(ctx, path, inode)
	if err == nil {
		return
	}
	glog.WarningfCtx(ctx, "%s: clear inode index for %s (inode %d): %v", op, path, inode, err)
}

View File

@@ -1,206 +0,0 @@
package filer
import (
"context"
"os"
"testing"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestFilerStoreWrapperMaintainsInodeIndexLifecycle walks one entry through
// insert, update and delete on a stub-backed wrapper and checks the inode
// index after each step: the path is resolvable after insert and update, the
// record carries the initial generation, and the lookup reports
// ErrKvNotFound once the entry is deleted.
func TestFilerStoreWrapperMaintainsInodeIndexLifecycle(t *testing.T) {
wrapper := NewFilerStoreWrapper(newStubFilerStore())
ctx := context.Background()
created := &Entry{
FullPath: util.FullPath("/docs/report.txt"),
Attr: Attr{
Mode: 0o644,
Inode: 42,
},
}
require.NoError(t, wrapper.InsertEntry(ctx, created))
path, err := wrapper.lookupInodePath(ctx, created.Attr.Inode)
require.NoError(t, err)
assert.Equal(t, created.FullPath, path)
paths, err := wrapper.lookupInodePaths(ctx, created.Attr.Inode)
require.NoError(t, err)
assert.Equal(t, []util.FullPath{created.FullPath}, paths)
record, err := wrapper.lookupInodeIndex(ctx, created.Attr.Inode)
require.NoError(t, err)
assert.Equal(t, InodeIndexInitialGeneration, record.Generation)
// Same path and inode, different mode: the index must still resolve.
updated := &Entry{
FullPath: util.FullPath("/docs/report.txt"),
Attr: Attr{
Mode: 0o600,
Inode: 42,
},
}
require.NoError(t, wrapper.UpdateEntry(ctx, updated))
path, err = wrapper.lookupInodePath(ctx, updated.Attr.Inode)
require.NoError(t, err)
assert.Equal(t, updated.FullPath, path)
require.NoError(t, wrapper.DeleteEntry(ctx, created.FullPath))
_, err = wrapper.lookupInodePath(ctx, created.Attr.Inode)
require.ErrorIs(t, err, ErrKvNotFound)
}
// TestFilerStoreWrapperMaintainsMultiplePathsPerInode inserts two hard-linked
// entries that share one inode and checks that the index lists both paths
// (with /links/a.txt first even though /links/b.txt was inserted first), that
// lookupInodePath returns the first of them, and that deleting one link
// leaves the other resolvable.
func TestFilerStoreWrapperMaintainsMultiplePathsPerInode(t *testing.T) {
wrapper := NewFilerStoreWrapper(newStubFilerStore())
ctx := context.Background()
inode := uint64(88)
hardLinkId := NewHardLinkId()
require.NoError(t, wrapper.InsertEntry(ctx, &Entry{
FullPath: util.FullPath("/links/b.txt"),
Attr: Attr{
Mode: 0o644,
Inode: inode,
},
HardLinkId: hardLinkId,
HardLinkCounter: 2,
}))
require.NoError(t, wrapper.InsertEntry(ctx, &Entry{
FullPath: util.FullPath("/links/a.txt"),
Attr: Attr{
Mode: 0o644,
Inode: inode,
},
HardLinkId: hardLinkId,
HardLinkCounter: 2,
}))
paths, err := wrapper.lookupInodePaths(ctx, inode)
require.NoError(t, err)
assert.Equal(t, []util.FullPath{"/links/a.txt", "/links/b.txt"}, paths)
record, err := wrapper.lookupInodeIndex(ctx, inode)
require.NoError(t, err)
assert.Equal(t, InodeIndexInitialGeneration, record.Generation)
path, err := wrapper.lookupInodePath(ctx, inode)
require.NoError(t, err)
assert.Equal(t, util.FullPath("/links/a.txt"), path)
// Removing one link must keep the record alive for the remaining link.
require.NoError(t, wrapper.DeleteEntry(ctx, util.FullPath("/links/a.txt")))
paths, err = wrapper.lookupInodePaths(ctx, inode)
require.NoError(t, err)
assert.Equal(t, []util.FullPath{"/links/b.txt"}, paths)
path, err = wrapper.lookupInodePath(ctx, inode)
require.NoError(t, err)
assert.Equal(t, util.FullPath("/links/b.txt"), path)
}
// TestFilerStoreWrapperUpgradesLegacySinglePathInodeIndexRecords seeds the KV
// store with an index value that is just a raw path string, checks that the
// lookups still understand it, and then verifies that storing a second path
// rewrites the value as the JSON record form with generation 1 and both paths.
func TestFilerStoreWrapperUpgradesLegacySinglePathInodeIndexRecords(t *testing.T) {
wrapper := NewFilerStoreWrapper(newStubFilerStore())
ctx := context.Background()
inode := uint64(91)
// Legacy format: the value is the bare path, not an encoded record.
require.NoError(t, wrapper.KvPut(ctx, InodeIndexKey(inode), []byte("/legacy/path.txt")))
path, err := wrapper.lookupInodePath(ctx, inode)
require.NoError(t, err)
assert.Equal(t, util.FullPath("/legacy/path.txt"), path)
paths, err := wrapper.lookupInodePaths(ctx, inode)
require.NoError(t, err)
assert.Equal(t, []util.FullPath{"/legacy/path.txt"}, paths)
require.NoError(t, wrapper.storeInodeIndex(ctx, util.FullPath("/legacy/second.txt"), inode))
paths, err = wrapper.lookupInodePaths(ctx, inode)
require.NoError(t, err)
assert.Equal(t, []util.FullPath{"/legacy/path.txt", "/legacy/second.txt"}, paths)
value, err := wrapper.KvGet(ctx, InodeIndexKey(inode))
require.NoError(t, err)
assert.JSONEq(t, `{"generation":1,"paths":["/legacy/path.txt","/legacy/second.txt"]}`, string(value))
}
// TestFilerStoreWrapperKeepsInodeIndexWhenDeleteArrivesAfterRenameInsert
// replays a rename as "insert new path, then delete old path" for the same
// inode and checks that the index ends up pointing only at the new path
// rather than being wiped by the trailing delete.
func TestFilerStoreWrapperKeepsInodeIndexWhenDeleteArrivesAfterRenameInsert(t *testing.T) {
wrapper := NewFilerStoreWrapper(newStubFilerStore())
ctx := context.Background()
inode := uint64(77)
require.NoError(t, wrapper.InsertEntry(ctx, &Entry{
FullPath: util.FullPath("/old/name.txt"),
Attr: Attr{
Mode: 0o644,
Inode: inode,
},
}))
require.NoError(t, wrapper.InsertEntry(ctx, &Entry{
FullPath: util.FullPath("/new/name.txt"),
Attr: Attr{
Mode: 0o644,
Inode: inode,
},
}))
require.NoError(t, wrapper.DeleteEntry(ctx, util.FullPath("/old/name.txt")))
path, err := wrapper.lookupInodePath(ctx, inode)
require.NoError(t, err)
assert.Equal(t, util.FullPath("/new/name.txt"), path)
paths, err := wrapper.lookupInodePaths(ctx, inode)
require.NoError(t, err)
assert.Equal(t, []util.FullPath{"/new/name.txt"}, paths)
}
// TestRecursiveDeleteRemovesDescendantInodeIndexes builds a small tree
// (/tree with a file, a subdirectory and a nested file), deletes /tree
// through Filer.DeleteEntryMetaAndData, and checks that no inode index record
// and no entry remains for any of the four paths.
func TestRecursiveDeleteRemovesDescendantInodeIndexes(t *testing.T) {
f, store := newTestFilerWithStubStore()
ctx := context.Background()
entries := []*Entry{
{
FullPath: util.FullPath("/tree"),
Attr: Attr{
Mode: os.ModeDir | 0o755,
Inode: 100,
},
},
{
FullPath: util.FullPath("/tree/file.txt"),
Attr: Attr{
Mode: 0o644,
Inode: 101,
},
},
{
FullPath: util.FullPath("/tree/subdir"),
Attr: Attr{
Mode: os.ModeDir | 0o755,
Inode: 102,
},
},
{
FullPath: util.FullPath("/tree/subdir/nested.txt"),
Attr: Attr{
Mode: 0o644,
Inode: 103,
},
},
}
for _, entry := range entries {
require.NoError(t, f.Store.InsertEntry(ctx, entry))
}
// NOTE(review): the first boolean is presumably the "recursive" flag — confirm against DeleteEntryMetaAndData's signature.
require.NoError(t, f.DeleteEntryMetaAndData(ctx, util.FullPath("/tree"), true, false, false, false, nil, 0))
for _, inode := range []uint64{100, 101, 102, 103} {
_, err := f.Store.(*FilerStoreWrapper).lookupInodePath(ctx, inode)
require.ErrorIs(t, err, ErrKvNotFound)
}
for _, path := range []string{"/tree", "/tree/file.txt", "/tree/subdir", "/tree/subdir/nested.txt"} {
_, err := store.FindEntry(ctx, util.FullPath(path))
require.Error(t, err)
}
}

View File

@@ -4,6 +4,7 @@ import (
"context"
"os"
"testing"
"time"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/util"
@@ -11,6 +12,47 @@ import (
"github.com/stretchr/testify/require"
)
// TestEnsureEntryInodeMatchesFuseDerivation checks that ensureEntryInode, for
// an entry without a hard link id, stores FullPath.AsInode(crtime.Unix()) and
// that a second entry with the same path and creation time gets the same
// inode.
func TestEnsureEntryInodeMatchesFuseDerivation(t *testing.T) {
f := &Filer{}
crtime := time.Unix(1700000000, 0)
entry := &Entry{
FullPath: util.FullPath("/dir/file.txt"),
Attr: Attr{Crtime: crtime},
}
f.ensureEntryInode(entry)
// The filer stores exactly what the FUSE mount would compute for a
// non-hard-linked entry, and it is deterministic across calls.
assert.Equal(t, entry.FullPath.AsInode(crtime.Unix()), entry.Attr.Inode)
again := &Entry{FullPath: entry.FullPath, Attr: Attr{Crtime: crtime}}
f.ensureEntryInode(again)
assert.Equal(t, entry.Attr.Inode, again.Attr.Inode)
}
// TestEnsureEntryInodeSharesAcrossHardLinks checks that two entries with
// different paths and creation times but the same HardLinkId receive the same
// inode, and that this inode equals util.HashStringToLong of the hard link id.
func TestEnsureEntryInodeSharesAcrossHardLinks(t *testing.T) {
f := &Filer{}
hardLinkId := NewHardLinkId()
a := &Entry{
FullPath: util.FullPath("/links/a.txt"),
Attr: Attr{Crtime: time.Unix(1700000000, 0)},
HardLinkId: hardLinkId,
}
b := &Entry{
FullPath: util.FullPath("/links/b.txt"),
Attr: Attr{Crtime: time.Unix(1800000000, 0)},
HardLinkId: hardLinkId,
}
f.ensureEntryInode(a)
f.ensureEntryInode(b)
// Every link to the same target resolves to one inode, independent of path
// or creation time.
assert.Equal(t, uint64(util.HashStringToLong(string(hardLinkId))), a.Attr.Inode)
assert.Equal(t, a.Attr.Inode, b.Attr.Inode)
}
func newTestFilerWithStubStore() (*Filer, *stubFilerStore) {
store := newStubFilerStore()
f := NewFiler(pb.ServerDiscovery{}, nil, "", "", "", "", "", 255, nil)

View File

@@ -276,7 +276,6 @@ func newTestFiler(t *testing.T, store *stubFilerStore, rs *FilerRemoteStorage) *
FilerConf: NewFilerConf(),
MaxFilenameLength: 255,
MasterClient: mc,
inodeSequencer: newInodeSequencer("test-filer"),
fileIdDeletionQueue: util.NewUnboundedQueue(),
deletionQuit: make(chan struct{}),
LocalMetaLogBuffer: log_buffer.NewLogBuffer("test", time.Minute,

View File

@@ -132,7 +132,6 @@ func (fsw *FilerStoreWrapper) InsertEntry(ctx context.Context, entry *Entry) err
return err
}
ctx = context.WithoutCancel(ctx)
fullPath := entry.FullPath
actualStore := fsw.getActualStore(entry.FullPath)
stats.FilerStoreCounter.WithLabelValues(actualStore.GetName(), "insert").Inc()
start := time.Now()
@@ -152,11 +151,7 @@ func (fsw *FilerStoreWrapper) InsertEntry(ctx context.Context, entry *Entry) err
return err
}
if err := actualStore.InsertEntry(ctx, entry); err != nil {
return err
}
fsw.recordInodeIndexWrite(ctx, "InsertEntry", fullPath, entry.Attr.Inode)
return nil
return actualStore.InsertEntry(ctx, entry)
}
// InsertEntryKnownAbsent skips the pre-insert FindEntry path when the caller has
@@ -166,7 +161,6 @@ func (fsw *FilerStoreWrapper) InsertEntryKnownAbsent(ctx context.Context, entry
return err
}
ctx = context.WithoutCancel(ctx)
fullPath := entry.FullPath
actualStore := fsw.getActualStore(entry.FullPath)
stats.FilerStoreCounter.WithLabelValues(actualStore.GetName(), "insert").Inc()
start := time.Now()
@@ -185,11 +179,7 @@ func (fsw *FilerStoreWrapper) InsertEntryKnownAbsent(ctx context.Context, entry
}
}
if err := actualStore.InsertEntry(ctx, entry); err != nil {
return err
}
fsw.recordInodeIndexWrite(ctx, "InsertEntryKnownAbsent", fullPath, entry.Attr.Inode)
return nil
return actualStore.InsertEntry(ctx, entry)
}
func (fsw *FilerStoreWrapper) UpdateEntry(ctx context.Context, entry *Entry) error {
@@ -197,7 +187,6 @@ func (fsw *FilerStoreWrapper) UpdateEntry(ctx context.Context, entry *Entry) err
return err
}
ctx = context.WithoutCancel(ctx)
fullPath := entry.FullPath
actualStore := fsw.getActualStore(entry.FullPath)
stats.FilerStoreCounter.WithLabelValues(actualStore.GetName(), "update").Inc()
start := time.Now()
@@ -217,11 +206,7 @@ func (fsw *FilerStoreWrapper) UpdateEntry(ctx context.Context, entry *Entry) err
return err
}
if err := actualStore.UpdateEntry(ctx, entry); err != nil {
return err
}
fsw.recordInodeIndexWrite(ctx, "UpdateEntry", fullPath, entry.Attr.Inode)
return nil
return actualStore.UpdateEntry(ctx, entry)
}
func normalizeEntryMimeForStore(entry *Entry) {
@@ -273,8 +258,6 @@ func (fsw *FilerStoreWrapper) DeleteEntry(ctx context.Context, fp util.FullPath)
if findErr == filer_pb.ErrNotFound || existingEntry == nil {
return nil
}
inode := existingEntry.Attr.Inode
fullPath := existingEntry.FullPath
if len(existingEntry.HardLinkId) != 0 {
// remove hard link
op := ctx.Value("OP")
@@ -289,11 +272,7 @@ func (fsw *FilerStoreWrapper) DeleteEntry(ctx context.Context, fp util.FullPath)
}
}
if err := actualStore.DeleteEntry(ctx, fp); err != nil {
return err
}
fsw.recordInodeIndexRemoval(ctx, "DeleteEntry", fullPath, inode)
return nil
return actualStore.DeleteEntry(ctx, fp)
}
func (fsw *FilerStoreWrapper) DeleteOneEntry(ctx context.Context, existingEntry *Entry) (err error) {
@@ -301,8 +280,6 @@ func (fsw *FilerStoreWrapper) DeleteOneEntry(ctx context.Context, existingEntry
return err
}
ctx = context.WithoutCancel(ctx)
fullPath := existingEntry.FullPath
inode := existingEntry.Attr.Inode
actualStore := fsw.getActualStore(existingEntry.FullPath)
stats.FilerStoreCounter.WithLabelValues(actualStore.GetName(), "delete").Inc()
start := time.Now()
@@ -325,11 +302,7 @@ func (fsw *FilerStoreWrapper) DeleteOneEntry(ctx context.Context, existingEntry
}
}
if err := actualStore.DeleteEntry(ctx, existingEntry.FullPath); err != nil {
return err
}
fsw.recordInodeIndexRemoval(ctx, "DeleteOneEntry", fullPath, inode)
return nil
return actualStore.DeleteEntry(ctx, existingEntry.FullPath)
}
func (fsw *FilerStoreWrapper) DeleteFolderChildren(ctx context.Context, fp util.FullPath) (err error) {
@@ -344,20 +317,7 @@ func (fsw *FilerStoreWrapper) DeleteFolderChildren(ctx context.Context, fp util.
stats.FilerStoreHistogram.WithLabelValues(actualStore.GetName(), "deleteFolderChildren").Observe(time.Since(start).Seconds())
}()
collected, err := fsw.collectInodeIndexEntries(ctx, fp)
if err != nil {
// Index collection is best-effort: a failure here only prevents inode
// index housekeeping, not the directory removal itself.
glog.WarningfCtx(ctx, "collectInodeIndexEntries %s: %v; deleting folder children without index cleanup", fp, err)
collected = nil
}
if err := actualStore.DeleteFolderChildren(ctx, fp); err != nil {
return err
}
for _, entry := range collected {
fsw.recordInodeIndexRemoval(ctx, "DeleteFolderChildren", entry.path, entry.inode)
}
return nil
return actualStore.DeleteFolderChildren(ctx, fp)
}
func (fsw *FilerStoreWrapper) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc ListEachEntryFunc) (string, error) {

View File

@@ -1,140 +0,0 @@
package nfs
import (
"fmt"
"net"
"strings"
"github.com/seaweedfs/seaweedfs/weed/glog"
)
// clientAuthorizer is an allowlist of NFS client addresses built by
// newClientAuthorizer from IP literals, CIDR ranges and host names.
type clientAuthorizer struct {
// exact holds allowed hosts keyed by IP string; for entries that were
// resolved from a host name the original name is stored as well.
exact map[string]struct{}
// cidrs holds allowed networks keyed by the configured CIDR text.
cidrs map[string]*net.IPNet
// enabled is set once at least one non-empty entry has been configured;
// while it is false every address is allowed.
enabled bool
}
// newClientAuthorizer builds an allowlist from the given entries.
//
// Each entry is trimmed; blank entries are skipped. An entry containing "/"
// must parse as a CIDR range. Any other entry is first tried as an IP literal
// and, failing that, resolved as a host name via net.LookupIP, in which case
// both the name and every resolved address are allowed. The authorizer is
// marked enabled as soon as one entry has been accepted.
//
// An error is returned for an unparsable CIDR, a failed lookup, or a lookup
// that yields no addresses.
func newClientAuthorizer(allowed []string) (*clientAuthorizer, error) {
	result := &clientAuthorizer{
		exact: make(map[string]struct{}),
		cidrs: make(map[string]*net.IPNet),
	}
	for _, item := range allowed {
		spec := strings.TrimSpace(item)
		switch {
		case spec == "":
			continue

		case strings.Contains(spec, "/"):
			_, ipNet, parseErr := net.ParseCIDR(spec)
			if parseErr != nil {
				return nil, fmt.Errorf("parse allowed NFS client %q: %w", spec, parseErr)
			}
			result.cidrs[spec] = ipNet

		default:
			if literal := normalizeClientIP(spec); literal != nil {
				result.exact[literal.String()] = struct{}{}
				break
			}
			// Not an IP literal: treat it as a host name.
			resolved, lookupErr := net.LookupIP(spec)
			if lookupErr != nil {
				return nil, fmt.Errorf("resolve allowed NFS client %q: %w", spec, lookupErr)
			}
			if len(resolved) == 0 {
				return nil, fmt.Errorf("resolve allowed NFS client %q: no addresses", spec)
			}
			result.exact[spec] = struct{}{}
			for _, addr := range resolved {
				if addr != nil {
					result.exact[addr.String()] = struct{}{}
				}
			}
		}
		result.enabled = true
	}
	return result, nil
}
// isAllowedConn reports whether the remote end of conn passes the allowlist.
// A nil conn is allowed.
func (a *clientAuthorizer) isAllowedConn(conn net.Conn) bool {
	if conn != nil {
		return a.isAllowedAddr(conn.RemoteAddr())
	}
	return true
}
// isAllowedAddr reports whether addr passes the allowlist.
//
// A nil or not-enabled authorizer allows everything. Otherwise the host part
// of addr must match an exact entry (as written or in normalized IP form) or
// fall inside one of the configured CIDR ranges. A nil addr, an empty host,
// or a host that is neither listed nor a parsable IP is rejected.
func (a *clientAuthorizer) isAllowedAddr(addr net.Addr) bool {
	switch {
	case a == nil || !a.enabled:
		return true
	case addr == nil:
		return false
	}

	host := remoteHost(addr.String())
	if host == "" {
		return false
	}
	if _, listed := a.exact[host]; listed {
		return true
	}

	ip := normalizeClientIP(host)
	if ip == nil {
		return false
	}
	if _, listed := a.exact[ip.String()]; listed {
		return true
	}
	for _, allowedNet := range a.cidrs {
		if allowedNet.Contains(ip) {
			return true
		}
	}
	return false
}
// remoteHost extracts the host part of a remote address string.
//
// Surrounding whitespace is ignored. If the value parses as "host:port" the
// host is returned; otherwise the trimmed value is returned as-is, minus one
// pair of enclosing square brackets if present.
func remoteHost(remote string) string {
	trimmed := strings.TrimSpace(remote)
	if h, _, splitErr := net.SplitHostPort(trimmed); splitErr == nil {
		return h
	}
	if strings.HasPrefix(trimmed, "[") && strings.HasSuffix(trimmed, "]") {
		return trimmed[1 : len(trimmed)-1]
	}
	return trimmed
}
// normalizeClientIP parses host as an IP address after trimming whitespace
// and cutting everything from the last '%' onward (an IPv6 zone suffix). It
// returns nil when the remainder is not a valid IP.
func normalizeClientIP(host string) net.IP {
	candidate := strings.TrimSpace(host)
	if pct := strings.LastIndexByte(candidate, '%'); pct >= 0 {
		candidate = candidate[:pct]
	}
	return net.ParseIP(candidate)
}
// allowlistListener wraps a net.Listener and consults authorizer for each
// accepted connection (see Accept).
type allowlistListener struct {
net.Listener
// authorizer decides which remote addresses may connect; nil allows all.
authorizer *clientAuthorizer
}
// Accept returns the next connection whose remote address passes the
// allowlist. Connections from other clients are logged, closed and skipped,
// so the call keeps blocking until an allowed client connects or the
// underlying listener returns an error.
func (l *allowlistListener) Accept() (net.Conn, error) {
	for {
		conn, acceptErr := l.Listener.Accept()
		if acceptErr != nil {
			return nil, acceptErr
		}
		permitted := l.authorizer == nil || l.authorizer.isAllowedConn(conn)
		if !permitted {
			glog.V(0).Infof("reject unauthorized nfs client %s", conn.RemoteAddr())
			// Best-effort close; the connection is being discarded anyway.
			_ = conn.Close()
			continue
		}
		return conn, nil
	}
}

View File

@@ -1,29 +0,0 @@
package nfs
import (
"net"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestClientAuthorizerResolvesHostnameEntries configures the allowlist with
// the host name "localhost" and checks that at least one address localhost
// resolves to is allowed, while the unrelated address 192.0.2.10 is rejected.
func TestClientAuthorizerResolvesHostnameEntries(t *testing.T) {
ips, err := net.LookupIP("localhost")
require.NoError(t, err)
require.NotEmpty(t, ips)
authorizer, err := newClientAuthorizer([]string{"localhost"})
require.NoError(t, err)
matched := false
for _, ip := range ips {
if authorizer.isAllowedAddr(&net.TCPAddr{IP: ip, Port: 2049}) {
matched = true
break
}
}
assert.True(t, matched)
assert.False(t, authorizer.isAllowedAddr(&net.TCPAddr{IP: net.ParseIP("192.0.2.10"), Port: 2049}))
}

View File

@@ -1,251 +0,0 @@
package nfs
import (
"context"
"encoding/binary"
"errors"
"fmt"
"hash/crc32"
"strings"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
"google.golang.org/grpc"
)
const (
// fileHandleVersion is written to byte 0 of every encoded handle and is the
// only version DecodeFileHandle accepts.
fileHandleVersion = 1
// fileHandleLength is the exact size in bytes of an encoded handle.
fileHandleLength = 28
)
var (
// ErrInvalidHandle is wrapped by DecodeFileHandle for malformed handles.
ErrInvalidHandle = errors.New("invalid nfs filehandle")
// ErrHandleExportMismatch reports a handle that belongs to another export.
ErrHandleExportMismatch = errors.New("nfs filehandle export mismatch")
// ErrStaleHandle reports a well-formed handle that no longer resolves.
ErrStaleHandle = errors.New("stale nfs filehandle")
)
// FileHandleKind says whether a handle refers to a file or a directory.
type FileHandleKind uint8
const (
FileHandleKindUnknown FileHandleKind = 0
FileHandleKindFile FileHandleKind = 1
FileHandleKindDirectory FileHandleKind = 2
)
// FileHandle is the decoded form of the opaque handle given to NFS clients.
type FileHandle struct {
Kind FileHandleKind
// ExportID identifies the export the handle was issued for.
ExportID uint32
// Inode is the filer inode; 0 is used for the synthetic export root.
Inode uint64
// Generation must match the inode index record's generation to resolve.
Generation uint64
}
// filerResolverClient is the subset of the filer client that Resolver uses:
// a KV read and a directory entry lookup.
type filerResolverClient interface {
KvGet(ctx context.Context, in *filer_pb.KvGetRequest, opts ...grpc.CallOption) (*filer_pb.KvGetResponse, error)
LookupDirectoryEntry(ctx context.Context, in *filer_pb.LookupDirectoryEntryRequest, opts ...grpc.CallOption) (*filer_pb.LookupDirectoryEntryResponse, error)
}
// Resolver turns encoded file handles back into filer paths and entries for
// one export.
type Resolver struct {
// exportRoot is the normalized root path of the export.
exportRoot util.FullPath
// exportID is derived from exportRoot (see exportIDForRoot).
exportID uint32
client filerResolverClient
}
// ResolvedHandle is the result of resolving a handle: the decoded handle, the
// path it maps to, and the entry found at that path.
type ResolvedHandle struct {
Handle FileHandle
Path util.FullPath
Entry *filer_pb.Entry
}
// NewFileHandle assembles a FileHandle from its parts. A zero generation is
// replaced by filer.InodeIndexInitialGeneration.
func NewFileHandle(exportID uint32, kind FileHandleKind, inode, generation uint64) FileHandle {
	gen := generation
	if gen == 0 {
		gen = filer.InodeIndexInitialGeneration
	}
	return FileHandle{Kind: kind, ExportID: exportID, Inode: inode, Generation: gen}
}
// Encode serializes the handle into fileHandleLength bytes, big-endian:
//
//	byte  0      version
//	byte  1      kind
//	bytes 2-3    left zero
//	bytes 4-7    export id
//	bytes 8-15   inode
//	bytes 16-23  generation
//	bytes 24-27  CRC-32 (IEEE) of bytes 0-23
func (h FileHandle) Encode() []byte {
	out := make([]byte, fileHandleLength)
	out[0], out[1] = fileHandleVersion, byte(h.Kind)

	be := binary.BigEndian
	be.PutUint32(out[4:8], h.ExportID)
	be.PutUint64(out[8:16], h.Inode)
	be.PutUint64(out[16:24], h.Generation)

	checksum := crc32.ChecksumIEEE(out[:24])
	be.PutUint32(out[24:28], checksum)
	return out
}
// DecodeFileHandle parses the byte layout produced by FileHandle.Encode.
//
// It returns an error wrapping ErrInvalidHandle when raw has the wrong
// length, an unsupported version byte, a checksum that does not match the
// first 24 bytes, or a zero generation.
func DecodeFileHandle(raw []byte) (FileHandle, error) {
	var none FileHandle
	switch {
	case len(raw) != fileHandleLength:
		return none, fmt.Errorf("%w: unexpected length %d", ErrInvalidHandle, len(raw))
	case raw[0] != fileHandleVersion:
		return none, fmt.Errorf("%w: unsupported version %d", ErrInvalidHandle, raw[0])
	}

	payload, trailer := raw[:24], raw[24:28]
	if binary.BigEndian.Uint32(trailer) != crc32.ChecksumIEEE(payload) {
		return none, fmt.Errorf("%w: checksum mismatch", ErrInvalidHandle)
	}

	generation := binary.BigEndian.Uint64(raw[16:24])
	if generation == 0 {
		return none, fmt.Errorf("%w: empty generation", ErrInvalidHandle)
	}

	return FileHandle{
		Kind:       FileHandleKind(raw[1]),
		ExportID:   binary.BigEndian.Uint32(raw[4:8]),
		Inode:      binary.BigEndian.Uint64(raw[8:16]),
		Generation: generation,
	}, nil
}
// NewResolver creates a Resolver for the export rooted at exportRoot. The
// root is normalized first and the export id is derived from the normalized
// form.
func NewResolver(exportRoot util.FullPath, client filerResolverClient) *Resolver {
	normalized := normalizeExportRoot(exportRoot)
	resolver := &Resolver{client: client}
	resolver.exportRoot = normalized
	resolver.exportID = exportIDForRoot(normalized)
	return resolver
}
// ExportID returns the resolver's export id, or 0 for a nil resolver.
func (r *Resolver) ExportID() uint32 {
	if r != nil {
		return r.exportID
	}
	return 0
}
// ResolveHandle decodes raw and maps it to a live path and entry inside this
// resolver's export.
//
// Resolution steps:
//  1. Decode the handle and require its export id to match this export
//     (ErrHandleExportMismatch otherwise).
//  2. Inode 0 is the synthetic export root and is handled separately.
//  3. Load the inode index record from the filer KV store; a missing value or
//     a generation different from the handle's yields ErrStaleHandle.
//  4. Try each recorded path that is visible from the export root and return
//     the first one whose entry still exists, carries the same inode (when
//     the entry reports one) and has the kind the handle expects.
//
// If no path qualifies, ErrStaleHandle is returned. A lookup failure other
// than "not found" is returned to the caller as-is rather than being reported
// as a stale handle.
func (r *Resolver) ResolveHandle(ctx context.Context, raw []byte) (*ResolvedHandle, error) {
	if r == nil || r.client == nil {
		return nil, errors.New("nfs resolver is not configured")
	}

	handle, err := DecodeFileHandle(raw)
	if err != nil {
		return nil, err
	}
	if handle.ExportID != r.exportID {
		return nil, ErrHandleExportMismatch
	}
	if handle.Inode == 0 {
		return r.resolveSyntheticRoot(ctx, handle)
	}

	kvResp, err := r.client.KvGet(ctx, &filer_pb.KvGetRequest{Key: filer.InodeIndexKey(handle.Inode)})
	if err != nil {
		return nil, err
	}
	if kvResp.GetError() != "" {
		return nil, errors.New(kvResp.GetError())
	}
	if len(kvResp.GetValue()) == 0 {
		return nil, ErrStaleHandle
	}

	record, err := filer.DecodeInodeIndexRecord(kvResp.GetValue())
	if err != nil {
		return nil, err
	}
	if record.Generation != handle.Generation {
		return nil, ErrStaleHandle
	}

	for _, path := range record.FullPaths() {
		if !pathVisibleFromExport(path, r.exportRoot) {
			continue
		}

		dir, name := path.DirAndName()
		lookupResp, lookupErr := r.client.LookupDirectoryEntry(ctx, &filer_pb.LookupDirectoryEntryRequest{
			Directory: dir,
			Name:      name,
		})
		// Order matters: classify the error before looking at the response.
		// A failed call can come back with a nil response, and checking for
		// that first would silently turn a real lookup failure into "try the
		// next path" and, eventually, ErrStaleHandle.
		if isLookupNotFound(lookupErr) {
			continue
		}
		if lookupErr != nil {
			return nil, lookupErr
		}
		if lookupResp == nil || lookupResp.Entry == nil {
			continue
		}

		// The path may have been reused by a different file since the index
		// record was written.
		if attrs := lookupResp.Entry.Attributes; attrs != nil && attrs.Inode != 0 && attrs.Inode != handle.Inode {
			continue
		}
		if handle.Kind == FileHandleKindDirectory && !lookupResp.Entry.IsDirectory {
			continue
		}
		if handle.Kind == FileHandleKindFile && lookupResp.Entry.IsDirectory {
			continue
		}

		return &ResolvedHandle{
			Handle: handle,
			Path:   path,
			Entry:  lookupResp.Entry,
		}, nil
	}
	return nil, ErrStaleHandle
}
// resolveSyntheticRoot resolves the inode-0 handle that stands for the export
// root. The handle must be a directory handle with the initial generation,
// otherwise it is stale. When the filer has no entry for the export root (not
// found, or an empty response) a synthetic root entry is substituted; any
// other lookup error is returned.
func (r *Resolver) resolveSyntheticRoot(ctx context.Context, handle FileHandle) (*ResolvedHandle, error) {
	validRoot := handle.Kind == FileHandleKindDirectory && handle.Generation == filer.InodeIndexInitialGeneration
	if !validRoot {
		return nil, ErrStaleHandle
	}

	parent, base := r.exportRoot.DirAndName()
	resp, lookupErr := r.client.LookupDirectoryEntry(ctx, &filer_pb.LookupDirectoryEntryRequest{
		Directory: parent,
		Name:      base,
	})

	var rootEntry *filer_pb.Entry
	switch {
	case isLookupNotFound(lookupErr):
		rootEntry = syntheticRootEntry()
	case lookupErr != nil:
		return nil, lookupErr
	case resp == nil || resp.Entry == nil:
		rootEntry = syntheticRootEntry()
	default:
		rootEntry = resp.Entry
	}

	return &ResolvedHandle{
		Handle: handle,
		Path:   r.exportRoot,
		Entry:  rootEntry,
	}, nil
}
// normalizeExportRoot runs root through util.NormalizePath and falls back to
// "/" when the normalized form is empty.
func normalizeExportRoot(root util.FullPath) util.FullPath {
	cleaned := util.NormalizePath(string(root))
	if cleaned == "" {
		return "/"
	}
	return cleaned
}
// exportIDForRoot derives the export id as the CRC-32 (IEEE) checksum of the
// normalized export root path.
func exportIDForRoot(root util.FullPath) uint32 {
return crc32.ChecksumIEEE([]byte(normalizeExportRoot(root)))
}
// pathVisibleFromExport reports whether path is the export root itself or
// lies under it (as decided by FullPath.IsUnder).
func pathVisibleFromExport(path, exportRoot util.FullPath) bool {
return path == exportRoot || path.IsUnder(exportRoot)
}
// isLookupNotFound reports whether err means "entry not found".
//
// It returns false for nil. It returns true when filer_pb.ErrNotFound appears
// anywhere in err's wrap chain, or, failing that, when err's message contains
// the sentinel's text.
func isLookupNotFound(err error) bool {
	if err == nil {
		return false
	}
	if errors.Is(err, filer_pb.ErrNotFound) {
		return true
	}
	// Fallback on the message text. NOTE(review): presumably needed for errors
	// that arrive without the sentinel in their chain (e.g. after crossing an
	// RPC boundary) — confirm before removing.
	return strings.Contains(err.Error(), filer_pb.ErrNotFound.Error())
}

View File

@@ -1,182 +0,0 @@
package nfs
import (
"context"
"testing"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"google.golang.org/grpc"
)
// fakeResolverClient is an in-memory filerResolverClient for tests: kv backs
// KvGet and entries backs LookupDirectoryEntry.
type fakeResolverClient struct {
kv map[string][]byte
entries map[util.FullPath]*filer_pb.Entry
}
// KvGet returns the stored value for in.Key, or an empty response (no error)
// when the key is absent.
func (f *fakeResolverClient) KvGet(_ context.Context, in *filer_pb.KvGetRequest, _ ...grpc.CallOption) (*filer_pb.KvGetResponse, error) {
if value, found := f.kv[string(in.Key)]; found {
return &filer_pb.KvGetResponse{Value: value}, nil
}
return &filer_pb.KvGetResponse{}, nil
}
// LookupDirectoryEntry returns the entry registered under Directory/Name, or
// a nil response with filer_pb.ErrNotFound when there is none.
func (f *fakeResolverClient) LookupDirectoryEntry(_ context.Context, in *filer_pb.LookupDirectoryEntryRequest, _ ...grpc.CallOption) (*filer_pb.LookupDirectoryEntryResponse, error) {
fullPath := util.NewFullPath(in.Directory, in.Name)
if entry, found := f.entries[fullPath]; found {
return &filer_pb.LookupDirectoryEntryResponse{Entry: entry}, nil
}
return nil, filer_pb.ErrNotFound
}
// TestFileHandleEncodeDecodeRoundTrip checks that a handle survives
// Encode/DecodeFileHandle unchanged and that corrupting the last byte (part
// of the checksum) makes decoding fail with ErrInvalidHandle.
func TestFileHandleEncodeDecodeRoundTrip(t *testing.T) {
handle := NewFileHandle(1234, FileHandleKindDirectory, 5678, 9)
raw := handle.Encode()
decoded, err := DecodeFileHandle(raw)
require.NoError(t, err)
assert.Equal(t, handle, decoded)
raw[len(raw)-1] ^= 0xff
_, err = DecodeFileHandle(raw)
require.ErrorIs(t, err, ErrInvalidHandle)
}
// TestResolverUsesPathVisibleFromExportRoot gives an inode two recorded
// paths, only one of which lies under the /exports root, and checks that the
// resolver returns the path inside the export.
func TestResolverUsesPathVisibleFromExportRoot(t *testing.T) {
client := &fakeResolverClient{
kv: make(map[string][]byte),
entries: make(map[util.FullPath]*filer_pb.Entry),
}
resolver := NewResolver("/exports", client)
record := &filer.InodeIndexRecord{
Generation: 7,
Paths: []string{"/a/other.txt", "/exports/demo/link.txt"},
}
value, err := record.Encode()
require.NoError(t, err)
client.kv[string(filer.InodeIndexKey(101))] = value
client.entries["/exports/demo/link.txt"] = &filer_pb.Entry{
Name: "link.txt",
Attributes: &filer_pb.FuseAttributes{
Inode: 101,
},
}
handle := NewFileHandle(resolver.ExportID(), FileHandleKindFile, 101, 7)
resolved, err := resolver.ResolveHandle(context.Background(), handle.Encode())
require.NoError(t, err)
assert.Equal(t, util.FullPath("/exports/demo/link.txt"), resolved.Path)
require.NotNil(t, resolved.Entry)
assert.Equal(t, uint64(101), resolved.Entry.Attributes.Inode)
}
// TestResolverRejectsGenerationMismatch stores an index record with
// generation 3 and resolves a handle carrying generation 4; the resolver must
// answer ErrStaleHandle even though the entry itself exists.
func TestResolverRejectsGenerationMismatch(t *testing.T) {
client := &fakeResolverClient{
kv: make(map[string][]byte),
entries: make(map[util.FullPath]*filer_pb.Entry),
}
resolver := NewResolver("/", client)
record := &filer.InodeIndexRecord{
Generation: 3,
Paths: []string{"/data/file.txt"},
}
value, err := record.Encode()
require.NoError(t, err)
client.kv[string(filer.InodeIndexKey(44))] = value
client.entries["/data/file.txt"] = &filer_pb.Entry{
Name: "file.txt",
Attributes: &filer_pb.FuseAttributes{
Inode: 44,
},
}
handle := NewFileHandle(resolver.ExportID(), FileHandleKindFile, 44, 4)
_, err = resolver.ResolveHandle(context.Background(), handle.Encode())
require.ErrorIs(t, err, ErrStaleHandle)
}
// TestResolverKeepsHandleValidAcrossRename models the state after a rename:
// the index record for inode 88 lists only the new path, and a handle built
// from the inode and generation alone still resolves to that new path.
func TestResolverKeepsHandleValidAcrossRename(t *testing.T) {
client := &fakeResolverClient{
kv: make(map[string][]byte),
entries: make(map[util.FullPath]*filer_pb.Entry),
}
resolver := NewResolver("/exports", client)
record := &filer.InodeIndexRecord{
Generation: 5,
Paths: []string{"/exports/new-name.txt"},
}
value, err := record.Encode()
require.NoError(t, err)
client.kv[string(filer.InodeIndexKey(88))] = value
client.entries["/exports/new-name.txt"] = &filer_pb.Entry{
Name: "new-name.txt",
Attributes: &filer_pb.FuseAttributes{
Inode: 88,
},
}
handle := NewFileHandle(resolver.ExportID(), FileHandleKindFile, 88, 5)
resolved, err := resolver.ResolveHandle(context.Background(), handle.Encode())
require.NoError(t, err)
assert.Equal(t, util.FullPath("/exports/new-name.txt"), resolved.Path)
require.NotNil(t, resolved.Entry)
assert.Equal(t, uint64(88), resolved.Entry.Attributes.Inode)
}
// TestResolverRejectsHandleAfterDeleteRecreateWithNewInode covers a path that
// now holds a different file: the index record for inode 77 still points at
// /exports/file.txt, but the entry there reports inode 999, so the old handle
// must resolve to ErrStaleHandle.
func TestResolverRejectsHandleAfterDeleteRecreateWithNewInode(t *testing.T) {
client := &fakeResolverClient{
kv: make(map[string][]byte),
entries: make(map[util.FullPath]*filer_pb.Entry),
}
resolver := NewResolver("/exports", client)
client.entries["/exports/file.txt"] = &filer_pb.Entry{
Name: "file.txt",
Attributes: &filer_pb.FuseAttributes{
Inode: 999,
},
}
record := &filer.InodeIndexRecord{
Generation: 4,
Paths: []string{"/exports/file.txt"},
}
value, err := record.Encode()
require.NoError(t, err)
client.kv[string(filer.InodeIndexKey(77))] = value
handle := NewFileHandle(resolver.ExportID(), FileHandleKindFile, 77, 4)
_, err = resolver.ResolveHandle(context.Background(), handle.Encode())
require.ErrorIs(t, err, ErrStaleHandle)
}
// TestResolverSupportsSyntheticRootHandle resolves the inode-0 directory
// handle against an empty fake filer and expects the export root path with a
// non-nil directory entry.
func TestResolverSupportsSyntheticRootHandle(t *testing.T) {
client := &fakeResolverClient{
kv: make(map[string][]byte),
entries: make(map[util.FullPath]*filer_pb.Entry),
}
resolver := NewResolver("/", client)
handle := NewFileHandle(resolver.ExportID(), FileHandleKindDirectory, 0, filer.InodeIndexInitialGeneration)
resolved, err := resolver.ResolveHandle(context.Background(), handle.Encode())
require.NoError(t, err)
assert.Equal(t, util.FullPath("/"), resolved.Path)
require.NotNil(t, resolved.Entry)
assert.True(t, resolved.Entry.IsDirectory)
}
// TestNewServerNormalizesExportRootAndExportID checks that NewServer strips
// the trailing slash from FilerRootPath and computes the export id from the
// normalized root.
func TestNewServerNormalizesExportRootAndExportID(t *testing.T) {
server, err := NewServer(&Option{
FilerRootPath: "/export/path/",
Port: 2049,
})
require.NoError(t, err)
assert.Equal(t, util.FullPath("/export/path"), server.exportRoot)
assert.Equal(t, exportIDForRoot("/export/path"), server.exportID)
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,182 +0,0 @@
package nfs
import (
"context"
"net"
"os"
"strings"
billy "github.com/go-git/go-billy/v5"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
gonfs "github.com/willscott/go-nfs"
)
// Handler implements gonfs.Handler on top of a Server.
type Handler struct {
server *Server
// rootFS is the filesystem served for the configured export root.
rootFS *seaweedFileSystem
}
// Compile-time check that *Handler satisfies gonfs.Handler.
var _ gonfs.Handler = (*Handler)(nil)
// Mount handles a mount request. Clients rejected by the server's allowlist
// get MountStatusErrAcces. Otherwise the requested directory is resolved to a
// filesystem; on success the null and unix auth flavors are offered, on
// failure the resolution status is returned with only the null flavor.
func (h *Handler) Mount(ctx context.Context, conn net.Conn, req gonfs.MountRequest) (gonfs.MountStatus, billy.Filesystem, []gonfs.AuthFlavor) {
	authorizer := h.server.clientAuthorizer
	if authorizer != nil && !authorizer.isAllowedConn(conn) {
		return gonfs.MountStatusErrAcces, nil, []gonfs.AuthFlavor{gonfs.AuthFlavorNull}
	}

	mounted, status := h.resolveMountFilesystem(ctx, string(req.Dirpath))
	if status == gonfs.MountStatusOk {
		return gonfs.MountStatusOk, mounted, []gonfs.AuthFlavor{gonfs.AuthFlavorNull, gonfs.AuthFlavorUnix}
	}
	return status, nil, []gonfs.AuthFlavor{gonfs.AuthFlavorNull}
}
// resolveMountFilesystem maps the MOUNT3 dirpath onto a filesystem.
//
//   - A request for the export root itself serves the root filesystem.
//   - A request outside the export also serves the root filesystem, after an
//     INFO log noting the fallback.
//   - A request strictly under the export mounts that subdirectory; it fails
//     with NoEnt when the entry is missing, NotDir when it is not a
//     directory, and ServerFault on any other lookup error.
//
// The UDP MOUNT path mirrors this in mount_udp.go.
func (h *Handler) resolveMountFilesystem(ctx context.Context, requestedPath string) (*seaweedFileSystem, gonfs.MountStatus) {
	exportRoot := h.server.exportRoot
	requested := normalizeExportRoot(util.FullPath(requestedPath))

	// The boring common case: the client asked for the export itself.
	if requested == exportRoot {
		return h.rootFS, h.lstatExportStatus(ctx)
	}
	if !requested.IsUnder(exportRoot) {
		glog.V(0).Infof("nfs mount: client requested %q (outside export %q); serving configured export", requestedPath, exportRoot)
		return h.rootFS, h.lstatExportStatus(ctx)
	}

	entry, err := h.lookupSubexportEntry(ctx, requested)
	if err != nil {
		if isLookupNotFound(err) {
			return nil, gonfs.MountStatusErrNoEnt
		}
		glog.Errorf("nfs mount: lookup %q under export %q failed: %v", requested, exportRoot, err)
		return nil, gonfs.MountStatusErrServerFault
	}
	if entry == nil {
		return nil, gonfs.MountStatusErrNoEnt
	}
	if !entry.IsDirectory {
		return nil, gonfs.MountStatusErrNotDir
	}

	glog.V(1).Infof("nfs mount: client requested %q under export %q; mounting at subdirectory", requestedPath, exportRoot)
	return newSeaweedFileSystem(h.server, requested, h.server.sharedReaderCache), gonfs.MountStatusOk
}
// lstatExportStatus stats the root of the export filesystem and translates
// the outcome into a mount status: Ok on success, NoEnt when the root does
// not exist, ServerFault for any other error.
func (h *Handler) lstatExportStatus(ctx context.Context) gonfs.MountStatus {
	_, statErr := h.rootFS.fileInfoForVirtualPath(ctx, "/")
	switch {
	case statErr == nil:
		return gonfs.MountStatusOk
	case os.IsNotExist(statErr):
		return gonfs.MountStatusErrNoEnt
	default:
		return gonfs.MountStatusErrServerFault
	}
}
// lookupSubexportEntry fetches the filer entry at p through the server's
// internal client. It returns the lookup error unchanged; the entry is nil
// when the call fails or the response is nil.
func (h *Handler) lookupSubexportEntry(ctx context.Context, p util.FullPath) (*filer_pb.Entry, error) {
	var found *filer_pb.Entry
	lookup := func(client nfsFilerClient) error {
		parent, base := p.DirAndName()
		resp, lookupErr := client.LookupDirectoryEntry(ctx, &filer_pb.LookupDirectoryEntryRequest{
			Directory: parent,
			Name:      base,
		})
		if lookupErr != nil {
			return lookupErr
		}
		if resp != nil {
			found = resp.Entry
		}
		return nil
	}
	// Run the call before reading found so the closure's write is observed.
	err := h.server.withInternalClient(false, lookup)
	return found, err
}
// Change returns the billy.Change view of filesystem, or nil when the server
// is configured read-only or the filesystem does not implement billy.Change.
func (h *Handler) Change(filesystem billy.Filesystem) billy.Change {
	readOnly := h.server != nil && h.server.option != nil && h.server.option.ReadOnly
	if readOnly {
		return nil
	}
	// A failed assertion leaves changer as the nil interface value.
	changer, _ := filesystem.(billy.Change)
	return changer
}
// FSStat fills stat from the filer's Statistics RPC.
//
// TotalSize and TotalFiles are copied from the response. FreeSize and
// AvailableSize are set to TotalSize-UsedSize only when UsedSize does not
// exceed TotalSize; otherwise they are left untouched. A nil response leaves
// stat unmodified and is not an error.
func (h *Handler) FSStat(ctx context.Context, _ billy.Filesystem, stat *gonfs.FSStat) error {
	fill := func(client nfsFilerClient) error {
		usage, err := client.Statistics(ctx, &filer_pb.StatisticsRequest{})
		switch {
		case err != nil:
			return err
		case usage == nil:
			return nil
		}
		total, used := usage.TotalSize, usage.UsedSize
		stat.TotalSize = total
		if used <= total {
			remaining := total - used
			stat.FreeSize = remaining
			stat.AvailableSize = remaining
		}
		stat.TotalFiles = usage.FileCount
		return nil
	}
	return h.server.withInternalClient(false, fill)
}
// ToHandle encodes a file handle for the entry at path within filesystem.
//
// When filesystem is not a *seaweedFileSystem the handler's root filesystem is
// used instead. nil is returned if the path cannot be resolved. An entry that
// reports inode 0, is a directory, and whose actual path equals the export
// root gets a directory handle with inode 0 and the initial index generation;
// every other entry gets a handle built from its own kind, inode and
// generation.
func (h *Handler) ToHandle(filesystem billy.Filesystem, path []string) []byte {
	target := h.rootFS
	if typed, isSeaweed := filesystem.(*seaweedFileSystem); isSeaweed {
		target = typed
	}
	info, lookupErr := target.fileInfoForVirtualPath(context.Background(), target.Join(path...))
	if lookupErr != nil {
		return nil
	}
	exportID := h.server.exportID
	inode := info.entry.GetAttributes().GetInode()
	if inode == 0 && info.actualPath == h.server.exportRoot && info.entry.IsDirectory {
		rootHandle := NewFileHandle(exportID, FileHandleKindDirectory, 0, filer.InodeIndexInitialGeneration)
		return rootHandle.Encode()
	}
	entryHandle := NewFileHandle(exportID, fileHandleKindForEntry(info.entry), inode, info.generation)
	return entryHandle.Encode()
}
// FromHandle resolves an encoded file handle back to a filesystem and a path
// split into components.
//
// The handle is resolved through a Resolver bound to the export root. A handle
// that resolves to the export root itself yields the root filesystem with a
// nil path. A path that is not visible from the export yields
// ErrHandleExportMismatch. Otherwise the export-root prefix is trimmed (unless
// the export root is "/") and the normalized remainder is returned as path
// components relative to the root filesystem.
func (h *Handler) FromHandle(raw []byte) (billy.Filesystem, []string, error) {
	var resolved *ResolvedHandle
	resolve := func(client nfsFilerClient) error {
		var err error
		resolved, err = NewResolver(h.server.exportRoot, client).ResolveHandle(context.Background(), raw)
		return err
	}
	if err := h.server.withInternalClient(false, resolve); err != nil {
		return nil, nil, err
	}
	switch {
	case resolved.Path == h.server.exportRoot:
		return h.rootFS, nil, nil
	case !pathVisibleFromExport(resolved.Path, h.server.exportRoot):
		return nil, nil, ErrHandleExportMismatch
	}
	rel := string(resolved.Path)
	if h.server.exportRoot != "/" {
		rel = strings.TrimPrefix(rel, string(h.server.exportRoot))
	}
	return h.rootFS, util.NormalizePath(rel).Split(), nil
}
// InvalidateHandle is a no-op: it ignores both arguments and always returns
// nil.
func (h *Handler) InvalidateHandle(billy.Filesystem, []byte) error {
return nil
}
// HandleLimit returns the handle limit configured on the owning server.
func (h *Handler) HandleLimit() int {
return h.server.handleLimit
}
// fileHandleKindForEntry classifies entry for handle encoding: a non-nil
// directory entry is FileHandleKindDirectory; everything else, including a nil
// entry, is FileHandleKindFile.
func fileHandleKindForEntry(entry *filer_pb.Entry) FileHandleKind {
	if entry == nil || !entry.IsDirectory {
		return FileHandleKindFile
	}
	return FileHandleKindDirectory
}

View File

@@ -1,880 +0,0 @@
package nfs
import (
	"bytes"
	"context"
	"crypto/md5"
	"encoding/base64"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"math/rand"
	"mime/multipart"
	"net"
	"net/http"
	"net/http/httptest"
	"path"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/filer"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/util"
	util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
	"github.com/seaweedfs/seaweedfs/weed/wdclient"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	gonfs "github.com/willscott/go-nfs"
	nfsclient "github.com/willscott/go-nfs-client/nfs"
	"github.com/willscott/go-nfs-client/nfs/rpc"
	"github.com/willscott/go-nfs-client/nfs/xdr"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)
// fakeVolumeBlob is one uploaded object held in memory by fakeVolumeServer.
type fakeVolumeBlob struct {
// data is the uploaded payload.
data []byte
// contentEncoding is the Content-Encoding header of the uploaded multipart
// part; it is echoed back on GET when non-empty.
contentEncoding string
}
// fakeVolumeServer is an in-memory HTTP stand-in for a volume server: POST
// stores a blob under the URL path, GET serves it back.
type fakeVolumeServer struct {
// mu guards blobs.
mu sync.Mutex
// blobs maps a file ID (the request path without its leading slash) to the
// stored blob.
blobs map[string]fakeVolumeBlob
// server is the httptest server that dispatches requests to serveHTTP.
server *httptest.Server
}
// fakeVolumeControlPlane is a gRPC SeaweedFiler stub that answers AssignVolume
// and LookupVolume by pointing at a single host, and records every request it
// receives so tests can assert on them.
type fakeVolumeControlPlane struct {
filer_pb.UnimplementedSeaweedFilerServer
// mu guards nextID, assigns and lookups.
mu sync.Mutex
// host is the URL returned as the location of every volume.
host string
// nextID is the counter used to mint file IDs in AssignVolume.
nextID int
// assigns holds the AssignVolume requests received so far, in order.
assigns []*filer_pb.AssignVolumeRequest
// lookups holds the LookupVolume requests received so far, in order.
lookups []*filer_pb.LookupVolumeRequest
}
// initIntegrationHTTPClient ensures the global HTTP client is initialised at
// most once across the tests in this file.
var initIntegrationHTTPClient sync.Once
// nfsProc3Link is the NFSv3 procedure number used for LINK calls in nfsLink.
const nfsProc3Link = 15
// newFakeVolumeServer starts an in-memory volume server backed by an httptest
// server and registers its shutdown with t.Cleanup.
func newFakeVolumeServer(t *testing.T) *fakeVolumeServer {
	t.Helper()
	srv := new(fakeVolumeServer)
	srv.blobs = map[string]fakeVolumeBlob{}
	srv.server = httptest.NewServer(http.HandlerFunc(srv.serveHTTP))
	t.Cleanup(srv.server.Close)
	return srv
}
// host returns the test server's URL with the "http://" scheme prefix removed.
func (f *fakeVolumeServer) host() string {
return strings.TrimPrefix(f.server.URL, "http://")
}
// serveHTTP implements the fake volume server's HTTP surface.
//
// The file ID is the request path without its leading slash; an empty ID is a
// 404. POST reads the multipart "file" part, stores its bytes and
// Content-Encoding under the file ID, and replies with Content-MD5/ETag
// headers and a small JSON body carrying the name and size. GET serves a
// previously stored blob (404 when unknown), echoing its Content-Encoding when
// set. Any other method is answered with 405.
func (f *fakeVolumeServer) serveHTTP(w http.ResponseWriter, r *http.Request) {
	fileID := strings.TrimPrefix(r.URL.Path, "/")
	if fileID == "" {
		http.NotFound(w, r)
		return
	}

	if r.Method == http.MethodPost {
		upload, err := firstMultipartFile(r)
		if err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		defer upload.Close()
		payload, err := io.ReadAll(upload)
		if err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		encoding := upload.Header.Get("Content-Encoding")
		digest := md5.Sum(payload)

		f.mu.Lock()
		f.blobs[fileID] = fakeVolumeBlob{
			data:            bytes.Clone(payload),
			contentEncoding: encoding,
		}
		f.mu.Unlock()

		encodedDigest := base64.StdEncoding.EncodeToString(digest[:])
		headers := w.Header()
		headers.Set("Content-MD5", encodedDigest)
		headers.Set("ETag", `"`+encodedDigest+`"`)
		headers.Set("Content-Type", "application/json")
		// Best-effort reply body; an encode failure cannot be reported once
		// the response has started.
		_ = json.NewEncoder(w).Encode(map[string]any{
			"name": path.Base(fileID),
			"size": len(payload),
		})
		return
	}

	if r.Method == http.MethodGet {
		f.mu.Lock()
		stored, known := f.blobs[fileID]
		f.mu.Unlock()
		if !known {
			http.NotFound(w, r)
			return
		}
		if stored.contentEncoding != "" {
			w.Header().Set("Content-Encoding", stored.contentEncoding)
		}
		http.ServeContent(w, r, fileID, time.Unix(0, 0), bytes.NewReader(stored.data))
		return
	}

	http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
}
// firstMultipartFile returns the first part of r's multipart body whose form
// name is "file". Parts with other names are closed and skipped.
//
// It returns the MultipartReader/NextPart error as-is when the body cannot be
// parsed, and io.ErrUnexpectedEOF when the body ends without a "file" part.
// The caller owns the returned part and must close it.
func firstMultipartFile(r *http.Request) (*multipart.Part, error) {
	mr, err := r.MultipartReader()
	if err != nil {
		return nil, err
	}
	for {
		candidate, nextErr := mr.NextPart()
		switch {
		case nextErr == io.EOF:
			return nil, io.ErrUnexpectedEOF
		case nextErr != nil:
			return nil, nextErr
		case candidate.FormName() == "file":
			return candidate, nil
		}
		candidate.Close()
	}
}
// AssignVolume records req and hands out a fresh file ID of the form
// "7,<8-hex-digit counter>" located at the fake's configured host.
func (f *fakeVolumeControlPlane) AssignVolume(_ context.Context, req *filer_pb.AssignVolumeRequest) (*filer_pb.AssignVolumeResponse, error) {
	f.mu.Lock()
	f.assigns = append(f.assigns, req)
	f.nextID++
	sequence, location := f.nextID, f.host
	f.mu.Unlock()

	resp := &filer_pb.AssignVolumeResponse{
		FileId:   fmt.Sprintf("7,%08x", sequence),
		Count:    1,
		Location: &filer_pb.Location{Url: location},
	}
	return resp, nil
}
// LookupVolume records req and answers every requested volume ID with a single
// location pointing at the fake's configured host.
func (f *fakeVolumeControlPlane) LookupVolume(_ context.Context, req *filer_pb.LookupVolumeRequest) (*filer_pb.LookupVolumeResponse, error) {
	f.mu.Lock()
	f.lookups = append(f.lookups, req)
	f.mu.Unlock()

	ids := req.GetVolumeIds()
	byVolume := make(map[string]*filer_pb.Locations, len(ids))
	for i := range ids {
		byVolume[ids[i]] = &filer_pb.Locations{
			Locations: []*filer_pb.Location{{Url: f.host}},
		}
	}
	return &filer_pb.LookupVolumeResponse{LocationsMap: byVolume}, nil
}
// startFakeVolumeControlPlane serves controlPlane as a SeaweedFiler gRPC
// server on an ephemeral 127.0.0.1 port and returns the listener address.
//
// A cleanup is registered on t that stops the server, closes the listener and
// then waits up to one second for the Serve goroutine to exit, reporting a
// test error on timeout or on any exit error that isClosedNetworkErr does not
// recognise.
func startFakeVolumeControlPlane(t *testing.T, controlPlane *fakeVolumeControlPlane) string {
t.Helper()
listener, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err)
grpcServer := grpc.NewServer()
filer_pb.RegisterSeaweedFilerServer(grpcServer, controlPlane)
// Buffered so the Serve goroutine can finish even if nobody reads the result.
done := make(chan error, 1)
go func() {
done <- grpcServer.Serve(listener)
}()
t.Cleanup(func() {
grpcServer.Stop()
_ = listener.Close()
select {
case err := <-done:
if err != nil && !isClosedNetworkErr(err) {
t.Errorf("fake control plane exited with error: %v", err)
}
case <-time.After(time.Second):
t.Errorf("timed out waiting for fake control plane shutdown")
}
})
return listener.Addr().String()
}
// mountTestTarget serves server's NFS handler on an ephemeral 127.0.0.1 port,
// dials it, mounts server.exportRoot with AUTH_NULL and returns the mounted
// target together with a cleanup function.
//
// The cleanup unmounts, closes the RPC client and the listener, then waits up
// to one second for the serve goroutine to exit, reporting a test error on
// timeout or on an exit error that isClosedNetworkErr does not recognise. The
// caller is responsible for invoking cleanup; it is not registered with
// t.Cleanup.
func mountTestTarget(t *testing.T, server *Server) (*nfsclient.Target, func()) {
t.Helper()
listener, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err)
handler, err := server.newHandler()
require.NoError(t, err)
// Buffered so the serve goroutine can finish even if cleanup never runs.
done := make(chan error, 1)
go func() {
done <- gonfs.Serve(listener, handler)
}()
// Dial with up to 10 attempts, 10ms apart.
var client *rpc.Client
for attempt := 0; attempt < 10; attempt++ {
client, err = rpc.DialTCP(listener.Addr().Network(), listener.Addr().String(), false)
if err == nil {
break
}
// On the final attempt fail the test right away instead of sleeping again.
if attempt == 9 {
require.NoError(t, err)
}
time.Sleep(10 * time.Millisecond)
}
require.NoError(t, err)
mounter := &nfsclient.Mount{Client: client}
target, err := mounter.Mount(string(server.exportRoot), rpc.AuthNull)
require.NoError(t, err)
cleanup := func() {
_ = mounter.Unmount()
client.Close()
_ = listener.Close()
select {
case err := <-done:
if err != nil && !isClosedNetworkErr(err) {
t.Errorf("nfs server exited with error: %v", err)
}
case <-time.After(time.Second):
t.Errorf("timed out waiting for nfs server shutdown")
}
}
return target, cleanup
}
// isClosedNetworkErr reports whether err is the expected result of shutting
// down a listener or connection, as opposed to a real server failure.
//
// An error counts as "closed" when it wraps net.ErrClosed anywhere in its
// chain, or — for errors that do not preserve the chain — when its message
// contains "use of closed network connection" or "listener closed". A nil
// error is not a closed-network error.
func isClosedNetworkErr(err error) bool {
	if err == nil {
		return false
	}
	// Prefer the typed check: it keeps working when a layer wraps the
	// sentinel under a different message.
	if errors.Is(err, net.ErrClosed) {
		return true
	}
	msg := err.Error()
	return strings.Contains(msg, "use of closed network connection") ||
		strings.Contains(msg, "listener closed")
}
// nfsLink issues a raw NFSv3 LINK call that creates linkPath as a hard link to
// the file identified by sourceHandle.
//
// linkPath is cleaned and split into parent directory and link name; an empty
// link name is rejected, and an empty parent is treated as "/". The parent is
// looked up to obtain its handle, the LINK call is sent, and the status word
// of the reply is converted with nfsclient.NFS3Error and returned.
func nfsLink(target *nfsclient.Target, sourceHandle []byte, linkPath string) error {
parentDir, linkName := path.Split(path.Clean(linkPath))
if linkName == "" {
return fmt.Errorf("invalid hard link path %q", linkPath)
}
if parentDir == "" {
parentDir = "/"
}
_, parentHandle, err := target.Lookup(parentDir)
if err != nil {
return err
}
// Field layout matches the go-nfs server's onLink handler
// (vendor: github.com/willscott/go-nfs/nfs_onlink.go), which reads
// DirOpArg + SetFileAttributes + opaque target handle. That wire
// order differs from RFC 1813 §3.3.15 LINK3args {nfs_fh3 file;
// diropargs3 link;} — the go-nfs library is not strictly compliant
// here, and we mirror its layout so the integration test exercises
// the same parser the server uses. Do not reorder fields to match
// the RFC: the test would then fail against a correctly-functioning
// server.
type LinkArgs struct {
rpc.Header
Link nfsclient.Diropargs3
Sattr nfsclient.Sattr3
Target []byte
}
// Sattr is left at its zero value.
res, err := target.Call(&LinkArgs{
Header: rpc.Header{
Rpcvers: 2,
Prog: nfsclient.Nfs3Prog,
Vers: nfsclient.Nfs3Vers,
Proc: nfsProc3Link,
Cred: rpc.AuthNull,
Verf: rpc.AuthNull,
},
Link: nfsclient.Diropargs3{
FH: parentHandle,
Filename: linkName,
},
Target: sourceHandle,
})
if err != nil {
return err
}
// The first word of the reply is the NFS status code.
status, err := xdr.ReadUint32(res)
if err != nil {
return err
}
return nfsclient.NFS3Error(status)
}
// TestSeaweedNFSAcceptsAnyMountPathOverRPC serves an export rooted at
// /buckets/data (which has no children in the fake filer) and checks that a
// MOUNT for each of several dirpaths — the root, ancestors, unrelated paths,
// and the export root with and without a trailing slash — succeeds and yields
// an empty directory listing at "/".
func TestSeaweedNFSAcceptsAnyMountPathOverRPC(t *testing.T) {
const exportRoot = "/buckets/data"
client := &fakeNFSFilerClient{
entries: map[util.FullPath]*filer_pb.Entry{
"/buckets": testEntry("buckets", true, 100, uint32(0755), nil),
"/buckets/data": testEntry("data", true, 101, uint32(0755), nil),
},
kv: map[string][]byte{
string(filer.InodeIndexKey(100)): testIndexRecord(t, 100, 1, "/buckets"),
string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/buckets/data"),
},
}
server := newTestServer(t, exportRoot, client)
listener, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err)
handler, err := server.newHandler()
require.NoError(t, err)
serveDone := make(chan error, 1)
go func() {
serveDone <- gonfs.Serve(listener, handler)
}()
// Close the listener and wait (up to 1s) for the serve goroutine to exit.
t.Cleanup(func() {
_ = listener.Close()
select {
case err := <-serveDone:
if err != nil && !isClosedNetworkErr(err) {
t.Errorf("nfs server exited with error: %v", err)
}
case <-time.After(time.Second):
t.Errorf("timed out waiting for nfs server shutdown")
}
})
dirpaths := []string{
"/",
"/buckets",
"/buckets/other",
"/wrong/path",
exportRoot,
exportRoot + "/",
}
for _, dirpath := range dirpaths {
t.Run(dirpath, func(t *testing.T) {
// Each subtest uses its own RPC connection; dial with up to 10 attempts.
var rpcClient *rpc.Client
var dialErr error
for attempt := 0; attempt < 10; attempt++ {
rpcClient, dialErr = rpc.DialTCP(listener.Addr().Network(), listener.Addr().String(), false)
if dialErr == nil {
break
}
time.Sleep(10 * time.Millisecond)
}
require.NoError(t, dialErr)
defer rpcClient.Close()
mounter := &nfsclient.Mount{Client: rpcClient}
target, err := mounter.Mount(dirpath, rpc.AuthNull)
require.NoErrorf(t, err, "Mount(%q)", dirpath)
defer target.Close()
entries, err := target.ReadDirPlus("/")
require.NoError(t, err)
assert.Empty(t, entries, "Mount(%q) should land at the empty export root", dirpath)
})
}
}
// TestSeaweedNFSSubexportMountOverRPC serves an export rooted at /buckets and
// checks MOUNT requests for paths underneath it: a subdirectory mounts at that
// subdirectory (listing and reading its single file), while a missing entry
// and a regular file are both rejected with an error.
func TestSeaweedNFSSubexportMountOverRPC(t *testing.T) {
const exportRoot = "/buckets"
client := &fakeNFSFilerClient{
entries: map[util.FullPath]*filer_pb.Entry{
"/buckets": testEntry("buckets", true, 100, uint32(0755), nil),
"/buckets/data": testEntry("data", true, 101, uint32(0755), nil),
"/buckets/data/inner": testEntry("inner", false, 104, uint32(0644), []byte("payload")),
"/buckets/other": testEntry("other", true, 105, uint32(0755), nil),
"/buckets/file.txt": testEntry("file.txt", false, 103, uint32(0644), []byte("hi")),
},
kv: map[string][]byte{
string(filer.InodeIndexKey(100)): testIndexRecord(t, 100, 1, "/buckets"),
string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/buckets/data"),
string(filer.InodeIndexKey(103)): testIndexRecord(t, 103, 1, "/buckets/file.txt"),
string(filer.InodeIndexKey(104)): testIndexRecord(t, 104, 1, "/buckets/data/inner"),
string(filer.InodeIndexKey(105)): testIndexRecord(t, 105, 1, "/buckets/other"),
},
}
server := newTestServer(t, exportRoot, client)
listener, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err)
handler, err := server.newHandler()
require.NoError(t, err)
serveDone := make(chan error, 1)
go func() {
serveDone <- gonfs.Serve(listener, handler)
}()
// Close the listener and wait (up to 1s) for the serve goroutine to exit.
t.Cleanup(func() {
_ = listener.Close()
select {
case err := <-serveDone:
if err != nil && !isClosedNetworkErr(err) {
t.Errorf("nfs server exited with error: %v", err)
}
case <-time.After(time.Second):
t.Errorf("timed out waiting for nfs server shutdown")
}
})
// dial opens a fresh RPC connection (up to 10 attempts) and closes it when
// the calling (sub)test finishes.
dial := func(t *testing.T) *rpc.Client {
t.Helper()
var rpcClient *rpc.Client
var dialErr error
for attempt := 0; attempt < 10; attempt++ {
rpcClient, dialErr = rpc.DialTCP(listener.Addr().Network(), listener.Addr().String(), false)
if dialErr == nil {
break
}
time.Sleep(10 * time.Millisecond)
}
require.NoError(t, dialErr)
t.Cleanup(func() { rpcClient.Close() })
return rpcClient
}
t.Run("mounts_under_export_at_subdirectory", func(t *testing.T) {
mounter := &nfsclient.Mount{Client: dial(t)}
target, err := mounter.Mount("/buckets/data", rpc.AuthNull)
require.NoError(t, err)
defer target.Close()
entries, err := target.ReadDirPlus("/")
require.NoError(t, err)
require.Len(t, entries, 1)
assert.Equal(t, "inner", entries[0].Name())
readFile, err := target.Open("/inner")
require.NoError(t, err)
defer readFile.Close()
data, err := io.ReadAll(readFile)
require.NoError(t, err)
assert.Equal(t, []byte("payload"), data)
})
t.Run("missing_entry_under_export_rejects", func(t *testing.T) {
mounter := &nfsclient.Mount{Client: dial(t)}
_, err := mounter.Mount("/buckets/missing", rpc.AuthNull)
require.Error(t, err)
})
t.Run("regular_file_under_export_rejects", func(t *testing.T) {
mounter := &nfsclient.Mount{Client: dial(t)}
_, err := mounter.Mount("/buckets/file.txt", rpc.AuthNull)
require.Error(t, err)
})
}
// TestSeaweedNFSServesInlineRoundTripOverRPC exercises mkdir, create, write,
// read, readdir, rename and remove over RPC for a small payload, and checks
// that the payload is stored inline in the entry (Content set, no chunks) and
// that a handle obtained before a rename still answers GetAttr afterwards.
func TestSeaweedNFSServesInlineRoundTripOverRPC(t *testing.T) {
client := &fakeNFSFilerClient{
kv: map[string][]byte{
string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"),
},
entries: map[util.FullPath]*filer_pb.Entry{
"/exports": testEntry("exports", true, 101, uint32(0755), nil),
},
}
server := newTestServer(t, "/exports", client)
target, cleanup := mountTestTarget(t, server)
defer cleanup()
defer target.Close()
_, err := target.Mkdir("/docs", 0o755)
require.NoError(t, err)
file, err := target.OpenFile("/docs/note.txt", 0o644)
require.NoError(t, err)
payload := []byte("hello over rpc")
_, err = file.Write(payload)
require.NoError(t, err)
require.NoError(t, file.Close())
readFile, err := target.Open("/docs/note.txt")
require.NoError(t, err)
defer readFile.Close()
data, err := io.ReadAll(readFile)
require.NoError(t, err)
assert.Equal(t, payload, data)
// The fake filer must hold the payload inline rather than as chunks.
entry := client.entries["/exports/docs/note.txt"]
require.NotNil(t, entry)
assert.Equal(t, payload, entry.Content)
assert.Empty(t, entry.Chunks)
_, beforeRenameHandle, err := target.Lookup("/docs/note.txt")
require.NoError(t, err)
entries, err := target.ReadDirPlus("/docs")
require.NoError(t, err)
require.Len(t, entries, 1)
assert.Equal(t, "note.txt", entries[0].Name())
require.NoError(t, target.Rename("/docs/note.txt", "/docs/final.txt"))
// The pre-rename handle must remain usable after the rename.
_, err = target.GetAttr(beforeRenameHandle)
require.NoError(t, err)
_, _, err = target.Lookup("/docs/final.txt")
require.NoError(t, err)
_, _, err = target.Lookup("/docs/note.txt")
require.Error(t, err)
require.NoError(t, target.Remove("/docs/final.txt"))
_, _, err = target.Lookup("/docs/final.txt")
require.Error(t, err)
}
// TestSeaweedNFSReadOnlyRejectsMutations mounts an export with the ReadOnly
// option set and checks that creating a file and writing to an existing file
// both fail with NFS3ErrROFS, while reading the existing file still returns
// its seeded content.
func TestSeaweedNFSReadOnlyRejectsMutations(t *testing.T) {
client := &fakeNFSFilerClient{
kv: map[string][]byte{
string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"),
string(filer.InodeIndexKey(202)): testIndexRecord(t, 202, 3, "/exports/existing.txt"),
},
entries: map[util.FullPath]*filer_pb.Entry{
"/exports": testEntry("exports", true, 101, uint32(0755), nil),
"/exports/existing.txt": testEntry("existing.txt", false, 202, uint32(0644), []byte("seed")),
},
}
server := newTestServer(t, "/exports", client)
server.option.ReadOnly = true
target, cleanup := mountTestTarget(t, server)
defer cleanup()
defer target.Close()
// Creating a new file must be refused.
_, err := target.OpenFile("/created.txt", 0o644)
require.Error(t, err)
nfsErr, ok := err.(*nfsclient.Error)
require.True(t, ok)
assert.Equal(t, uint32(nfsclient.NFS3ErrROFS), nfsErr.ErrorNum)
// Writing to an existing file must be refused as well.
file, err := target.Open("/existing.txt")
require.NoError(t, err)
_, err = file.Write([]byte("mutate"))
require.Error(t, err)
nfsErr, ok = err.(*nfsclient.Error)
require.True(t, ok)
assert.Equal(t, uint32(nfsclient.NFS3ErrROFS), nfsErr.ErrorNum)
_ = file.Close()
// Reads keep working and see the original content.
readFile, err := target.Open("/existing.txt")
require.NoError(t, err)
defer readFile.Close()
data, err := io.ReadAll(readFile)
require.NoError(t, err)
assert.Equal(t, []byte("seed"), data)
}
// TestSeaweedNFSServesSymlinkRoundTripOverRPC creates a file and a symlink to
// it over RPC, then checks that the link is reported with type NF3Lnk, that
// Readlink returns the original target string, and that the fake filer stored
// the same symlink target on the entry.
func TestSeaweedNFSServesSymlinkRoundTripOverRPC(t *testing.T) {
client := &fakeNFSFilerClient{
kv: map[string][]byte{
string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"),
},
entries: map[util.FullPath]*filer_pb.Entry{
"/exports": testEntry("exports", true, 101, uint32(0755), nil),
},
}
server := newTestServer(t, "/exports", client)
target, cleanup := mountTestTarget(t, server)
defer cleanup()
defer target.Close()
file, err := target.OpenFile("/target.txt", 0o644)
require.NoError(t, err)
_, err = file.Write([]byte("payload"))
require.NoError(t, err)
require.NoError(t, file.Close())
require.NoError(t, target.Symlink("target.txt", "/target.link"))
info, _, err := target.Lookup("/target.link")
require.NoError(t, err)
attr, ok := info.(*nfsclient.Fattr)
require.True(t, ok)
assert.Equal(t, uint32(nfsclient.NF3Lnk), attr.Type)
linkFile, err := target.Open("/target.link")
require.NoError(t, err)
defer linkFile.Close()
linkTarget, err := linkFile.Readlink()
require.NoError(t, err)
assert.Equal(t, "target.txt", linkTarget)
// Cross-check what the fake filer actually stored.
entry := client.entries["/exports/target.link"]
require.NotNil(t, entry)
assert.Equal(t, "target.txt", entry.GetAttributes().GetSymlinkTarget())
}
// TestSeaweedNFSServesHardLinkRoundTripOverRPC creates a file, hard-links it
// via a raw LINK call, and checks that both names share a handle, file ID,
// link count of 2 and hard-link ID. It then removes the names one at a time:
// after the first removal the handle still resolves with a link count of 1 and
// the content stays readable through the remaining name; after the second the
// handle is rejected with NFS3ErrStale.
func TestSeaweedNFSServesHardLinkRoundTripOverRPC(t *testing.T) {
client := &fakeNFSFilerClient{
kv: map[string][]byte{
string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"),
},
entries: map[util.FullPath]*filer_pb.Entry{
"/exports": testEntry("exports", true, 101, uint32(0755), nil),
},
}
server := newTestServer(t, "/exports", client)
target, cleanup := mountTestTarget(t, server)
defer cleanup()
defer target.Close()
file, err := target.OpenFile("/source.txt", 0o644)
require.NoError(t, err)
payload := []byte("shared content")
_, err = file.Write(payload)
require.NoError(t, err)
require.NoError(t, file.Close())
_, sourceHandle, err := target.Lookup("/source.txt")
require.NoError(t, err)
require.NoError(t, nfsLink(target, sourceHandle, "/linked.txt"))
// Look both names up again after the link was created.
sourceInfo, sourceHandle, err := target.Lookup("/source.txt")
require.NoError(t, err)
linkedInfo, linkedHandle, err := target.Lookup("/linked.txt")
require.NoError(t, err)
sourceAttr, ok := sourceInfo.(*nfsclient.Fattr)
require.True(t, ok)
linkAttr, ok := linkedInfo.(*nfsclient.Fattr)
require.True(t, ok)
assert.Equal(t, sourceHandle, linkedHandle)
assert.Equal(t, sourceAttr.Fileid, linkAttr.Fileid)
assert.Equal(t, uint32(2), sourceAttr.Nlink)
assert.Equal(t, uint32(2), linkAttr.Nlink)
linkedFile, err := target.Open("/linked.txt")
require.NoError(t, err)
defer linkedFile.Close()
data, err := io.ReadAll(linkedFile)
require.NoError(t, err)
assert.Equal(t, payload, data)
// Cross-check the hard-link bookkeeping stored in the fake filer.
sourceEntry := client.entries["/exports/source.txt"]
linkedEntry := client.entries["/exports/linked.txt"]
require.NotNil(t, sourceEntry)
require.NotNil(t, linkedEntry)
assert.Equal(t, sourceEntry.GetHardLinkId(), linkedEntry.GetHardLinkId())
assert.Equal(t, int32(2), sourceEntry.GetHardLinkCounter())
assert.Equal(t, int32(2), linkedEntry.GetHardLinkCounter())
// Removing one name keeps the shared handle alive with one link left.
require.NoError(t, target.Remove("/source.txt"))
remainingAttr, err := target.GetAttr(sourceHandle)
require.NoError(t, err)
assert.Equal(t, uint32(1), remainingAttr.Nlink)
_, _, err = target.Lookup("/source.txt")
require.Error(t, err)
linkedFile, err = target.Open("/linked.txt")
require.NoError(t, err)
data, err = io.ReadAll(linkedFile)
require.NoError(t, err)
require.NoError(t, linkedFile.Close())
assert.Equal(t, payload, data)
// Removing the last name makes the handle stale.
require.NoError(t, target.Remove("/linked.txt"))
_, err = target.GetAttr(linkedHandle)
require.Error(t, err)
nfsErr, ok := err.(*nfsclient.Error)
require.True(t, ok)
assert.Equal(t, uint32(nfsclient.NFS3ErrStale), nfsErr.ErrorNum)
}
// TestSeaweedNFSServesLargeChunkRoundTripOverRPC writes a payload 4096 bytes
// larger than maxInlineWriteSize and checks that it is stored as exactly one
// chunk (no inline Content) through the fake volume server and control plane,
// that reading it back returns the same bytes, and that the control plane saw
// one assign for the file's path and at least one lookup.
func TestSeaweedNFSServesLargeChunkRoundTripOverRPC(t *testing.T) {
initIntegrationHTTPClient.Do(util_http.InitGlobalHttpClient)
client := &fakeNFSFilerClient{
kv: map[string][]byte{
string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"),
},
entries: map[util.FullPath]*filer_pb.Entry{
"/exports": testEntry("exports", true, 101, uint32(0755), nil),
},
}
volumeServer := newFakeVolumeServer(t)
controlPlane := &fakeVolumeControlPlane{host: volumeServer.host()}
controlPlaneAddr := startFakeVolumeControlPlane(t, controlPlane)
_, grpcPortString, err := net.SplitHostPort(controlPlaneAddr)
require.NoError(t, err)
grpcPort, err := strconv.Atoi(grpcPortString)
require.NoError(t, err)
server := newTestServer(t, "/exports", client)
// Point the server's filer address and clients at the fake control plane.
server.option.Filer = pb.NewServerAddressWithGrpcPort(controlPlaneAddr, grpcPort)
server.option.GrpcDialOption = grpc.WithTransportCredentials(insecure.NewCredentials())
if server.filerClient != nil {
server.filerClient.Close()
}
server.filerClient = wdclient.NewFilerClient([]pb.ServerAddress{server.option.Filer}, server.option.GrpcDialOption, "")
server.withFilerClient = func(_ bool, fn func(filer_pb.SeaweedFilerClient) error) error {
conn, err := grpc.NewClient(controlPlaneAddr, grpc.WithTransportCredentials(insecure.NewCredentials()))
if err != nil {
return err
}
defer conn.Close()
return fn(filer_pb.NewSeaweedFilerClient(conn))
}
target, cleanup := mountTestTarget(t, server)
defer cleanup()
defer target.Close()
// Fixed seed keeps the payload deterministic across runs.
payload := make([]byte, maxInlineWriteSize+4096)
_, err = rand.New(rand.NewSource(1)).Read(payload)
require.NoError(t, err)
file, err := target.OpenFile("/big.bin", 0o644)
require.NoError(t, err)
_, err = file.Write(payload)
require.NoError(t, err)
require.NoError(t, file.Close())
entry := client.entries["/exports/big.bin"]
require.NotNil(t, entry)
require.Len(t, entry.GetChunks(), 1)
assert.Nil(t, entry.Content)
assert.Equal(t, uint64(len(payload)), entry.GetAttributes().GetFileSize())
readFile, err := target.Open("/big.bin")
require.NoError(t, err)
defer readFile.Close()
data, err := io.ReadAll(readFile)
require.NoError(t, err)
assert.Equal(t, payload, data)
controlPlane.mu.Lock()
defer controlPlane.mu.Unlock()
require.Len(t, controlPlane.assigns, 1)
assert.Equal(t, "/exports/big.bin", controlPlane.assigns[0].GetPath())
assert.NotEmpty(t, controlPlane.lookups)
}
// TestSeaweedNFSRejectsStaleHandleAfterDeleteRecreate creates a file, records
// its handle, removes the file and recreates it at the same path, then checks
// that the old handle is rejected with NFS3ErrStale while a freshly looked-up
// handle works.
func TestSeaweedNFSRejectsStaleHandleAfterDeleteRecreate(t *testing.T) {
client := &fakeNFSFilerClient{
kv: map[string][]byte{
string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"),
},
entries: map[util.FullPath]*filer_pb.Entry{
"/exports": testEntry("exports", true, 101, uint32(0755), nil),
},
}
server := newTestServer(t, "/exports", client)
target, cleanup := mountTestTarget(t, server)
defer cleanup()
defer target.Close()
file, err := target.OpenFile("/stale.txt", 0o644)
require.NoError(t, err)
_, err = file.Write([]byte("old"))
require.NoError(t, err)
require.NoError(t, file.Close())
_, oldHandle, err := target.Lookup("/stale.txt")
require.NoError(t, err)
// Delete and recreate the file under the same name.
require.NoError(t, target.Remove("/stale.txt"))
file, err = target.OpenFile("/stale.txt", 0o644)
require.NoError(t, err)
_, err = file.Write([]byte("new"))
require.NoError(t, err)
require.NoError(t, file.Close())
_, err = target.GetAttr(oldHandle)
require.Error(t, err)
nfsErr, ok := err.(*nfsclient.Error)
require.True(t, ok)
assert.Equal(t, uint32(nfsclient.NFS3ErrStale), nfsErr.ErrorNum)
_, newHandle, err := target.Lookup("/stale.txt")
require.NoError(t, err)
_, err = target.GetAttr(newHandle)
require.NoError(t, err)
}
// TestSeaweedNFSFileHandleSurvivesServerRestart creates a file and records its
// handle, shuts the NFS server down, starts a new server over the same fake
// filer client, and checks that the old handle still answers GetAttr with the
// entry's inode as file ID, that a fresh lookup returns the identical handle,
// and that the content is unchanged.
func TestSeaweedNFSFileHandleSurvivesServerRestart(t *testing.T) {
client := &fakeNFSFilerClient{
kv: map[string][]byte{
string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"),
},
entries: map[util.FullPath]*filer_pb.Entry{
"/exports": testEntry("exports", true, 101, uint32(0755), nil),
},
}
server := newTestServer(t, "/exports", client)
target, cleanup := mountTestTarget(t, server)
file, err := target.OpenFile("/restart.txt", 0o644)
require.NoError(t, err)
payload := []byte("survives restart")
_, err = file.Write(payload)
require.NoError(t, err)
require.NoError(t, file.Close())
_, handle, err := target.Lookup("/restart.txt")
require.NoError(t, err)
// Tear the first server down explicitly (not deferred) before starting the
// second one.
target.Close()
cleanup()
restartedServer := newTestServer(t, "/exports", client)
restartedTarget, restartedCleanup := mountTestTarget(t, restartedServer)
defer restartedCleanup()
defer restartedTarget.Close()
attr, err := restartedTarget.GetAttr(handle)
require.NoError(t, err)
assert.Equal(t, uint64(client.entries["/exports/restart.txt"].GetAttributes().GetInode()), attr.Fileid)
_, restartedHandle, err := restartedTarget.Lookup("/restart.txt")
require.NoError(t, err)
assert.Equal(t, handle, restartedHandle)
readFile, err := restartedTarget.Open("/restart.txt")
require.NoError(t, err)
defer readFile.Close()
data, err := io.ReadAll(readFile)
require.NoError(t, err)
assert.Equal(t, payload, data)
}

View File

@@ -1,88 +0,0 @@
package nfs
import (
"context"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"google.golang.org/grpc"
)
// filerClientExecutor runs fn with a raw SeaweedFilerClient and returns fn's
// (or the connection's) error. streamingMode is passed through to the
// underlying connection helper.
type filerClientExecutor func(streamingMode bool, fn func(filer_pb.SeaweedFilerClient) error) error
// internalClientExecutor is the counterpart of filerClientExecutor that hands
// fn the narrower nfsFilerClient interface instead of the raw gRPC client.
type internalClientExecutor func(streamingMode bool, fn func(nfsFilerClient) error) error
// nfsListEntriesClient is the receive side of a ListEntries stream; it exposes
// only Recv.
type nfsListEntriesClient interface {
Recv() (*filer_pb.ListEntriesResponse, error)
}
// nfsSubscribeMetadataClient is the receive side of a SubscribeMetadata
// stream; it exposes only Recv.
type nfsSubscribeMetadataClient interface {
Recv() (*filer_pb.SubscribeMetadataResponse, error)
}
// nfsFilerClient is the subset of filer RPCs used by this package. The two
// streaming calls return the package's narrow Recv-only stream interfaces
// rather than the generated gRPC stream types.
type nfsFilerClient interface {
KvGet(ctx context.Context, in *filer_pb.KvGetRequest, opts ...grpc.CallOption) (*filer_pb.KvGetResponse, error)
LookupDirectoryEntry(ctx context.Context, in *filer_pb.LookupDirectoryEntryRequest, opts ...grpc.CallOption) (*filer_pb.LookupDirectoryEntryResponse, error)
ListEntries(ctx context.Context, in *filer_pb.ListEntriesRequest, opts ...grpc.CallOption) (nfsListEntriesClient, error)
SubscribeMetadata(ctx context.Context, in *filer_pb.SubscribeMetadataRequest, opts ...grpc.CallOption) (nfsSubscribeMetadataClient, error)
CreateEntry(ctx context.Context, in *filer_pb.CreateEntryRequest, opts ...grpc.CallOption) (*filer_pb.CreateEntryResponse, error)
UpdateEntry(ctx context.Context, in *filer_pb.UpdateEntryRequest, opts ...grpc.CallOption) (*filer_pb.UpdateEntryResponse, error)
DeleteEntry(ctx context.Context, in *filer_pb.DeleteEntryRequest, opts ...grpc.CallOption) (*filer_pb.DeleteEntryResponse, error)
AtomicRenameEntry(ctx context.Context, in *filer_pb.AtomicRenameEntryRequest, opts ...grpc.CallOption) (*filer_pb.AtomicRenameEntryResponse, error)
Statistics(ctx context.Context, in *filer_pb.StatisticsRequest, opts ...grpc.CallOption) (*filer_pb.StatisticsResponse, error)
}
// grpcNFSFilerClient adapts a generated SeaweedFilerClient to nfsFilerClient
// by forwarding every call to the wrapped client.
type grpcNFSFilerClient struct {
client filer_pb.SeaweedFilerClient
}
// KvGet forwards the call unchanged to the wrapped filer client.
func (c grpcNFSFilerClient) KvGet(ctx context.Context, in *filer_pb.KvGetRequest, opts ...grpc.CallOption) (*filer_pb.KvGetResponse, error) {
return c.client.KvGet(ctx, in, opts...)
}
// LookupDirectoryEntry forwards the call unchanged to the wrapped filer client.
func (c grpcNFSFilerClient) LookupDirectoryEntry(ctx context.Context, in *filer_pb.LookupDirectoryEntryRequest, opts ...grpc.CallOption) (*filer_pb.LookupDirectoryEntryResponse, error) {
return c.client.LookupDirectoryEntry(ctx, in, opts...)
}
// ListEntries forwards the call to the wrapped filer client and returns the
// resulting stream as the Recv-only nfsListEntriesClient interface.
func (c grpcNFSFilerClient) ListEntries(ctx context.Context, in *filer_pb.ListEntriesRequest, opts ...grpc.CallOption) (nfsListEntriesClient, error) {
return c.client.ListEntries(ctx, in, opts...)
}
// SubscribeMetadata forwards the call to the wrapped filer client and returns
// the resulting stream as the Recv-only nfsSubscribeMetadataClient interface.
func (c grpcNFSFilerClient) SubscribeMetadata(ctx context.Context, in *filer_pb.SubscribeMetadataRequest, opts ...grpc.CallOption) (nfsSubscribeMetadataClient, error) {
return c.client.SubscribeMetadata(ctx, in, opts...)
}
// CreateEntry forwards the call unchanged to the wrapped filer client.
func (c grpcNFSFilerClient) CreateEntry(ctx context.Context, in *filer_pb.CreateEntryRequest, opts ...grpc.CallOption) (*filer_pb.CreateEntryResponse, error) {
return c.client.CreateEntry(ctx, in, opts...)
}
// UpdateEntry forwards the call unchanged to the wrapped filer client.
func (c grpcNFSFilerClient) UpdateEntry(ctx context.Context, in *filer_pb.UpdateEntryRequest, opts ...grpc.CallOption) (*filer_pb.UpdateEntryResponse, error) {
return c.client.UpdateEntry(ctx, in, opts...)
}
// DeleteEntry forwards the call unchanged to the wrapped filer client.
func (c grpcNFSFilerClient) DeleteEntry(ctx context.Context, in *filer_pb.DeleteEntryRequest, opts ...grpc.CallOption) (*filer_pb.DeleteEntryResponse, error) {
return c.client.DeleteEntry(ctx, in, opts...)
}
// AtomicRenameEntry forwards the call unchanged to the wrapped filer client.
func (c grpcNFSFilerClient) AtomicRenameEntry(ctx context.Context, in *filer_pb.AtomicRenameEntryRequest, opts ...grpc.CallOption) (*filer_pb.AtomicRenameEntryResponse, error) {
return c.client.AtomicRenameEntry(ctx, in, opts...)
}
// Statistics forwards the call unchanged to the wrapped filer client.
func (c grpcNFSFilerClient) Statistics(ctx context.Context, in *filer_pb.StatisticsRequest, opts ...grpc.CallOption) (*filer_pb.StatisticsResponse, error) {
return c.client.Statistics(ctx, in, opts...)
}
// newFilerClientExecutor builds a filerClientExecutor that opens a gRPC
// connection to option.Filer (using option.GrpcDialOption and the given
// signature) and invokes the callback with a SeaweedFilerClient bound to that
// connection.
func newFilerClientExecutor(option *Option, signature int32) filerClientExecutor {
	return func(streamingMode bool, fn func(filer_pb.SeaweedFilerClient) error) error {
		withConn := func(conn *grpc.ClientConn) error {
			filerClient := filer_pb.NewSeaweedFilerClient(conn)
			return fn(filerClient)
		}
		return pb.WithGrpcClient(streamingMode, signature, withConn, option.Filer.ToGrpcAddress(), false, option.GrpcDialOption)
	}
}
// newInternalClientExecutor builds an internalClientExecutor that opens a gRPC
// connection to option.Filer (using option.GrpcDialOption and the given
// signature) and invokes the callback with a grpcNFSFilerClient wrapping a
// SeaweedFilerClient bound to that connection.
func newInternalClientExecutor(option *Option, signature int32) internalClientExecutor {
	return func(streamingMode bool, fn func(nfsFilerClient) error) error {
		withConn := func(conn *grpc.ClientConn) error {
			adapter := grpcNFSFilerClient{client: filer_pb.NewSeaweedFilerClient(conn)}
			return fn(adapter)
		}
		return pb.WithGrpcClient(streamingMode, signature, withConn, option.Filer.ToGrpcAddress(), false, option.GrpcDialOption)
	}
}

View File

@@ -1,147 +0,0 @@
package nfs
import (
"context"
"errors"
"io"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
)
// chunkInvalidator is implemented by caches that can drop a cached chunk given
// its file ID.
type chunkInvalidator interface {
UnCache(fileID string)
}
// metadataInvalidation pairs a filer path with the entry from a metadata event
// whose chunks should be dropped from the chunk cache.
type metadataInvalidation struct {
// path is the full path the entry lives (or lived) at.
path util.FullPath
// entry is the old or new entry taken from the event notification.
entry *filer_pb.Entry
}
// runMetadataInvalidationLoop keeps a metadata subscription running until ctx
// is done, retrying after failures.
//
// It returns immediately when the receiver, its chunkInvalidator or its
// withInternalClient is nil. It also returns when followMetadataStream ends
// without error, with context.Canceled, or once ctx is done. Any other error
// is logged and retried after waitTime, which starts at one second and grows
// by half on each retry for as long as it is below util.RetryWaitTime.
func (s *Server) runMetadataInvalidationLoop(ctx context.Context) {
if s == nil || s.chunkInvalidator == nil || s.withInternalClient == nil {
return
}
waitTime := time.Second
for ctx.Err() == nil {
err := s.followMetadataStream(ctx)
if err == nil || errors.Is(err, context.Canceled) || ctx.Err() != nil {
return
}
glog.V(0).Infof("retry nfs metadata invalidation stream for %s in %v: %v", s.exportRoot, waitTime, err)
timer := time.NewTimer(waitTime)
select {
case <-ctx.Done():
// Stop reports false when the timer has already fired; drain its channel
// in that case before returning.
if !timer.Stop() {
<-timer.C
}
return
case <-timer.C:
}
if waitTime < util.RetryWaitTime {
waitTime += waitTime / 2
}
}
}
// followMetadataStream subscribes to filer metadata events under the export
// root (client name "nfs", batching enabled) and feeds every received response
// to applyMetadataInvalidationResponse.
//
// It returns nil when the stream ends with io.EOF, and otherwise the first
// error from opening the subscription or receiving from it.
func (s *Server) followMetadataStream(ctx context.Context) error {
	subscription := &filer_pb.SubscribeMetadataRequest{
		ClientName:             "nfs",
		PathPrefix:             string(s.exportRoot),
		ClientId:               s.signature,
		ClientEpoch:            1,
		ClientSupportsBatching: true,
	}
	consume := func(client nfsFilerClient) error {
		stream, openErr := client.SubscribeMetadata(ctx, subscription)
		if openErr != nil {
			return openErr
		}
		for {
			event, recvErr := stream.Recv()
			switch {
			case recvErr == io.EOF:
				return nil
			case recvErr != nil:
				return recvErr
			}
			s.applyMetadataInvalidationResponse(event)
		}
	}
	return s.withInternalClient(true, consume)
}
// applyMetadataInvalidationResponse un-caches every chunk referenced by the
// entries a metadata response touches.
//
// The response itself is processed first, followed by each event batched in
// resp.Events. Entries whose path is not visible from the export root are
// skipped, and each chunk file id is passed to the invalidator at most once
// per response. Nothing happens when the server, its invalidator, or resp is
// nil.
func (s *Server) applyMetadataInvalidationResponse(resp *filer_pb.SubscribeMetadataResponse) {
	if s == nil || s.chunkInvalidator == nil || resp == nil {
		return
	}

	// Top-level event first, then the batched ones, in arrival order.
	events := make([]*filer_pb.SubscribeMetadataResponse, 0, 1+len(resp.Events))
	events = append(events, resp)
	events = append(events, resp.Events...)

	alreadyDropped := make(map[string]struct{})
	for _, event := range events {
		for _, inv := range metadataInvalidationsForEvent(event) {
			if inv.entry == nil {
				continue
			}
			if !pathVisibleFromExport(inv.path, s.exportRoot) {
				continue
			}
			for _, chunk := range inv.entry.GetChunks() {
				id := chunk.GetFileIdString()
				if id == "" {
					continue
				}
				if _, dup := alreadyDropped[id]; dup {
					continue
				}
				alreadyDropped[id] = struct{}{}
				s.chunkInvalidator.UnCache(id)
			}
		}
	}
}
// metadataInvalidationsForEvent translates one metadata event into the
// (path, entry) pairs whose chunks should be invalidated.
//
//   - No event notification: nil.
//   - Both old and new entry present: the old entry at its original path,
//     plus the new entry at its destination when the name or directory
//     changed.
//   - Only a new entry: the new entry at its destination path.
//   - Only an old entry: the old entry at its original path.
//
// The destination directory is NewParentPath when set, otherwise the event's
// Directory.
func metadataInvalidationsForEvent(resp *filer_pb.SubscribeMetadataResponse) []metadataInvalidation {
	note := resp.GetEventNotification()
	if note == nil {
		return nil
	}

	before, after := note.OldEntry, note.NewEntry

	destDir := resp.Directory
	if note.NewParentPath != "" {
		destDir = note.NewParentPath
	}

	switch {
	case before != nil && after != nil:
		result := []metadataInvalidation{
			{path: util.NewFullPath(resp.Directory, before.Name), entry: before},
		}
		moved := before.Name != after.Name || resp.Directory != destDir
		if moved {
			result = append(result, metadataInvalidation{
				path:  util.NewFullPath(destDir, after.Name),
				entry: after,
			})
		}
		return result
	case after != nil:
		return []metadataInvalidation{
			{path: util.NewFullPath(destDir, after.Name), entry: after},
		}
	case before != nil:
		return []metadataInvalidation{
			{path: util.NewFullPath(resp.Directory, before.Name), entry: before},
		}
	default:
		return nil
	}
}

View File

@@ -1,343 +0,0 @@
package nfs
import (
"context"
"encoding/binary"
"fmt"
"net"
"os"
"sync"
"time"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/util"
)
// The upstream willscott/go-nfs library only serves the MOUNT protocol over
// TCP. Linux's mount.nfs and the in-kernel NFS client default `mountproto` to
// UDP in many configurations, so against a stock `weed nfs` deployment the
// kernel queries portmap for "MOUNT v3 UDP", gets port=0 ("not registered"),
// and either falls back inconsistently or surfaces EPROTONOSUPPORT
// ("requested NFS version or transport protocol is not supported"). The user
// either has to add `mountproto=tcp` / `mountport=2049` to their mount
// options or guess that their distro happens to fall back to TCP on its own.
//
// This responder closes that gap. It speaks just enough of MOUNT v3 to handle
// MOUNT_NULL / MOUNT_MNT / MOUNT_UMNT over UDP — the only procedures the
// kernel actually invokes during mount setup and teardown — so plain
// `mount -t nfs <host>:<export> /mnt` works without any client-side protocol
// hints. The protocol layout is intentionally identical to the TCP MOUNT
// handler in handler.go's Mount() so the two paths return the same
// filehandle and the same set of auth flavors for the same export.
//
// References: RFC 1813 §5 (NFSv3/MOUNTv3), RFC 5531 (RPC).
const (
// mountUDPMaxRecord is the size in bytes of the buffer the UDP serve
// loop reads each incoming datagram into.
mountUDPMaxRecord = 32 * 1024
// mountUDPRetryBackoff mirrors portmapRetryBackoff so the two
// listening goroutines back off identically under host pressure.
mountUDPRetryBackoff = 50 * time.Millisecond
// mountUDPLookupTimeout bounds any filer round-trip the UDP MOUNT
// path makes (export-root existence check, subexport lookup). The
// UDP serve loop is single-threaded, so a stalled filer call would
// otherwise block every later MOUNT packet.
mountUDPLookupTimeout = 5 * time.Second
// mountVersion is the only MOUNT protocol version handleCall accepts;
// any other version is answered with PROG_MISMATCH advertising this one.
mountVersion = 3
// MOUNT v3 procedure numbers dispatched by handleCall. Any other
// procedure is answered with PROC_UNAVAIL.
mountProcNull = 0
mountProcMnt = 1
mountProcUmnt = 3
// MOUNT v3 status codes (mountstat3 in RFC 1813 §5.1.1).
mnt3StatOK uint32 = 0
mnt3ErrAcces uint32 = 13
mnt3ErrNoEnt uint32 = 2
mnt3ErrNotDir uint32 = 20
mnt3ErrServerFault uint32 = 10006
// XDR opaque length cap for dirpath. RFC 1813 §5.1 limits MNTPATHLEN
// to 1024; cap a bit higher for headroom and reject anything beyond.
mountUDPMaxPathLen = 4096
// AuthFlavor numeric IDs (matches go-nfs and RFC 5531 §8).
authFlavorNull = 0
authFlavorUnix = 1
)
// mountUDPServer answers MOUNT v3 RPCs over UDP. It listens on the same port
// the NFS TCP server uses (2049 by default), since that's what we advertise
// via portmap, and shares the parent Server's exportRoot, exportID, and
// client allowlist so the UDP MOUNT path applies the same access policy as
// the TCP path.
type mountUDPServer struct {
// bindIP and port form the UDP listen address used by Start.
bindIP string
port int
// server is the parent NFS server whose export settings and client
// authorizer are consulted while handling requests.
server *Server
// udpConn is the listening socket; it is set by Start and closed by Close.
udpConn *net.UDPConn
// mu guards closed.
mu sync.Mutex
closed bool
// done is closed by Close so the serve loop can abandon a retry back-off.
done chan struct{}
// wg tracks the serve goroutine so Close can wait for it to exit.
wg sync.WaitGroup
}
// newMountUDPServer builds a UDP MOUNT responder bound to bindIP:port on
// behalf of server. The socket is not opened until Start is called.
func newMountUDPServer(bindIP string, port int, server *Server) *mountUDPServer {
	m := new(mountUDPServer)
	m.bindIP = bindIP
	m.port = port
	m.server = server
	m.done = make(chan struct{})
	return m
}
// Start opens the UDP socket on bindIP:port and launches the serve loop in a
// background goroutine tracked by the wait group. It returns a wrapped error
// when the address cannot be resolved or the socket cannot be bound.
func (m *mountUDPServer) Start() error {
	listenAddr := net.JoinHostPort(m.bindIP, fmt.Sprintf("%d", m.port))

	resolved, resolveErr := net.ResolveUDPAddr("udp", listenAddr)
	if resolveErr != nil {
		return fmt.Errorf("mount udp resolve %s: %w", listenAddr, resolveErr)
	}

	sock, listenErr := net.ListenUDP("udp", resolved)
	if listenErr != nil {
		return fmt.Errorf("mount udp listen %s: %w", listenAddr, listenErr)
	}
	m.udpConn = sock

	m.wg.Add(1)
	go func() {
		defer m.wg.Done()
		m.serve()
	}()
	return nil
}
// Close shuts the responder down and waits for the serve goroutine to exit.
// It is idempotent: only the first call closes the done channel and the
// socket. The socket's close error is intentionally discarded; Close always
// returns nil.
func (m *mountUDPServer) Close() error {
	// Flip the closed flag and signal done exactly once, under the lock.
	alreadyClosed := func() bool {
		m.mu.Lock()
		defer m.mu.Unlock()
		if m.closed {
			return true
		}
		m.closed = true
		close(m.done)
		return false
	}()
	if alreadyClosed {
		return nil
	}

	if sock := m.udpConn; sock != nil {
		_ = sock.Close()
	}
	m.wg.Wait()
	return nil
}
// isClosed reports whether Close has been called, reading the flag under mu.
func (m *mountUDPServer) isClosed() bool {
	m.mu.Lock()
	state := m.closed
	m.mu.Unlock()
	return state
}
// serve is the single-goroutine receive loop: it reads one datagram at a
// time, enforces the parent server's client allowlist, dispatches the RPC
// via handleCall and writes the reply back to the sender. It returns once
// the server has been closed.
func (m *mountUDPServer) serve() {
	packet := make([]byte, mountUDPMaxRecord)
	for {
		size, peer, readErr := m.udpConn.ReadFromUDP(packet)
		if readErr != nil {
			if m.isClosed() {
				return
			}
			// Transient read failure: log, back off, and keep the responder
			// alive — the same pattern the portmap UDP loop uses.
			glog.V(1).Infof("mount udp read: %v", readErr)
			select {
			case <-time.After(mountUDPRetryBackoff):
				continue
			case <-m.done:
				return
			}
		}

		// Check the allowlist before looking at any RPC bytes, mirroring the
		// TCP path's allowlistListener wrapping.
		if srv := m.server; srv != nil && srv.clientAuthorizer != nil {
			if !srv.clientAuthorizer.isAllowedAddr(peer) {
				glog.V(1).Infof("mount udp: rejecting unauthorized client %s", peer)
				continue
			}
		}

		response := m.handleCall(packet[:size], peer)
		if response == nil {
			// Malformed call: drop silently.
			continue
		}
		if _, writeErr := m.udpConn.WriteToUDP(response, peer); writeErr != nil {
			glog.V(1).Infof("mount udp write to %s: %v", peer, writeErr)
		}
	}
}
// handleCall decodes a single RPC CALL datagram and produces the encoded
// reply. A nil result means the call could not be parsed and should be
// dropped without answering.
//
// Calls for another program get PROG_UNAVAIL; calls for another MOUNT
// version get PROG_MISMATCH advertising the range 3..3; NULL and UMNT are
// acknowledged with an empty success reply; MNT is delegated to handleMount;
// every other procedure gets PROC_UNAVAIL.
func (m *mountUDPServer) handleCall(callBuf []byte, addr *net.UDPAddr) []byte {
	xid, prog, vers, proc, args, err := parseRPCCall(callBuf)
	switch {
	case err != nil:
		return nil
	case prog != mountProgram:
		return encodeAcceptedReply(xid, rpcAcceptProgUnavail, nil)
	case vers != mountVersion:
		// Mismatch — advertise the low/high versions we actually support.
		supported := make([]byte, 8)
		binary.BigEndian.PutUint32(supported[:4], mountVersion)
		binary.BigEndian.PutUint32(supported[4:], mountVersion)
		return encodeAcceptedReply(xid, rpcAcceptProgMismatch, supported)
	}

	switch proc {
	case mountProcMnt:
		return m.handleMount(xid, args, addr)
	case mountProcNull, mountProcUmnt:
		// NULL is a ping. UMNT needs no work either: the server is
		// stateless, the dirpath argument is not validated, and UMNT has no
		// return data.
		return encodeAcceptedReply(xid, rpcAcceptSuccess, nil)
	}

	// MOUNT v3 also defines DUMP / EXPORT / UMNTALL, but the kernel mount
	// path doesn't invoke them; PROC_UNAVAIL is the protocol-correct answer.
	return encodeAcceptedReply(xid, rpcAcceptProcUnavail, nil)
}
// handleMount implements MOUNT v3 MNT. RFC 1813 §5.1.4:
//
//	MOUNT3args { dirpath3 dirpath; }   // XDR opaque
//	MOUNT3res  { mountstat3 status; if OK { handle, auth_flavors[] } }
//
// The argument must hold a 4-byte length, at most mountUDPMaxPathLen, and
// the padded path bytes; anything else is answered with GARBAGE_ARGS.
//
// Resolution mirrors Handler.resolveMountFilesystem: a path equal to the
// export root, or one outside the export, yields the synthetic root handle
// (after confirming the root still exists); a path under the export yields
// that subdirectory's handle.
func (m *mountUDPServer) handleMount(xid uint32, args []byte, addr *net.UDPAddr) []byte {
	garbage := func() []byte {
		return encodeAcceptedReply(xid, rpcAcceptGarbageArgs, nil)
	}

	if len(args) < 4 {
		return garbage()
	}
	nameLen := binary.BigEndian.Uint32(args[:4])
	if nameLen > mountUDPMaxPathLen {
		return garbage()
	}
	// XDR pads opaque data to a multiple of four bytes.
	if need := 4 + ((nameLen + 3) &^ 3); uint32(len(args)) < need {
		return garbage()
	}

	dirpath := string(args[4 : 4+nameLen])
	requested := normalizeExportRoot(util.FullPath(dirpath))
	flavors := []uint32{authFlavorNull, authFlavorUnix}

	ctx, cancel := context.WithTimeout(context.Background(), mountUDPLookupTimeout)
	defer cancel()

	exportRoot := m.server.exportRoot
	exact := requested == exportRoot

	if !exact && requested.IsUnder(exportRoot) {
		fh, status := m.resolveSubexportFileHandle(ctx, requested)
		if status != mnt3StatOK {
			return encodeMountStatus(xid, status)
		}
		glog.V(1).Infof("mount udp: client %s requested %q under export %q; mounting at subdirectory", addr, dirpath, exportRoot)
		return encodeMountSuccess(xid, fh, flavors)
	}

	// Exact match and outside-export both serve the synthetic root handle.
	// Only the outside-export case logs; exact match is the common path.
	if !exact {
		glog.V(0).Infof("mount udp: client %s requested %q (outside export %q); serving configured export", addr, dirpath, exportRoot)
	}
	if status := m.rootMountStatus(ctx); status != mnt3StatOK {
		return encodeMountStatus(xid, status)
	}
	return encodeMountSuccess(xid, syntheticRootHandle(m.server), flavors)
}
// rootMountStatus is the UDP counterpart of Handler.lstatExportStatus: it
// checks that the configured export root still exists in the filer, so a
// successful MNT cannot hand out a handle for a deleted directory. It uses
// the Server's shared root filesystem rather than building a wrapper per
// request.
//
// It returns mnt3StatOK when no internal client is configured or the lookup
// succeeds, mnt3ErrNoEnt when the root is missing, and mnt3ErrServerFault
// (after logging) for any other lookup error.
func (m *mountUDPServer) rootMountStatus(ctx context.Context) uint32 {
	if m.server.withInternalClient == nil {
		return mnt3StatOK
	}

	_, err := m.server.rootFilesystem().fileInfoForVirtualPath(ctx, "/")
	if err == nil {
		return mnt3StatOK
	}
	if os.IsNotExist(err) {
		return mnt3ErrNoEnt
	}
	glog.Errorf("mount udp: export root %q lookup failed: %v", m.server.exportRoot, err)
	return mnt3ErrServerFault
}
// resolveSubexportFileHandle is the UDP counterpart of the sub-filesystem
// branch in Handler.resolveMountFilesystem. On TCP, go-nfs's onMount calls
// ToHandle on the returned filesystem; UDP has to encode the file handle
// itself, so the inode/generation lookup is done explicitly here.
//
// The UDP listener is up before serve() runs newHandler(), so a subexport
// MOUNT can arrive before sharedReaderCache has been assigned. The root
// filesystem is therefore resolved first (driving Server.rootFilesystem's
// sync.Once) and its reader cache is reused, so the sub-filesystem shares
// the cache the TCP path uses.
//
// It returns the encoded directory handle with mnt3StatOK, or a nil handle
// with mnt3ErrNoEnt (path missing), mnt3ErrNotDir (not a directory) or
// mnt3ErrServerFault (no internal client, or any other lookup error).
func (m *mountUDPServer) resolveSubexportFileHandle(ctx context.Context, requested util.FullPath) ([]byte, uint32) {
	if m.server.withInternalClient == nil {
		return nil, mnt3ErrServerFault
	}

	rootFS := m.server.rootFilesystem()
	subFS := newSeaweedFileSystem(m.server, requested, rootFS.readerCache)

	info, err := subFS.fileInfoForVirtualPath(ctx, "/")
	if err != nil {
		if os.IsNotExist(err) {
			return nil, mnt3ErrNoEnt
		}
		glog.Errorf("mount udp: subexport lookup %q failed: %v", requested, err)
		return nil, mnt3ErrServerFault
	}
	if !info.entry.IsDirectory {
		return nil, mnt3ErrNotDir
	}

	inode := info.entry.GetAttributes().GetInode()
	handle := NewFileHandle(m.server.exportID, FileHandleKindDirectory, inode, info.generation)
	return handle.Encode(), mnt3StatOK
}
// syntheticRootHandle returns the encoded directory file handle that stands
// for the export root of s: inode 0 at the initial inode-index generation.
func syntheticRootHandle(s *Server) []byte {
	root := NewFileHandle(s.exportID, FileHandleKindDirectory, 0, filer.InodeIndexInitialGeneration)
	return root.Encode()
}
// encodeMountStatus builds a MOUNT MNT reply that carries only a status
// word. Per RFC 1813 §5.1.4 a non-OK status ends the response — no handle or
// flavor list follows. The RPC layer still reports SUCCESS; the failure is
// expressed in the mountstat3 value.
func encodeMountStatus(xid, status uint32) []byte {
	var wire [4]byte
	binary.BigEndian.PutUint32(wire[:], status)
	return encodeAcceptedReply(xid, rpcAcceptSuccess, wire[:])
}
// encodeMountSuccess builds the OK reply to MOUNT MNT: status=OK, the file
// handle as an XDR opaque (length, bytes, zero padding to a 4-byte
// boundary), then the auth flavor count followed by each flavor.
func encodeMountSuccess(xid uint32, handle []byte, flavors []uint32) []byte {
	fhLen := uint32(len(handle))
	fhPadded := (fhLen + 3) &^ 3
	flavorCount := uint32(len(flavors))

	out := make([]byte, 4+4+fhPadded+4+4*flavorCount)
	cursor := uint32(0)
	// putWord writes one big-endian word at the cursor and advances it.
	putWord := func(v uint32) {
		binary.BigEndian.PutUint32(out[cursor:cursor+4], v)
		cursor += 4
	}

	putWord(mnt3StatOK)
	putWord(fhLen)
	copy(out[cursor:], handle)
	// Skip the handle plus its padding; the pad bytes stay zero from make().
	cursor += fhPadded

	putWord(flavorCount)
	for i := range flavors {
		putWord(flavors[i])
	}
	return encodeAcceptedReply(xid, rpcAcceptSuccess, out)
}

View File

@@ -1,431 +0,0 @@
package nfs
import (
"context"
"encoding/binary"
"net"
"testing"
"time"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
gonfs "github.com/willscott/go-nfs"
)
// buildMountCallFrame assembles an RPC CALL datagram whose single argument
// is dirpath encoded as an XDR opaque. Layout (RFC 5531 §9): xid,
// msg_type=CALL, rpcvers=2, prog, vers, proc, cred (AUTH_NONE), verf
// (AUTH_NONE), then the 4-byte path length and the padded path bytes.
func buildMountCallFrame(xid, prog, vers, proc uint32, dirpath string) []byte {
	const (
		callHeaderLen = 24 // six 4-byte header words
		authLen       = 16 // two empty AUTH_NONE opaque_auth stanzas
	)
	pathLen := uint32(len(dirpath))
	frame := make([]byte, callHeaderLen+authLen+4+((pathLen+3)&^3))

	header := []uint32{xid, rpcMsgCall, 2, prog, vers, proc}
	for i, word := range header {
		binary.BigEndian.PutUint32(frame[4*i:], word)
	}

	// cred and verf are AUTH_NONE with length 0, i.e. all zero bytes, which
	// make() already provided.
	argOff := callHeaderLen + authLen
	binary.BigEndian.PutUint32(frame[argOff:], pathLen)
	copy(frame[argOff+4:], dirpath)
	return frame
}
// newMountUDPTestServer starts a UDP MOUNT responder for exportPath with no
// filer client wired in, returning the responder and its listening socket.
func newMountUDPTestServer(t *testing.T, exportPath string) (*mountUDPServer, *net.UDPConn) {
	t.Helper()
	var noClient *fakeNFSFilerClient
	return newMountUDPTestServerWithClient(t, exportPath, noClient)
}
// newMountUDPTestServerWithClient starts a UDP MOUNT responder on an
// ephemeral loopback port for exportPath. When client is non-nil it is wired
// in as Server.withInternalClient, so the under-export lookup branch in
// handleMount can find directory entries. The responder is closed
// automatically when the test finishes.
func newMountUDPTestServerWithClient(t *testing.T, exportPath string, client *fakeNFSFilerClient) (*mountUDPServer, *net.UDPConn) {
	t.Helper()

	root := normalizeExportRoot(util.FullPath(exportPath))
	authorizer, err := newClientAuthorizer(nil)
	if err != nil {
		t.Fatal(err)
	}

	parent := &Server{
		option:           &Option{},
		exportRoot:       root,
		exportID:         exportIDForRoot(root),
		clientAuthorizer: authorizer,
	}
	if client != nil {
		parent.withInternalClient = func(_ bool, fn func(nfsFilerClient) error) error {
			return fn(client)
		}
	}

	// Port 0 lets the kernel pick a free port.
	loopback, err := net.ResolveUDPAddr("udp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}
	sock, err := net.ListenUDP("udp", loopback)
	if err != nil {
		t.Fatal(err)
	}

	responder := &mountUDPServer{
		bindIP:  "127.0.0.1",
		port:    sock.LocalAddr().(*net.UDPAddr).Port,
		server:  parent,
		udpConn: sock,
		done:    make(chan struct{}),
	}

	// Run the serve loop directly on the pre-bound socket.
	responder.wg.Add(1)
	go func() {
		defer responder.wg.Done()
		responder.serve()
	}()
	t.Cleanup(func() {
		_ = responder.Close()
	})

	return responder, sock
}
// sendMountUDP sends payload to target as a single UDP datagram and returns
// the first reply datagram. The test fails if dialing, writing, or reading
// within two seconds fails.
func sendMountUDP(t *testing.T, target *net.UDPAddr, payload []byte) []byte {
	t.Helper()

	client, dialErr := net.DialUDP("udp", nil, target)
	if dialErr != nil {
		t.Fatal(dialErr)
	}
	defer client.Close()

	if _, writeErr := client.Write(payload); writeErr != nil {
		t.Fatal(writeErr)
	}

	// Bound the wait so a dropped call fails the test instead of hanging.
	_ = client.SetReadDeadline(time.Now().Add(2 * time.Second))
	reply := make([]byte, 4096)
	got, readErr := client.Read(reply)
	if readErr != nil {
		t.Fatalf("read reply: %v", readErr)
	}
	return reply[:got]
}
// parseRPCReply extracts the xid, the accept_stat, and everything after
// accept_stat from a MSG_ACCEPTED reply. There is no fragment marker as on
// TCP — the whole UDP datagram is the reply. The test fails if the reply is
// shorter than the 24-byte header, is not a REPLY, or was not accepted.
func parseRPCReply(t *testing.T, reply []byte) (xid, acceptStat uint32, body []byte) {
	t.Helper()

	const headerLen = 24
	if len(reply) < headerLen {
		t.Fatalf("reply too short: %d bytes", len(reply))
	}

	// word returns the i-th big-endian 32-bit word of the reply.
	word := func(i int) uint32 {
		return binary.BigEndian.Uint32(reply[4*i : 4*i+4])
	}

	if msgType := word(1); msgType != rpcMsgReply {
		t.Fatalf("msg_type=%d want REPLY(1)", msgType)
	}
	if replyStat := word(2); replyStat != rpcMsgAccepted {
		t.Fatalf("reply_stat=%d want MSG_ACCEPTED(0)", replyStat)
	}

	// Words 3 and 4 are the verifier (flavor, length); accept_stat is word 5.
	return word(0), word(5), reply[headerLen:]
}
// TestMountUDPNullReturnsSuccess checks that MOUNT NULL over UDP echoes the
// xid and is answered with SUCCESS and an empty body.
func TestMountUDPNullReturnsSuccess(t *testing.T) {
	// Only the socket address is needed; the responder itself is torn down
	// by the helper's cleanup.
	_, conn := newMountUDPTestServer(t, "/exports")
	target := conn.LocalAddr().(*net.UDPAddr)
	reply := sendMountUDP(t, target, buildMountCallFrame(7, mountProgram, 3, mountProcNull, ""))
	xid, astat, body := parseRPCReply(t, reply)
	if xid != 7 {
		t.Errorf("xid=%d want 7", xid)
	}
	if astat != rpcAcceptSuccess {
		t.Errorf("accept_stat=%d want SUCCESS(0)", astat)
	}
	if len(body) != 0 {
		t.Errorf("NULL reply body should be empty, got %d bytes", len(body))
	}
}
// TestMountUDPMntReturnsHandleAndFlavors mounts the export root over UDP and
// checks the success reply: status OK, a file handle that decodes, and
// exactly the two auth flavors NULL and UNIX in that order.
func TestMountUDPMntReturnsHandleAndFlavors(t *testing.T) {
	_, conn := newMountUDPTestServer(t, "/exports")
	target := conn.LocalAddr().(*net.UDPAddr)
	reply := sendMountUDP(t, target, buildMountCallFrame(42, mountProgram, 3, mountProcMnt, "/exports"))
	xid, astat, body := parseRPCReply(t, reply)
	if xid != 42 {
		t.Errorf("xid=%d want 42", xid)
	}
	if astat != rpcAcceptSuccess {
		t.Fatalf("accept_stat=%d want SUCCESS(0)", astat)
	}
	if len(body) < 4 {
		t.Fatalf("body too short: %d bytes", len(body))
	}
	status := binary.BigEndian.Uint32(body[0:4])
	if status != mnt3StatOK {
		t.Fatalf("mountstat3=%d want OK(0)", status)
	}
	// fhandle3: uint32 length + padded opaque bytes.
	if len(body) < 8 {
		t.Fatalf("body missing handle length: %d bytes", len(body))
	}
	handleLen := binary.BigEndian.Uint32(body[4:8])
	handlePadded := (handleLen + 3) &^ 3
	if uint32(len(body)) < 8+handlePadded+4 {
		t.Fatalf("body truncated: have %d, need at least %d", len(body), 8+handlePadded+4)
	}
	handle := body[8 : 8+handleLen]
	if _, err := DecodeFileHandle(handle); err != nil {
		t.Fatalf("returned handle does not decode: %v", err)
	}
	flavorOff := 8 + handlePadded
	count := binary.BigEndian.Uint32(body[flavorOff : flavorOff+4])
	// A wrong count is fatal: the flavor words below are read at fixed
	// offsets, so continuing with a different count would either compare
	// unrelated bytes or slice past the end of the reply.
	if count != 2 {
		t.Fatalf("flavor count=%d want 2 (NULL + UNIX)", count)
	}
	if need := flavorOff + 4 + 4*count; uint32(len(body)) < need {
		t.Fatalf("body truncated before flavors: have %d, need at least %d", len(body), need)
	}
	got := []uint32{
		binary.BigEndian.Uint32(body[flavorOff+4 : flavorOff+8]),
		binary.BigEndian.Uint32(body[flavorOff+8 : flavorOff+12]),
	}
	if got[0] != authFlavorNull || got[1] != authFlavorUnix {
		t.Errorf("flavors=%v want [%d %d]", got, authFlavorNull, authFlavorUnix)
	}
}
// TestMountUDPMntAcceptsAnyPath verifies that, with no filer client wired
// in, MNT succeeds for the export root in several spellings and for paths
// that lie outside the export.
func TestMountUDPMntAcceptsAnyPath(t *testing.T) {
	const exportRoot = "/buckets/data"
	_, conn := newMountUDPTestServer(t, exportRoot)
	serverAddr := conn.LocalAddr().(*net.UDPAddr)

	requests := []string{
		"/",
		"/buckets",
		"/buckets/other",
		"/wrong/path",
		"",
		"buckets/data",
		exportRoot,
		exportRoot + "/",
	}
	for idx, requested := range requests {
		t.Run(requested, func(t *testing.T) {
			call := buildMountCallFrame(uint32(1000+idx), mountProgram, 3, mountProcMnt, requested)
			_, acceptStat, payload := parseRPCReply(t, sendMountUDP(t, serverAddr, call))
			if acceptStat != rpcAcceptSuccess {
				t.Fatalf("accept_stat=%d want SUCCESS(0)", acceptStat)
			}
			if len(payload) < 4 {
				t.Fatalf("body too short: %d bytes", len(payload))
			}
			if status := binary.BigEndian.Uint32(payload[:4]); status != mnt3StatOK {
				t.Errorf("MNT(%q): mountstat3=%d want OK(0)", requested, status)
			}
			// A success reply must carry more than the bare status word.
			if len(payload) <= 4 {
				t.Errorf("MNT(%q) success body must include handle and flavors", requested)
			}
		})
	}
}
// TestMountUDPSubexportMount exercises MNT for paths under the export root
// with a fake filer client: existing subdirectories must return a directory
// handle carrying the expected inode, missing paths must return NOENT, and a
// regular file must return NOTDIR. For successful mounts it also checks that
// the UDP handle bytes equal the handle the TCP Handler produces for the
// same dirpath.
func TestMountUDPSubexportMount(t *testing.T) {
const exportRoot = "/buckets"
// Fake filer contents: three directories and one regular file, with
// matching inode-index records in the KV store.
client := &fakeNFSFilerClient{
entries: map[util.FullPath]*filer_pb.Entry{
"/buckets": testEntry("buckets", true, 100, uint32(0755), nil),
"/buckets/data": testEntry("data", true, 101, uint32(0755), nil),
"/buckets/data/nested": testEntry("nested", true, 102, uint32(0755), nil),
"/buckets/file.txt": testEntry("file.txt", false, 103, uint32(0644), []byte("hi")),
},
kv: map[string][]byte{
string(filer.InodeIndexKey(100)): testIndexRecord(t, 100, 1, "/buckets"),
string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/buckets/data"),
string(filer.InodeIndexKey(102)): testIndexRecord(t, 102, 1, "/buckets/data/nested"),
string(filer.InodeIndexKey(103)): testIndexRecord(t, 103, 1, "/buckets/file.txt"),
},
}
m, conn := newMountUDPTestServerWithClient(t, exportRoot, client)
target := conn.LocalAddr().(*net.UDPAddr)
// Build a TCP Handler from the same Server so we can compare the
// raw FH bytes both transports produce for the same subdirectory.
tcpHandler, err := m.server.newHandler()
require.NoError(t, err)
cases := []struct {
name string
dirpath string
wantStatus uint32
wantInode uint64
}{
{name: "subdirectory_one_level", dirpath: "/buckets/data", wantStatus: mnt3StatOK, wantInode: 101},
{name: "subdirectory_two_levels", dirpath: "/buckets/data/nested", wantStatus: mnt3StatOK, wantInode: 102},
{name: "subdirectory_trailing_slash", dirpath: "/buckets/data/", wantStatus: mnt3StatOK, wantInode: 101},
{name: "missing_under_export", dirpath: "/buckets/missing", wantStatus: mnt3ErrNoEnt},
{name: "deep_missing_under_export", dirpath: "/buckets/data/no-such-thing", wantStatus: mnt3ErrNoEnt},
{name: "regular_file_not_directory", dirpath: "/buckets/file.txt", wantStatus: mnt3ErrNotDir},
}
for i, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
xid := uint32(2000 + i)
reply := sendMountUDP(t, target, buildMountCallFrame(xid, mountProgram, 3, mountProcMnt, tc.dirpath))
_, astat, body := parseRPCReply(t, reply)
// MOUNT-level failures still arrive as an accepted RPC; the error
// lives in the mountstat3 word at the start of the body.
if astat != rpcAcceptSuccess {
t.Fatalf("accept_stat=%d want SUCCESS(0)", astat)
}
if len(body) < 4 {
t.Fatalf("body too short: %d bytes", len(body))
}
got := binary.BigEndian.Uint32(body[0:4])
if got != tc.wantStatus {
t.Fatalf("MNT(%q) status=%d want %d", tc.dirpath, got, tc.wantStatus)
}
// Error replies must stop after the status word.
if tc.wantStatus != mnt3StatOK {
if len(body) != 4 {
t.Errorf("MNT(%q) error body should carry only the status; got %d trailing bytes", tc.dirpath, len(body)-4)
}
return
}
if len(body) < 8 {
t.Fatalf("MNT(%q) success body missing handle length", tc.dirpath)
}
handleLen := binary.BigEndian.Uint32(body[4:8])
if uint32(len(body)) < 8+handleLen {
t.Fatalf("MNT(%q) success body truncated", tc.dirpath)
}
udpHandleBytes := body[8 : 8+handleLen]
handle, err := DecodeFileHandle(udpHandleBytes)
if err != nil {
t.Fatalf("MNT(%q) handle decode: %v", tc.dirpath, err)
}
if handle.Inode != tc.wantInode {
t.Errorf("MNT(%q) FH inode=%d want %d", tc.dirpath, handle.Inode, tc.wantInode)
}
if handle.Kind != FileHandleKindDirectory {
t.Errorf("MNT(%q) FH kind=%d want directory", tc.dirpath, handle.Kind)
}
// Transport parity: drive the TCP Handler with the same dirpath
// and confirm the bytes go-nfs's onMount would write match the
// UDP responder's bytes exactly. A regression that drifts the
// generation, exportID, or kind on one transport would fail here.
tcpStatus, tcpFS, _ := tcpHandler.Mount(context.Background(), nil, gonfs.MountRequest{Dirpath: []byte(tc.dirpath)})
require.Equal(t, gonfs.MountStatusOk, tcpStatus, "TCP Mount(%q)", tc.dirpath)
tcpHandleBytes := tcpHandler.ToHandle(tcpFS, nil)
require.NotEmpty(t, tcpHandleBytes, "TCP Mount(%q) ToHandle returned empty", tc.dirpath)
assert.Equal(t, tcpHandleBytes, udpHandleBytes, "TCP/UDP FH bytes diverge for %q", tc.dirpath)
})
}
}
// TestMountUDPRejectsWrongVersion sends a MOUNT v4 NULL call and expects
// PROG_MISMATCH with a supported range of 3..3.
func TestMountUDPRejectsWrongVersion(t *testing.T) {
	// Same defence-in-depth as the TCP version filter: don't speak v1/v4
	// MOUNT — return PROG_MISMATCH advertising 3..3 so the client knows to
	// retry with v3.
	_, conn := newMountUDPTestServer(t, "/exports")
	serverAddr := conn.LocalAddr().(*net.UDPAddr)

	call := buildMountCallFrame(1, mountProgram, 4, mountProcNull, "")
	_, acceptStat, payload := parseRPCReply(t, sendMountUDP(t, serverAddr, call))
	if acceptStat != rpcAcceptProgMismatch {
		t.Fatalf("accept_stat=%d want PROG_MISMATCH(2)", acceptStat)
	}
	if size := len(payload); size != 8 {
		t.Fatalf("PROG_MISMATCH body=%d bytes want 8", size)
	}

	lowest := binary.BigEndian.Uint32(payload[:4])
	highest := binary.BigEndian.Uint32(payload[4:])
	if !(lowest == 3 && highest == 3) {
		t.Errorf("supported range=(%d,%d) want (3,3)", lowest, highest)
	}
}
// TestMountUDPRejectsWrongProgram sends a call addressed to a different RPC
// program and expects PROG_UNAVAIL.
func TestMountUDPRejectsWrongProgram(t *testing.T) {
	_, conn := newMountUDPTestServer(t, "/exports")
	serverAddr := conn.LocalAddr().(*net.UDPAddr)

	// 100021 is NLM, which we don't run here.
	const nlmProgram = 100021
	call := buildMountCallFrame(1, nlmProgram, 4, mountProcNull, "")
	_, acceptStat, _ := parseRPCReply(t, sendMountUDP(t, serverAddr, call))
	if acceptStat != rpcAcceptProgUnavail {
		t.Errorf("accept_stat=%d want PROG_UNAVAIL(1)", acceptStat)
	}
}
// TestMountUDPUmntAcknowledges checks that UMNT is answered with SUCCESS and
// an empty body.
func TestMountUDPUmntAcknowledges(t *testing.T) {
	_, conn := newMountUDPTestServer(t, "/exports")
	serverAddr := conn.LocalAddr().(*net.UDPAddr)

	// UMNT carries a dirpath but the server is stateless and ignores it.
	call := buildMountCallFrame(8, mountProgram, 3, mountProcUmnt, "/exports")
	_, acceptStat, payload := parseRPCReply(t, sendMountUDP(t, serverAddr, call))
	if acceptStat != rpcAcceptSuccess {
		t.Errorf("accept_stat=%d want SUCCESS(0)", acceptStat)
	}
	if extra := len(payload); extra != 0 {
		t.Errorf("UMNT reply body should be empty, got %d bytes", extra)
	}
}
// TestMountUDPRejectsTruncatedMntArgs sends an MNT call whose dirpath length
// field announces 16 bytes that never follow, and expects GARBAGE_ARGS.
func TestMountUDPRejectsTruncatedMntArgs(t *testing.T) {
	_, conn := newMountUDPTestServer(t, "/exports")
	serverAddr := conn.LocalAddr().(*net.UDPAddr)

	// The frame is built by hand: buildMountCallFrame always appends the
	// padded path bytes, whereas this test needs a datagram that stops
	// right after the length word so the GARBAGE_ARGS path actually fires.
	const argOff = 24 + 16 // RPC call header + two empty AUTH_NONE stanzas
	call := make([]byte, argOff+4)
	header := []uint32{
		1,            // xid
		rpcMsgCall,   // msg_type
		2,            // rpcvers
		mountProgram, // prog
		3,            // mount vers
		mountProcMnt, // proc
	}
	for i, word := range header {
		binary.BigEndian.PutUint32(call[4*i:4*i+4], word)
	}
	// Bytes 24..40 stay zero: cred and verf are AUTH_NONE with length 0.
	binary.BigEndian.PutUint32(call[argOff:argOff+4], 16) // dirpath length=16, no bytes follow

	_, acceptStat, _ := parseRPCReply(t, sendMountUDP(t, serverAddr, call))
	if acceptStat != rpcAcceptGarbageArgs {
		t.Errorf("accept_stat=%d want GARBAGE_ARGS(4)", acceptStat)
	}
}
// TestMountUDPCloseStopsServing confirms the responder answers before Close
// and that no reply arrives for a call sent after Close.
func TestMountUDPCloseStopsServing(t *testing.T) {
	responder, conn := newMountUDPTestServer(t, "/exports")
	serverAddr := conn.LocalAddr().(*net.UDPAddr)

	// Sanity: NULL works before close.
	_ = sendMountUDP(t, serverAddr, buildMountCallFrame(1, mountProgram, 3, mountProcNull, ""))

	if closeErr := responder.Close(); closeErr != nil {
		t.Fatalf("Close: %v", closeErr)
	}

	// After Close the socket is shut, so a fresh send should fail to read a
	// reply within the deadline rather than produce a well-formed response.
	probe, dialErr := net.DialUDP("udp", nil, serverAddr)
	if dialErr != nil {
		// Some platforms refuse the dial outright after Close — that's also
		// acceptable: the server is gone either way.
		return
	}
	defer probe.Close()

	_, _ = probe.Write(buildMountCallFrame(2, mountProgram, 3, mountProcNull, ""))
	_ = probe.SetReadDeadline(time.Now().Add(200 * time.Millisecond))
	scratch := make([]byte, 1024)
	_, readErr := probe.Read(scratch)
	if readErr == nil {
		t.Error("Close should have stopped the responder, but a reply still arrived")
	}
}

View File

@@ -1,447 +0,0 @@
package nfs
import (
"encoding/binary"
"errors"
"fmt"
"io"
"net"
"sync"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
)
// Minimal PORTMAP v2 responder.
//
// The upstream willscott/go-nfs library serves NFSv3 and MOUNT on a single TCP
// port and deliberately does not register with portmap (RPC program 100000).
// Linux mount.nfs, however, queries portmap on port 111 before sending the
// MOUNT RPC, so the plain `mount -t nfs host:/export /mnt` command fails
// against a default `weed nfs` deployment.
//
// When enabled, this responder binds the privileged port 111 (RFC 1833) on
// both TCP and UDP and answers the subset of PORTMAP v2 calls that standard
// Linux clients make: PMAP_NULL, PMAP_GETPORT and PMAP_DUMP. It refuses
// registration from third parties (PMAP_SET / PMAP_UNSET return false) and
// only exposes the programs that weed itself serves.
//
// References: RFC 1833 (Portmap v2), RFC 5531 (RPC).
const (
// RPC program number and version of the portmapper itself, and the
// port used when the caller does not supply one.
portmapProgram = 100000
portmapVersion = 2
portmapPort = 111
// PORTMAP v2 procedure numbers.
pmapProcNull = 0
pmapProcSet = 1
pmapProcUnset = 2
pmapProcGetPort = 3
pmapProcDump = 4
// IP protocol numbers used in the Protocol field of a portmap entry.
ipProtoTCP = 6
ipProtoUDP = 17
// RPC program numbers of the services advertised by newPortmapServer.
nfsProgram = 100003
mountProgram = 100005
// RPC
// msg_type values.
rpcMsgCall = 0
rpcMsgReply = 1
// reply_stat value for an accepted call.
rpcMsgAccepted = 0
// accept_stat values.
rpcAcceptSuccess = 0
rpcAcceptProgUnavail = 1
rpcAcceptProgMismatch = 2
rpcAcceptProcUnavail = 3
rpcAcceptGarbageArgs = 4
// AUTH_NONE authentication flavor.
rpcAuthNone = 0
// Defensive limits. Portmap messages are tiny in practice; these caps
// protect the responder from large or slow reads.
portmapMaxRecord = 64 * 1024
// Per-connection read/write deadlines on the TCP listener. The idle
// timeout bounds how long we wait for the next request on an otherwise
// quiet connection; the IO timeout bounds a single read or write once
// one is in flight. Both guard against slowloris-style stalls on the
// privileged port 111.
portmapTCPIdleTimeout = 30 * time.Second
portmapTCPIOTimeout = 10 * time.Second
// Back-off applied before retrying after a non-fatal listener error
// (e.g. EMFILE on TCP Accept, or a transient UDP read failure) so we
// don't busy-loop when the host is under pressure.
portmapRetryBackoff = 50 * time.Millisecond
)
// portmapEntry is one advertised mapping: an RPC program and version
// reachable over an IP protocol on a port.
type portmapEntry struct {
// Program is the RPC program number (e.g. nfsProgram, mountProgram).
Program uint32
// Version is the program version being advertised.
Version uint32
// Protocol is the IP protocol number (ipProtoTCP or ipProtoUDP).
Protocol uint32
// Port is the port the program is served on.
Port uint32
}
// portmapServer is the minimal PORTMAP v2 responder. It owns one TCP
// listener and one UDP socket on the same address and serves a fixed list
// of entries.
type portmapServer struct {
// bindIP and port form the listen address used by Start.
bindIP string
port int
// entries is the set of mappings advertised to clients.
entries []portmapEntry
// tcpListener and udpConn are opened by Start and closed by Close.
tcpListener net.Listener
udpConn *net.UDPConn
// mu guards closed and conns. It is held only for bookkeeping, never
// across network IO.
mu sync.Mutex
closed bool
// conns tracks accepted TCP connections so Close can evict them.
conns map[net.Conn]struct{}
// done is closed exactly once by Close() so that background loops can
// interrupt a retry-backoff sleep instead of waiting it out.
done chan struct{}
// wg tracks the serve loops and per-connection handlers so Close can
// wait for all of them.
wg sync.WaitGroup
}
// newPortmapServer builds a responder that advertises the NFS services the
// caller runs on nfsPort. A non-positive port falls back to the standard
// portmap port.
//
// NFS itself is advertised over TCP only (the upstream go-nfs library
// doesn't speak NFS UDP). MOUNT is served over both TCP (via go-nfs) and UDP
// (via mountUDPServer in mount_udp.go), so both are advertised — that is
// what lets plain `mount -t nfs <host>:<export> /mnt` work against Linux
// clients whose default mountproto is UDP, without mountproto=tcp /
// mountport=2049 mount options.
func newPortmapServer(bindIP string, port int, nfsPort uint32) *portmapServer {
	listenPort := port
	if listenPort <= 0 {
		listenPort = portmapPort
	}

	advertised := []portmapEntry{
		{Program: nfsProgram, Version: 3, Protocol: ipProtoTCP, Port: nfsPort},
		{Program: mountProgram, Version: 3, Protocol: ipProtoTCP, Port: nfsPort},
		{Program: mountProgram, Version: 3, Protocol: ipProtoUDP, Port: nfsPort},
	}

	return &portmapServer{
		bindIP:  bindIP,
		port:    listenPort,
		entries: advertised,
		done:    make(chan struct{}),
	}
}
// Start binds the TCP listener and the UDP socket on bindIP:port and
// launches one serve goroutine for each, both tracked by the wait group.
// If the UDP side cannot be resolved or bound, the already-open TCP listener
// is closed before the wrapped error is returned.
func (ps *portmapServer) Start() error {
	listenAddr := net.JoinHostPort(ps.bindIP, fmt.Sprintf("%d", ps.port))

	listener, tcpErr := net.Listen("tcp", listenAddr)
	if tcpErr != nil {
		return fmt.Errorf("portmap tcp listen %s: %w", listenAddr, tcpErr)
	}

	resolved, resolveErr := net.ResolveUDPAddr("udp", listenAddr)
	if resolveErr != nil {
		_ = listener.Close()
		return fmt.Errorf("portmap udp resolve %s: %w", listenAddr, resolveErr)
	}

	sock, udpErr := net.ListenUDP("udp", resolved)
	if udpErr != nil {
		_ = listener.Close()
		return fmt.Errorf("portmap udp listen %s: %w", listenAddr, udpErr)
	}

	ps.tcpListener, ps.udpConn = listener, sock

	ps.wg.Add(2)
	go func() {
		defer ps.wg.Done()
		ps.serveTCP()
	}()
	go func() {
		defer ps.wg.Done()
		ps.serveUDP()
	}()
	return nil
}
// Close shuts the responder down and blocks until every goroutine tracked by
// ps.wg has finished. The first call marks the server closed, closes ps.done,
// closes the TCP listener and the UDP socket, and closes every TCP connection
// that was registered through addConn. Any later call sees ps.closed already
// set and returns nil immediately.
//
// The returned error is the TCP listener's close error if there was one,
// otherwise the UDP socket's close error, otherwise nil.
func (ps *portmapServer) Close() error {
ps.mu.Lock()
if ps.closed {
ps.mu.Unlock()
return nil
}
ps.closed = true
// Take ownership of the tracked conns and nil the field while still holding
// mu, so the network closes below happen without the lock held.
conns := ps.conns
ps.conns = nil
// Wake any serve loop that is sleeping in its retry backoff.
close(ps.done)
ps.mu.Unlock()
var first error
if ps.tcpListener != nil {
if err := ps.tcpListener.Close(); err != nil {
first = err
}
}
if ps.udpConn != nil {
if err := ps.udpConn.Close(); err != nil && first == nil {
first = err
}
}
// Evict in-flight TCP handlers so Close() does not block on idle
// clients; their read goroutines will unwind on the closed conn.
for c := range conns {
_ = c.Close()
}
ps.wg.Wait()
return first
}
// isClosed reports, under ps.mu, whether Close() has already been called.
func (ps *portmapServer) isClosed() bool {
	ps.mu.Lock()
	closed := ps.closed
	ps.mu.Unlock()
	return closed
}
// addConn records c in the set of live TCP connections so that Close() can
// evict it. The set is created lazily on first use. A false result means
// shutdown has already begun: c was not recorded and the caller is
// responsible for closing it.
func (ps *portmapServer) addConn(c net.Conn) bool {
	ps.mu.Lock()
	defer ps.mu.Unlock()

	switch {
	case ps.closed:
		return false
	case ps.conns == nil:
		ps.conns = map[net.Conn]struct{}{c: {}}
	default:
		ps.conns[c] = struct{}{}
	}
	return true
}
// removeConn forgets c once its handler has finished. Deleting from a nil map
// is a no-op in Go, so this is safe even when ps.conns is nil.
func (ps *portmapServer) removeConn(c net.Conn) {
	ps.mu.Lock()
	delete(ps.conns, c)
	ps.mu.Unlock()
}
// serveTCP is the TCP accept loop. Each accepted connection is registered via
// addConn and then served by handleTCPConn on its own goroutine, tracked by
// ps.wg. The loop returns once an Accept error is observed after Close() has
// been called, or when ps.done fires during a retry backoff.
func (ps *portmapServer) serveTCP() {
for {
conn, err := ps.tcpListener.Accept()
if err != nil {
if ps.isClosed() {
return
}
// Non-fatal (e.g. EMFILE, EINTR): log and back off rather
// than tear the listener down on a transient resource blip.
// Wake early if Close() fires during the sleep.
glog.V(1).Infof("portmap tcp accept: %v", err)
select {
case <-ps.done:
return
case <-time.After(portmapRetryBackoff):
continue
}
}
// addConn returns false once shutdown has started; the conn was not
// registered for eviction, so close it here instead of serving it.
if !ps.addConn(conn) {
_ = conn.Close()
continue
}
ps.wg.Add(1)
go func(c net.Conn) {
defer ps.wg.Done()
defer ps.removeConn(c)
ps.handleTCPConn(c)
}(conn)
}
}
// handleTCPConn serves record-marked RPC calls on one TCP connection until
// the peer goes away, a deadline expires, or a framing violation is seen. The
// connection is always closed on return.
//
// Each record is a 4-byte marker (bit 31 = last fragment, low 31 bits =
// length) followed by the RPC message. Only single-fragment records with a
// length in (0, portmapMaxRecord] are accepted; anything else drops the
// connection. Calls that handleCall answers with nil get no reply and the
// loop simply waits for the next record.
func (ps *portmapServer) handleTCPConn(conn net.Conn) {
	defer conn.Close()

	const lastFragment = uint32(1) << 31
	var marker [4]byte
	for {
		// Waiting for the next request is bounded by the idle timeout.
		_ = conn.SetReadDeadline(time.Now().Add(portmapTCPIdleTimeout))
		if _, err := io.ReadFull(conn, marker[:]); err != nil {
			return
		}

		mark := binary.BigEndian.Uint32(marker[:])
		recLen := mark &^ lastFragment
		// Portmap messages are single-fragment in practice; a clear
		// last-fragment bit or an out-of-range length ends the connection.
		if mark&lastFragment == 0 || recLen == 0 || recLen > portmapMaxRecord {
			return
		}

		record := make([]byte, recLen)
		_ = conn.SetReadDeadline(time.Now().Add(portmapTCPIOTimeout))
		if _, err := io.ReadFull(conn, record); err != nil {
			return
		}

		reply := ps.handleCall(record)
		if reply == nil {
			continue
		}

		framed := make([]byte, 4, 4+len(reply))
		binary.BigEndian.PutUint32(framed, uint32(len(reply))|lastFragment)
		framed = append(framed, reply...)
		_ = conn.SetWriteDeadline(time.Now().Add(portmapTCPIOTimeout))
		if _, err := conn.Write(framed); err != nil {
			return
		}
	}
}
// serveUDP is the UDP receive loop: one datagram in, at most one datagram
// out. A read error after Close() ends the loop; any other read error is
// logged and retried after portmapRetryBackoff, with ps.done cutting the wait
// short on shutdown. Write failures are logged and otherwise ignored.
func (ps *portmapServer) serveUDP() {
	packet := make([]byte, portmapMaxRecord)
	for {
		n, peer, err := ps.udpConn.ReadFromUDP(packet)
		if err == nil {
			// nil reply means the datagram was not a parseable RPC call.
			if reply := ps.handleCall(packet[:n]); reply != nil {
				if _, werr := ps.udpConn.WriteToUDP(reply, peer); werr != nil {
					glog.V(1).Infof("portmap udp write to %s: %v", peer, werr)
				}
			}
			continue
		}

		if ps.isClosed() {
			return
		}
		// Transient failure: keep the UDP responder alive rather than
		// letting one bad read take it down.
		glog.V(1).Infof("portmap udp read: %v", err)
		select {
		case <-ps.done:
			return
		case <-time.After(portmapRetryBackoff):
		}
	}
}
// handleCall decodes a single RPC CALL message and returns the encoded reply.
// It returns nil when the message cannot be parsed as an RPC call, meaning
// the caller should drop it without answering.
//
// Dispatch:
//   - program other than portmap      -> PROG_UNAVAIL
//   - portmap with an unexpected vers -> PROG_MISMATCH, range
//     portmapVersion..portmapVersion
//   - NULL                            -> SUCCESS, empty body
//   - GETPORT                         -> SUCCESS with the registered port, or
//     0 if nothing matches; GARBAGE_ARGS if fewer than 16 argument bytes
//   - DUMP                            -> SUCCESS with the full mapping list
//   - SET / UNSET                     -> SUCCESS with bool FALSE (refused)
//   - anything else                   -> PROC_UNAVAIL
func (ps *portmapServer) handleCall(callBuf []byte) []byte {
	xid, prog, vers, proc, args, err := parseRPCCall(callBuf)
	switch {
	case err != nil:
		return nil
	case prog != portmapProgram:
		return encodeAcceptedReply(xid, rpcAcceptProgUnavail, nil)
	case vers != portmapVersion:
		// RFC 5531 wants the supported range in the reply; only one
		// version is supported so low == high.
		var supported [8]byte
		binary.BigEndian.PutUint32(supported[0:4], portmapVersion)
		binary.BigEndian.PutUint32(supported[4:8], portmapVersion)
		return encodeAcceptedReply(xid, rpcAcceptProgMismatch, supported[:])
	}

	switch proc {
	case pmapProcNull:
		return encodeAcceptedReply(xid, rpcAcceptSuccess, nil)

	case pmapProcGetPort:
		if len(args) < 16 {
			return encodeAcceptedReply(xid, rpcAcceptGarbageArgs, nil)
		}
		wantProg := binary.BigEndian.Uint32(args[0:4])
		wantVers := binary.BigEndian.Uint32(args[4:8])
		wantProt := binary.BigEndian.Uint32(args[8:12])
		// The result stays all-zero (port 0) unless a mapping matches.
		var result [4]byte
		for i := range ps.entries {
			e := &ps.entries[i]
			if e.Program == wantProg && e.Version == wantVers && e.Protocol == wantProt {
				binary.BigEndian.PutUint32(result[:], e.Port)
				break
			}
		}
		return encodeAcceptedReply(xid, rpcAcceptSuccess, result[:])

	case pmapProcDump:
		// Per entry: 4-byte value_follows=TRUE + 16-byte mapping. The list
		// ends with a 4-byte value_follows=FALSE, which is simply the
		// zeroed tail of the buffer.
		const entrySize = 20
		body := make([]byte, entrySize*len(ps.entries)+4)
		for i, e := range ps.entries {
			rec := body[i*entrySize : (i+1)*entrySize]
			binary.BigEndian.PutUint32(rec[0:4], 1)
			binary.BigEndian.PutUint32(rec[4:8], e.Program)
			binary.BigEndian.PutUint32(rec[8:12], e.Version)
			binary.BigEndian.PutUint32(rec[12:16], e.Protocol)
			binary.BigEndian.PutUint32(rec[16:20], e.Port)
		}
		return encodeAcceptedReply(xid, rpcAcceptSuccess, body)

	case pmapProcSet, pmapProcUnset:
		// Third-party registrations are not accepted: answer bool=FALSE.
		return encodeAcceptedReply(xid, rpcAcceptSuccess, make([]byte, 4))

	default:
		return encodeAcceptedReply(xid, rpcAcceptProcUnavail, nil)
	}
}
// parseRPCCall decodes the fixed part of an RPC CALL header and steps over
// the two opaque_auth structures (cred, then verf) that follow it. On success
// args is the remainder of buf, i.e. the procedure arguments.
//
// An error is returned when buf is shorter than the 40-byte minimum header
// (six 4-byte words plus two empty opaque_auth headers), when msg_type is not
// CALL, when rpcvers is not 2, or when an opaque_auth is truncated or claims
// a body longer than portmapMaxRecord. On error, header fields decoded before
// the failure are still returned and args is nil.
func parseRPCCall(buf []byte) (xid, prog, vers, proc uint32, args []byte, err error) {
	const (
		minHeader = 40 // 6*4 fixed words + 2*(flavor + len)
		authStart = 24 // offset of the cred opaque_auth
	)
	be := binary.BigEndian

	if len(buf) < minHeader {
		return 0, 0, 0, 0, nil, fmt.Errorf("rpc call too short: %d bytes", len(buf))
	}

	xid = be.Uint32(buf[0:4])
	if msgType := be.Uint32(buf[4:8]); msgType != rpcMsgCall {
		return xid, 0, 0, 0, nil, fmt.Errorf("not an rpc call: msg_type=%d", msgType)
	}
	if rpcvers := be.Uint32(buf[8:12]); rpcvers != 2 {
		return xid, 0, 0, 0, nil, fmt.Errorf("unsupported rpc version %d", rpcvers)
	}
	prog, vers, proc = be.Uint32(buf[12:16]), be.Uint32(buf[16:20]), be.Uint32(buf[20:24])

	off := authStart
	for field := 0; field < 2; field++ {
		if len(buf) < off+8 {
			return xid, prog, vers, proc, nil, fmt.Errorf("truncated opaque_auth at offset %d", off)
		}
		authLen := be.Uint32(buf[off+4 : off+8])
		// Bound the length before rounding up to the XDR 4-byte boundary,
		// so a value near the uint32 maximum cannot wrap to a small one.
		if authLen > uint32(portmapMaxRecord) {
			return xid, prog, vers, proc, nil, errors.New("opaque_auth length exceeds limit")
		}
		next := uint64(off) + 8 + uint64((authLen+3)&^3)
		if next > uint64(len(buf)) {
			return xid, prog, vers, proc, nil, fmt.Errorf("truncated opaque_auth body at offset %d (len=%d)", off, authLen)
		}
		off = int(next)
	}
	return xid, prog, vers, proc, buf[off:], nil
}
// encodeAcceptedReply assembles a MSG_ACCEPTED RPC reply. The fixed 24-byte
// header is six big-endian words: xid, msg_type=REPLY, reply_stat=ACCEPTED, an
// AUTH_NONE verifier (flavor, zero length) and accept_stat. body is appended
// verbatim and must already be XDR-encoded: the procedure result for SUCCESS,
// the { uint32 low; uint32 high; } mismatch_info for PROG_MISMATCH (RFC 5531
// §9), and nil for PROG_UNAVAIL, PROC_UNAVAIL and GARBAGE_ARGS.
func encodeAcceptedReply(xid, acceptStat uint32, body []byte) []byte {
	header := [...]uint32{
		xid,
		rpcMsgReply,
		rpcMsgAccepted,
		rpcAuthNone, // verf flavor
		0,           // verf length
		acceptStat,
	}
	const headerLen = 4 * len(header)

	out := make([]byte, headerLen+len(body))
	for i, word := range header {
		binary.BigEndian.PutUint32(out[4*i:], word)
	}
	copy(out[headerLen:], body)
	return out
}

View File

@@ -1,418 +0,0 @@
package nfs
import (
"bytes"
"encoding/binary"
"io"
"net"
"strconv"
"testing"
"time"
)
func buildRPCCall(t *testing.T, xid, prog, vers, proc uint32, credBody, verfBody, args []byte) []byte {
t.Helper()
pad := func(b []byte) []byte {
r := len(b) % 4
if r == 0 {
return b
}
out := make([]byte, len(b)+4-r)
copy(out, b)
return out
}
buf := new(bytes.Buffer)
write := func(v uint32) {
var b [4]byte
binary.BigEndian.PutUint32(b[:], v)
buf.Write(b[:])
}
write(xid)
write(rpcMsgCall)
write(2) // rpcvers
write(prog)
write(vers)
write(proc)
// cred
write(rpcAuthNone)
write(uint32(len(credBody)))
buf.Write(pad(credBody))
// verf
write(rpcAuthNone)
write(uint32(len(verfBody)))
buf.Write(pad(verfBody))
buf.Write(args)
return buf.Bytes()
}
// parseAcceptedReply decodes a reply produced by the portmap responder and
// fails the test unless it is a REPLY / MSG_ACCEPTED message with a
// zero-length verifier. It returns the xid, the accept_stat and everything
// after the 24-byte header.
func parseAcceptedReply(t *testing.T, reply []byte) (xid, acceptStat uint32, body []byte) {
	t.Helper()

	const headerLen = 24
	if len(reply) < headerLen {
		t.Fatalf("reply too short: %d bytes", len(reply))
	}
	// word returns the i-th big-endian 32-bit word of the header.
	word := func(i int) uint32 { return binary.BigEndian.Uint32(reply[4*i:]) }

	if mt := word(1); mt != rpcMsgReply {
		t.Fatalf("msg_type=%d, want REPLY", mt)
	}
	if rs := word(2); rs != rpcMsgAccepted {
		t.Fatalf("reply_stat=%d, want ACCEPTED", rs)
	}
	// Word 3 is the verifier flavor (not checked); word 4 is its length.
	if verfLen := word(4); verfLen != 0 {
		t.Fatalf("unexpected verf length %d", verfLen)
	}
	return word(0), word(5), reply[headerLen:]
}
// newTestPortmap returns an unstarted responder on 127.0.0.1 at the default
// portmap port, advertising port 2049; it is meant for tests that call
// handleCall directly.
func newTestPortmap() *portmapServer {
	const advertisedPort = 2049
	return newPortmapServer("127.0.0.1", portmapPort, advertisedPort)
}
// TestParseRPCCall_SkipsAuth checks that parseRPCCall steps over a padded
// credential and an empty verifier and hands back exactly the argument bytes.
func TestParseRPCCall_SkipsAuth(t *testing.T) {
	const wantXID = 42
	var (
		cred     = []byte("hello") // 5 bytes, padded to 8 on the wire
		verf     = []byte{}
		wantArgs = []byte{0x01, 0x02, 0x03, 0x04}
	)

	call := buildRPCCall(t, wantXID, portmapProgram, portmapVersion, pmapProcNull, cred, verf, wantArgs)
	xid, prog, vers, proc, gotArgs, err := parseRPCCall(call)
	if err != nil {
		t.Fatalf("parseRPCCall: %v", err)
	}

	headerOK := xid == wantXID && prog == portmapProgram && vers == portmapVersion && proc == pmapProcNull
	if !headerOK {
		t.Fatalf("header mismatch: xid=%d prog=%d vers=%d proc=%d", xid, prog, vers, proc)
	}
	if !bytes.Equal(gotArgs, wantArgs) {
		t.Fatalf("args mismatch: got %x want %x", gotArgs, wantArgs)
	}
}
// TestParseRPCCall_RejectsReply checks that a minimum-length message whose
// msg_type is REPLY is refused.
func TestParseRPCCall_RejectsReply(t *testing.T) {
	msg := make([]byte, 40)
	binary.BigEndian.PutUint32(msg[4:8], rpcMsgReply)

	_, _, _, _, _, err := parseRPCCall(msg)
	if err == nil {
		t.Fatal("expected error on reply-typed message")
	}
}
// TestParseRPCCall_TruncatedAuth checks that a credential announcing a body
// far longer than the bytes actually present is rejected.
func TestParseRPCCall_TruncatedAuth(t *testing.T) {
	msg := make([]byte, 40)
	put := binary.BigEndian.PutUint32
	put(msg[4:8], rpcMsgCall)
	put(msg[8:12], 2)     // rpcvers
	put(msg[28:32], 1000) // cred length, with no body behind it

	_, _, _, _, _, err := parseRPCCall(msg)
	if err == nil {
		t.Fatal("expected error on truncated auth")
	}
}
// TestHandleCall_Null checks that PMAPPROC_NULL is answered with SUCCESS, the
// caller's xid and an empty body.
func TestHandleCall_Null(t *testing.T) {
	const wantXID = 7
	ps := newTestPortmap()

	call := buildRPCCall(t, wantXID, portmapProgram, portmapVersion, pmapProcNull, nil, nil, nil)
	xid, acc, body := parseAcceptedReply(t, ps.handleCall(call))

	ok := xid == wantXID && acc == rpcAcceptSuccess && len(body) == 0
	if !ok {
		t.Fatalf("null reply xid=%d acc=%d body=%x", xid, acc, body)
	}
}
// TestHandleCall_GetPort_HitAndMiss drives PMAPPROC_GETPORT with queries that
// match an advertised mapping (expecting port 2049) and queries that do not
// (expecting port 0).
func TestHandleCall_GetPort_HitAndMiss(t *testing.T) {
	ps := newTestPortmap()

	type query struct {
		name             string
		prog, vers, prot uint32
		wantPort         uint32
	}
	queries := []query{
		{"nfs-v3-tcp-hit", nfsProgram, 3, ipProtoTCP, 2049},
		{"mount-v3-tcp-hit", mountProgram, 3, ipProtoTCP, 2049},
		{"mount-v3-udp-hit", mountProgram, 3, ipProtoUDP, 2049},
		{"mount-v1-tcp-miss", mountProgram, 1, ipProtoTCP, 0},
		{"nfs-v3-udp-miss", nfsProgram, 3, ipProtoUDP, 0},
		{"nlm-miss", 100021, 4, ipProtoTCP, 0},
	}

	for _, q := range queries {
		q := q
		t.Run(q.name, func(t *testing.T) {
			// mapping struct: prog, vers, prot, port. The server ignores
			// the port word, so it stays zero.
			var args [16]byte
			binary.BigEndian.PutUint32(args[0:4], q.prog)
			binary.BigEndian.PutUint32(args[4:8], q.vers)
			binary.BigEndian.PutUint32(args[8:12], q.prot)

			call := buildRPCCall(t, 11, portmapProgram, portmapVersion, pmapProcGetPort, nil, nil, args[:])
			xid, acc, body := parseAcceptedReply(t, ps.handleCall(call))

			switch {
			case xid != 11:
				t.Fatalf("xid=%d want 11", xid)
			case acc != rpcAcceptSuccess:
				t.Fatalf("acc=%d want SUCCESS", acc)
			case len(body) != 4:
				t.Fatalf("getport body len=%d want 4", len(body))
			}
			if got := binary.BigEndian.Uint32(body); got != q.wantPort {
				t.Fatalf("port=%d want %d", got, q.wantPort)
			}
		})
	}
}
// TestHandleCall_Dump decodes the PMAPPROC_DUMP linked list and checks that
// it contains exactly the three advertised mappings, in any order.
func TestHandleCall_Dump(t *testing.T) {
	ps := newTestPortmap()
	call := buildRPCCall(t, 13, portmapProgram, portmapVersion, pmapProcDump, nil, nil, nil)
	_, acc, body := parseAcceptedReply(t, ps.handleCall(call))
	if acc != rpcAcceptSuccess {
		t.Fatalf("acc=%d", acc)
	}

	// Walk the list: each node is value_follows (4 bytes) and, when that is
	// non-zero, a 16-byte mapping.
	var got []portmapEntry
	for rest := body; len(rest) >= 4; {
		follows := binary.BigEndian.Uint32(rest[:4])
		rest = rest[4:]
		if follows == 0 {
			break
		}
		if len(rest) < 16 {
			t.Fatalf("truncated entry at %d", len(body)-len(rest))
		}
		got = append(got, portmapEntry{
			Program:  binary.BigEndian.Uint32(rest[0:4]),
			Version:  binary.BigEndian.Uint32(rest[4:8]),
			Protocol: binary.BigEndian.Uint32(rest[8:12]),
			Port:     binary.BigEndian.Uint32(rest[12:16]),
		})
		rest = rest[16:]
	}
	if len(got) != 3 {
		t.Fatalf("got %d dump entries, want 3: %+v", len(got), got)
	}

	want := []portmapEntry{
		{Program: nfsProgram, Version: 3, Protocol: ipProtoTCP, Port: 2049},
		{Program: mountProgram, Version: 3, Protocol: ipProtoTCP, Port: 2049},
		{Program: mountProgram, Version: 3, Protocol: ipProtoUDP, Port: 2049},
	}
	seen := make(map[portmapEntry]bool, len(want))
	for _, e := range want {
		seen[e] = false
	}
	for _, e := range got {
		if _, expected := seen[e]; !expected {
			t.Fatalf("unexpected dump entry %+v", e)
		}
		seen[e] = true
	}
	for _, e := range want {
		if !seen[e] {
			t.Fatalf("missing dump entry %+v", e)
		}
	}
}
// TestHandleCall_UnknownProg checks that a call addressed to a program other
// than portmap is answered with PROG_UNAVAIL.
func TestHandleCall_UnknownProg(t *testing.T) {
	const bogusProgram = 999999
	ps := newTestPortmap()

	call := buildRPCCall(t, 1, bogusProgram, 1, 0, nil, nil, nil)
	if _, acc, _ := parseAcceptedReply(t, ps.handleCall(call)); acc != rpcAcceptProgUnavail {
		t.Fatalf("acc=%d want PROG_UNAVAIL", acc)
	}
}
// TestHandleCall_VersionMismatch checks that a portmap call with an
// unsupported version yields PROG_MISMATCH whose 8-byte body carries the
// range portmapVersion..portmapVersion.
func TestHandleCall_VersionMismatch(t *testing.T) {
	const bogusVersion = 42
	ps := newTestPortmap()

	call := buildRPCCall(t, 1, portmapProgram, bogusVersion, pmapProcNull, nil, nil, nil)
	_, acc, body := parseAcceptedReply(t, ps.handleCall(call))

	if acc != rpcAcceptProgMismatch {
		t.Fatalf("acc=%d want PROG_MISMATCH", acc)
	}
	if len(body) != 8 {
		t.Fatalf("mismatch body len=%d want 8", len(body))
	}
	lo, hi := binary.BigEndian.Uint32(body[0:4]), binary.BigEndian.Uint32(body[4:8])
	if !(lo == portmapVersion && hi == portmapVersion) {
		t.Fatalf("mismatch range lo=%d hi=%d", lo, hi)
	}
}
// TestHandleCall_UnknownProc checks that an unimplemented portmap procedure
// number is answered with PROC_UNAVAIL.
func TestHandleCall_UnknownProc(t *testing.T) {
	const bogusProc = 42
	ps := newTestPortmap()

	call := buildRPCCall(t, 1, portmapProgram, portmapVersion, bogusProc, nil, nil, nil)
	if _, acc, _ := parseAcceptedReply(t, ps.handleCall(call)); acc != rpcAcceptProcUnavail {
		t.Fatalf("acc=%d want PROC_UNAVAIL", acc)
	}
}
// TestHandleCall_SetRefused checks that PMAPPROC_SET is accepted at the RPC
// level but answered with the XDR boolean FALSE, i.e. the registration is
// refused.
func TestHandleCall_SetRefused(t *testing.T) {
	ps := newTestPortmap()

	var mapping [16]byte // zeroed mapping struct
	call := buildRPCCall(t, 1, portmapProgram, portmapVersion, pmapProcSet, nil, nil, mapping[:])
	_, acc, body := parseAcceptedReply(t, ps.handleCall(call))

	if acc != rpcAcceptSuccess {
		t.Fatalf("acc=%d", acc)
	}
	refused := len(body) == 4 && binary.BigEndian.Uint32(body) == 0
	if !refused {
		t.Fatalf("PMAP_SET must return FALSE, got %x", body)
	}
}
// pickFreePort returns a loopback port number that was free for BOTH TCP and
// UDP at the time of the call. The portmap server under test binds the same
// port number on both transports, so probing TCP alone (as this helper used
// to) could hand back a port whose UDP side is already taken and make Start()
// fail spuriously.
//
// The OS picks an unused TCP port (":0"); the helper then verifies the same
// number can be bound for UDP, retrying with a fresh TCP port a bounded number
// of times if not. Both probe sockets are closed before returning, so the
// usual caveat applies: another process may still grab the port before the
// caller binds it. Used so the end-to-end tests can run without touching the
// privileged default port 111. The test is failed via t.Fatalf if no port is
// found.
func pickFreePort(t *testing.T) int {
	t.Helper()

	const attempts = 16
	var lastUDPErr error
	for i := 0; i < attempts; i++ {
		ln, err := net.Listen("tcp", "127.0.0.1:0")
		if err != nil {
			t.Fatalf("listen: %v", err)
		}
		port := ln.Addr().(*net.TCPAddr).Port

		// Probe UDP while the TCP listener is still held so the kernel
		// cannot hand the same TCP port to someone else in between.
		pc, udpErr := net.ListenPacket("udp", net.JoinHostPort("127.0.0.1", strconv.Itoa(port)))
		_ = ln.Close()
		if udpErr != nil {
			lastUDPErr = udpErr
			continue
		}
		_ = pc.Close()
		return port
	}
	t.Fatalf("no port free on both tcp and udp after %d attempts: %v", attempts, lastUDPErr)
	return 0
}
// TestPortmapServer_UDPGetPort starts a real responder on a free loopback
// port and checks that a GETPORT query for NFS v3/TCP sent over UDP comes back
// with the advertised port 2049.
func TestPortmapServer_UDPGetPort(t *testing.T) {
	const (
		wantXID  = 99
		wantPort = 2049
	)

	port := pickFreePort(t)
	ps := newPortmapServer("127.0.0.1", port, wantPort)
	if err := ps.Start(); err != nil {
		t.Fatalf("start: %v", err)
	}
	t.Cleanup(func() { _ = ps.Close() })

	var query [16]byte
	binary.BigEndian.PutUint32(query[0:4], nfsProgram)
	binary.BigEndian.PutUint32(query[4:8], 3)
	binary.BigEndian.PutUint32(query[8:12], ipProtoTCP)
	call := buildRPCCall(t, wantXID, portmapProgram, portmapVersion, pmapProcGetPort, nil, nil, query[:])

	serverAddr := net.JoinHostPort("127.0.0.1", strconv.Itoa(port))
	conn, err := net.Dial("udp", serverAddr)
	if err != nil {
		t.Fatalf("dial udp: %v", err)
	}
	defer conn.Close()
	// Bound the whole exchange so a lost datagram fails fast.
	_ = conn.SetDeadline(time.Now().Add(2 * time.Second))

	if _, err := conn.Write(call); err != nil {
		t.Fatalf("write: %v", err)
	}
	reply := make([]byte, 4096)
	n, err := conn.Read(reply)
	if err != nil {
		t.Fatalf("read: %v", err)
	}

	xid, acc, body := parseAcceptedReply(t, reply[:n])
	if !(xid == wantXID && acc == rpcAcceptSuccess && len(body) == 4) {
		t.Fatalf("bad reply xid=%d acc=%d body=%x", xid, acc, body)
	}
	if got := binary.BigEndian.Uint32(body); got != wantPort {
		t.Fatalf("udp getport port=%d want 2049", got)
	}
}
// TestPortmapServer_CloseEvictsIdleTCPConn verifies that Close() actively
// closes an idle, already-served TCP connection instead of waiting for the
// server-side idle deadline or for the client to hang up.
//
// The server is registered with t.Cleanup right after Start(): previously any
// t.Fatalf between Start() and the explicit Close() (other than the dial
// failure) left the listener, the UDP socket and the serve goroutines running
// for the rest of the test binary. Close() returns nil immediately when the
// server is already closed, so the extra call is harmless on the success path.
func TestPortmapServer_CloseEvictsIdleTCPConn(t *testing.T) {
	port := pickFreePort(t)
	ps := newPortmapServer("127.0.0.1", port, 2049)
	if err := ps.Start(); err != nil {
		t.Fatalf("start: %v", err)
	}
	// Safety net for every failure path below.
	t.Cleanup(func() { _ = ps.Close() })

	conn, err := net.Dial("tcp", net.JoinHostPort("127.0.0.1", strconv.Itoa(port)))
	if err != nil {
		t.Fatalf("dial: %v", err)
	}
	defer conn.Close()

	// Issue one call and read its reply so the server-side connection is
	// definitely registered before we trigger shutdown.
	msg := buildRPCCall(t, 1, portmapProgram, portmapVersion, pmapProcNull, nil, nil, nil)
	var mark [4]byte
	binary.BigEndian.PutUint32(mark[:], uint32(len(msg))|(1<<31))
	_ = conn.SetDeadline(time.Now().Add(2 * time.Second))
	if _, err := conn.Write(mark[:]); err != nil {
		t.Fatalf("write mark: %v", err)
	}
	if _, err := conn.Write(msg); err != nil {
		t.Fatalf("write msg: %v", err)
	}
	if _, err := io.ReadFull(conn, mark[:]); err != nil {
		t.Fatalf("read mark: %v", err)
	}
	rlen := binary.BigEndian.Uint32(mark[:]) &^ (1 << 31)
	if _, err := io.ReadFull(conn, make([]byte, rlen)); err != nil {
		t.Fatalf("read body: %v", err)
	}

	// Close must return long before the TCP idle deadline (30s) — in
	// other words, the server must actively close the idle conn rather
	// than wait for the deadline or for the client to disconnect.
	done := make(chan error, 1)
	go func() { done <- ps.Close() }()
	select {
	case err := <-done:
		if err != nil {
			t.Fatalf("Close: %v", err)
		}
	case <-time.After(2 * time.Second):
		t.Fatal("Close did not return within 2s; in-flight conn not evicted")
	}

	// The client side must now observe the eviction as a read error.
	_ = conn.SetReadDeadline(time.Now().Add(1 * time.Second))
	if _, err := conn.Read(make([]byte, 4)); err == nil {
		t.Fatal("expected read error on client conn after server Close")
	}
}
// TestPortmapServer_TCPGetPort starts a real responder on a free loopback
// port and checks that a record-marked GETPORT query for MOUNT v3/TCP sent
// over TCP comes back with the advertised port 2049.
func TestPortmapServer_TCPGetPort(t *testing.T) {
	const (
		wantXID      = 123
		wantPort     = 2049
		lastFragment = uint32(1) << 31
	)

	port := pickFreePort(t)
	ps := newPortmapServer("127.0.0.1", port, wantPort)
	if err := ps.Start(); err != nil {
		t.Fatalf("start: %v", err)
	}
	t.Cleanup(func() { _ = ps.Close() })

	var query [16]byte
	binary.BigEndian.PutUint32(query[0:4], mountProgram)
	binary.BigEndian.PutUint32(query[4:8], 3)
	binary.BigEndian.PutUint32(query[8:12], ipProtoTCP)
	call := buildRPCCall(t, wantXID, portmapProgram, portmapVersion, pmapProcGetPort, nil, nil, query[:])

	serverAddr := net.JoinHostPort("127.0.0.1", strconv.Itoa(port))
	conn, err := net.Dial("tcp", serverAddr)
	if err != nil {
		t.Fatalf("dial tcp: %v", err)
	}
	defer conn.Close()
	_ = conn.SetDeadline(time.Now().Add(2 * time.Second))

	// Request: record marker (last-fragment bit + length), then the call.
	var sendMark [4]byte
	binary.BigEndian.PutUint32(sendMark[:], uint32(len(call))|lastFragment)
	if _, err := conn.Write(sendMark[:]); err != nil {
		t.Fatalf("write mark: %v", err)
	}
	if _, err := conn.Write(call); err != nil {
		t.Fatalf("write msg: %v", err)
	}

	// Response: record marker, then that many bytes of reply.
	var recvMark [4]byte
	if _, err := io.ReadFull(conn, recvMark[:]); err != nil {
		t.Fatalf("read mark: %v", err)
	}
	reply := make([]byte, binary.BigEndian.Uint32(recvMark[:])&^lastFragment)
	if _, err := io.ReadFull(conn, reply); err != nil {
		t.Fatalf("read body: %v", err)
	}

	xid, acc, body := parseAcceptedReply(t, reply)
	if !(xid == wantXID && acc == rpcAcceptSuccess && len(body) == 4) {
		t.Fatalf("bad reply xid=%d acc=%d body=%x", xid, acc, body)
	}
	if got := binary.BigEndian.Uint32(body); got != wantPort {
		t.Fatalf("tcp getport port=%d want 2049", got)
	}
}

View File

@@ -1,377 +0,0 @@
package nfs
import (
"bufio"
"encoding/binary"
"errors"
"io"
"net"
"sync"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
)
// The upstream willscott/go-nfs library dispatches RPC calls by (program,
// procedure) only — it does not validate the RPC program version. That means
// a Linux client speaking NFSv4 (program 100003 vers 4) lands on the same
// handler map as NFSv3: proc=1 routes to NFSv3 SETATTR, which parses the
// NFSv4 COMPOUND args as if they were SETATTR3args and writes a malformed
// reply. The client cannot decode that reply, the kernel returns
// EPROTONOSUPPORT, and mount.nfs prints "requested NFS version or transport
// protocol is not supported" without ever falling back to v3.
//
// The default Linux mount.nfs path is to try NFSv4 first, so this affects
// every plain `mount -t nfs <host>:<export> /mnt` against a `weed nfs`
// deployment. To make the v4→v3 fallback work, we wrap the listener so the
// first RPC frame on each new TCP connection is inspected: if the program is
// NFS or MOUNT and the version is not 3, we synthesize a PROG_MISMATCH reply
// (with the supported version range 3..3) directly to the socket and close
// the connection. The client then retries with v3 and proceeds normally.
//
// Clients keep the same program/version for the lifetime of a TCP connection
// in practice, so we only need to check the first frame; subsequent frames
// flow through to go-nfs unchanged. This avoids vendoring go-nfs while still
// producing protocol-correct rejections.
// RPC numeric constants used here (rpcMsgCall, rpcMsgReply, rpcMsgAccepted,
// rpcAcceptProgMismatch, rpcAuthNone, nfsProgram, mountProgram) are defined
// alongside the portmap responder in portmap.go to keep one source of truth
// per package.
// Timing knobs for the version filter.
const (
	// rpcVersionFilterPeekTimeout is the longest we wait for the first
	// frame header on a freshly accepted connection. When it expires the
	// (possibly half-open) socket is handed to go-nfs unfiltered.
	rpcVersionFilterPeekTimeout = 10 * time.Second

	// rpcVersionFilterAcceptBackoff is the pause between retries after a
	// transient Accept() error (EMFILE, EAGAIN, ECONNABORTED, etc.). It
	// mirrors portmapRetryBackoff in portmap.go so both NFS-listening
	// goroutines back off identically under host resource pressure.
	rpcVersionFilterAcceptBackoff = 50 * time.Millisecond
)

// Wire-format constants for the version filter.
const (
	// rpcVersionFilterPeekLen is how many bytes are peeked from a new
	// connection: the 4-byte fragment marker plus the 24-byte fixed RPC
	// header (xid + msg_type + rpcvers + prog + vers + proc).
	rpcVersionFilterPeekLen = 28

	// supportedNFSVer is the only program version let through for the NFS
	// and MOUNT programs; it is also reported as both ends of the
	// supported range in PROG_MISMATCH replies.
	supportedNFSVer = 3
)
// versionFilterListener wraps a net.Listener and takes the per-connection RPC
// peek off the Accept() critical path.
//
// gonfs.Serve calls Accept serially, so peeking inline would let a single
// slow or silent client (for example a completed TCP handshake with no RPC
// payload) stall every other connect for up to rpcVersionFilterPeekTimeout.
// Instead, a background goroutine runs the inner Accept() loop and gives each
// raw conn its own short-lived goroutine for the peek. Conns that pass are
// sent on acceptCh, which the wrapper's Accept() receives from; rejected
// conns never reach the channel because PROG_MISMATCH has already been
// written by the time their goroutine returns.
type versionFilterListener struct {
	// inner is the wrapped listener that actually accepts sockets.
	inner net.Listener

	// acceptCh carries validated conns from handleConn to Accept().
	acceptCh chan net.Conn

	// closed is closed (once, via closeOnce) either by Close() or by the
	// accept loop when it exits. After that Accept() stops blocking and
	// returns acceptErr, or net.ErrClosed if none was recorded.
	closed    chan struct{}
	closeOnce sync.Once

	// startOnce makes the background accept loop start at most once; wg
	// counts that loop plus every per-conn goroutine it spawns.
	startOnce sync.Once
	wg        sync.WaitGroup

	// mu guards acceptErr and inFlight.
	mu        sync.Mutex
	acceptErr error
	// inFlight holds raw (pre-peek) conns currently inside handleConn, so
	// Close() can interrupt their Peek() by closing them rather than
	// waiting out rpcVersionFilterPeekTimeout for each idle client.
	inFlight map[net.Conn]struct{}
}
// newVersionFilterListener wraps inner in a version-filtering listener. Only
// the channels are allocated here; no goroutine starts until the first
// Accept() call.
func newVersionFilterListener(inner net.Listener) net.Listener {
	l := new(versionFilterListener)
	l.inner = inner
	l.acceptCh = make(chan net.Conn) // unbuffered: hand-off to Accept()
	l.closed = make(chan struct{})
	return l
}
// start launches the background accept loop exactly once, on the first
// Accept() call. Constructing the wrapper therefore spawns no goroutines,
// matching the embedded-listener form this type replaced.
func (l *versionFilterListener) start() {
	launch := func() {
		l.wg.Add(1)
		go l.acceptLoop()
	}
	l.startOnce.Do(launch)
}
// Accept returns the next connection that passed the version filter. It
// starts the background accept loop on first use and blocks until either a
// validated conn is available or the listener is closed, in which case it
// returns the error reported by terminalErr.
func (l *versionFilterListener) Accept() (net.Conn, error) {
	l.start()

	select {
	case <-l.closed:
		return nil, l.terminalErr()
	case conn := <-l.acceptCh:
		return conn, nil
	}
}
// Close shuts the listener down and waits for the accept loop and all
// per-connection goroutines to finish. It returns the inner listener's close
// error.
//
// Order: signal closed, close the inner listener, then close every raw conn
// still being peeked. Without that last step, each idle client (TCP handshake
// but no RPC payload) would hold Close() for up to
// rpcVersionFilterPeekTimeout.
func (l *versionFilterListener) Close() error {
	l.signalClose()
	closeErr := l.inner.Close()
	l.evictInFlight()
	l.wg.Wait()
	return closeErr
}
// Addr reports the address of the wrapped listener.
func (l *versionFilterListener) Addr() net.Addr {
	addr := l.inner.Addr()
	return addr
}
// signalClose closes l.closed. closeOnce makes repeated calls (from Close()
// and from the accept loop's deferred call) safe.
func (l *versionFilterListener) signalClose() {
	l.closeOnce.Do(func() { close(l.closed) })
}
// terminalErr returns the error Accept() should report once the listener is
// closed: the error recorded by the accept loop if there is one, otherwise
// net.ErrClosed.
func (l *versionFilterListener) terminalErr() error {
	l.mu.Lock()
	recorded := l.acceptErr
	l.mu.Unlock()

	if recorded == nil {
		return net.ErrClosed
	}
	return recorded
}
// trackInFlight registers a raw conn that is about to be peeked so that
// Close() can interrupt the Peek() by closing it. The in-flight set is
// created on first use. A false result means shutdown has already started:
// c was not registered and the caller must close it and give up.
func (l *versionFilterListener) trackInFlight(c net.Conn) bool {
	l.mu.Lock()
	defer l.mu.Unlock()

	select {
	case <-l.closed:
		return false
	default:
		if l.inFlight == nil {
			l.inFlight = make(map[net.Conn]struct{})
		}
		l.inFlight[c] = struct{}{}
		return true
	}
}
// untrackInFlight drops c from the in-flight set once its peek is done.
// Deleting from a nil map is a no-op in Go, so this is safe even when
// l.inFlight is nil.
func (l *versionFilterListener) untrackInFlight(c net.Conn) {
	l.mu.Lock()
	delete(l.inFlight, c)
	l.mu.Unlock()
}
// evictInFlight closes every conn that is currently inside handleConn, which
// makes their pending Peek() return at once. The set is detached under the
// lock and the sockets are closed outside it. handleConn's deferred
// untrackInFlight stays safe afterwards because delete on a nil map is a
// no-op.
func (l *versionFilterListener) evictInFlight() {
	l.mu.Lock()
	pending := l.inFlight
	l.inFlight = nil
	l.mu.Unlock()

	for conn := range pending {
		_ = conn.Close()
	}
}
// acceptLoop is the background goroutine started by start(). It calls
// inner.Accept() in a loop and spawns a handleConn goroutine (counted in
// l.wg) for every accepted conn. It returns when the inner listener reports
// net.ErrClosed, or when l.closed fires while it is backing off after a
// transient error. On the way out the deferred signalClose closes l.closed so
// a blocked Accept() wakes up, and wg.Done releases Close().
func (l *versionFilterListener) acceptLoop() {
defer l.wg.Done()
defer l.signalClose()
for {
conn, err := l.inner.Accept()
if err != nil {
// Permanent: the inner listener has been closed (Close(),
// shutdown, or an unrecoverable error from the OS). Surface
// the error to Accept() and stop.
if errors.Is(err, net.ErrClosed) {
l.mu.Lock()
// Keep only the first terminal error.
if l.acceptErr == nil {
l.acceptErr = err
}
l.mu.Unlock()
return
}
// Transient (EMFILE, EAGAIN, ECONNABORTED on accept,
// timeouts if a deadline is ever set): treating these as
// terminal would tear the whole NFS server down on a
// resource blip. Back off briefly and retry, mirroring the
// pattern in portmap.go's serveTCP.
glog.V(1).Infof("nfs version filter: transient accept error: %v", err)
select {
case <-l.closed:
return
case <-time.After(rpcVersionFilterAcceptBackoff):
continue
}
}
// Peek on a dedicated goroutine so one slow client cannot stall this loop.
l.wg.Add(1)
go l.handleConn(conn)
}
}
// handleConn performs the version peek for one accepted conn on its own
// goroutine, so a slow client delays nobody but itself. A conn that passes
// the filter is offered to Accept() through acceptCh; if the listener closes
// before anyone takes it, the conn is closed here so no socket outlives
// shutdown.
func (l *versionFilterListener) handleConn(conn net.Conn) {
	defer l.wg.Done()

	if tracked := l.trackInFlight(conn); !tracked {
		// Shutdown already began: skip the Peek we would only have to
		// interrupt, and just close the raw conn.
		_ = conn.Close()
		return
	}
	defer l.untrackInFlight(conn)

	validated, ok := filterFirstRPCFrame(conn)
	if !ok {
		// filterFirstRPCFrame already sent PROG_MISMATCH and closed conn.
		return
	}

	select {
	case <-l.closed:
		_ = validated.Close()
	case l.acceptCh <- validated:
	}
}
// peekedConn is a net.Conn whose reads are served by reader instead of the
// embedded connection. filterFirstRPCFrame sets reader to the bufio.Reader it
// peeked with, so the already-buffered first RPC header is replayed before
// further data is pulled from the socket. Every other method, writes
// included, goes straight to the embedded Conn.
type peekedConn struct {
	net.Conn
	reader io.Reader
}

// Read fills p from the wrapped reader rather than from the raw connection.
func (pc *peekedConn) Read(p []byte) (int, error) {
	n, err := pc.reader.Read(p)
	return n, err
}
// filterFirstRPCFrame inspects the first RPC frame on conn and decides whether
// to pass it through to go-nfs. Returns (wrappedConn, true) if the frame is
// for a supported (program, version) — including programs we don't recognize,
// since go-nfs handles its own PROG_UNAVAIL response. Returns (nil, false) if
// we already replied with PROG_MISMATCH and closed conn.
//
// On peek failure (early close, deadline) we pass the connection through:
// returning an error here would silently drop legitimate clients on a flaky
// link, and go-nfs has its own per-frame error handling.
func filterFirstRPCFrame(conn net.Conn) (net.Conn, bool) {
r := bufio.NewReader(conn)
deadlineErr := conn.SetReadDeadline(time.Now().Add(rpcVersionFilterPeekTimeout))
hdr, peekErr := r.Peek(rpcVersionFilterPeekLen)
// Always clear the deadline before returning to go-nfs; failing to do so
// would make every subsequent Read() time out at the same instant.
if deadlineErr == nil {
_ = conn.SetReadDeadline(time.Time{})
}
if peekErr != nil {
return &peekedConn{Conn: conn, reader: r}, true
}
fragMark := binary.BigEndian.Uint32(hdr[0:4])
if fragMark&(1<<31) == 0 {
// Multi-fragment record: portmap-style filtering of the first frame
// would need reassembly. Fall through to go-nfs which handles this.
return &peekedConn{Conn: conn, reader: r}, true
}
// Peek(28) can read across record boundaries — the first fragment may
// be shorter than the fixed RPC CALL header (24 bytes after the marker)
// with the remaining bytes belonging to the *next* RPC. Indexing into
// hdr[16:24] without first checking the fragment length would parse
// fields from a different RPC and either spuriously reject or pass it.
// Pass through if the first fragment can't possibly hold a full header
// and let go-nfs surface the framing error.
if fragLen := fragMark &^ uint32(1<<31); fragLen < 24 {
return &peekedConn{Conn: conn, reader: r}, true
}
xid := binary.BigEndian.Uint32(hdr[4:8])
if msgType := binary.BigEndian.Uint32(hdr[8:12]); msgType != rpcMsgCall {
// Not a CALL — odd, but pass through.
return &peekedConn{Conn: conn, reader: r}, true
}
if rpcVers := binary.BigEndian.Uint32(hdr[12:16]); rpcVers != 2 {
// ONC RPC v2 is the only version we and go-nfs speak; if the
// rpcvers field is anything else the rest of the header is
// untrusted (could be a non-RPC protocol that happens to share
// the port, or simply garbled traffic). Don't synthesize a
// PROG_MISMATCH that lies about supporting NFS — pass it
// through and let go-nfs / RFC 5531 §9 RPC_MISMATCH handling
// in the upstream library do the right thing.
return &peekedConn{Conn: conn, reader: r}, true
}
prog := binary.BigEndian.Uint32(hdr[16:20])
vers := binary.BigEndian.Uint32(hdr[20:24])
switch prog {
case nfsProgram, mountProgram:
default:
// Unknown program: let go-nfs reply PROG_UNAVAIL itself.
return &peekedConn{Conn: conn, reader: r}, true
}
if vers == supportedNFSVer {
return &peekedConn{Conn: conn, reader: r}, true
}
glog.V(1).Infof("nfs: rejecting client %s with PROG_MISMATCH: prog=%d vers=%d (supported=%d)",
conn.RemoteAddr(), prog, vers, supportedNFSVer)
if err := writeProgMismatchTCP(conn, xid, supportedNFSVer, supportedNFSVer); err != nil {
glog.V(1).Infof("nfs: write PROG_MISMATCH to %s: %v", conn.RemoteAddr(), err)
}
_ = conn.Close()
return nil, false
}
// progMismatchBodyLen is the size in bytes of the PROG_MISMATCH reply body,
// i.e. the eight 32-bit words that follow the fragment header.
const progMismatchBodyLen = 32

// writeProgMismatchTCP writes one record-marked RPC reply carrying
// MSG_ACCEPTED + PROG_MISMATCH and the supported version range low..high, per
// RFC 5531 section 9. The whole frame goes out in a single Write call and
// that call's error is returned. Frame layout, all big-endian uint32:
//
//	fragment_header (last-fragment | length)
//	xid
//	msg_type    = REPLY(1)
//	reply_stat  = MSG_ACCEPTED(0)
//	verf_flavor = AUTH_NONE(0)
//	verf_len    = 0
//	accept_stat = PROG_MISMATCH(2)
//	low
//	high
func writeProgMismatchTCP(w io.Writer, xid, low, high uint32) error {
	frame := [...]uint32{
		uint32(progMismatchBodyLen) | (1 << 31),
		xid,
		rpcMsgReply,
		rpcMsgAccepted,
		rpcAuthNone,
		0, // verf opaque length (always zero for AUTH_NONE)
		rpcAcceptProgMismatch,
		low,
		high,
	}

	out := make([]byte, 4*len(frame))
	for i, word := range frame {
		binary.BigEndian.PutUint32(out[4*i:], word)
	}
	_, err := w.Write(out)
	return err
}

View File

@@ -1,561 +0,0 @@
package nfs
import (
"encoding/binary"
"errors"
"io"
"net"
"sync"
"testing"
"time"
)
// buildRPCCallFrame returns a single TCP record holding an ONC RPC v2 CALL
// header (RFC 5531) with no procedure arguments, which is all the version
// filter needs to make its accept/reject decision.
//
// The 44-byte result is a 4-byte record marker (last-fragment bit set, body
// length 40) followed by xid, msg_type=CALL(0), rpcvers=2, prog, vers and
// proc. The final 16 bytes are left zero, which encodes two empty AUTH_NONE
// opaque_auth structs (credential and verifier).
func buildRPCCallFrame(xid, prog, vers, proc uint32) []byte {
	const (
		markerLen    = 4
		bodyLen      = 40
		lastFragment = 1 << 31
	)

	header := [...]uint32{
		bodyLen | lastFragment,
		xid,
		0, // msg_type CALL
		2, // rpcvers
		prog,
		vers,
		proc,
	}

	frame := make([]byte, markerLen+bodyLen)
	for i, word := range header {
		binary.BigEndian.PutUint32(frame[4*i:], word)
	}
	// cred + verf both AUTH_NONE / length 0: already zero in a fresh slice.
	return frame
}
// readPROGMismatchReply reads exactly one TCP-framed reply of the shape
// produced by writeProgMismatchTCP from conn and validates it word by word.
// It fails the test immediately if the read does not complete within two
// seconds, if the record marker lacks the last-fragment bit or carries the
// wrong body length, or if msg_type, reply_stat or accept_stat are not
// REPLY / MSG_ACCEPTED / PROG_MISMATCH. On success it returns the reply's xid
// and the advertised (low, high) version range.
func readPROGMismatchReply(t *testing.T, conn net.Conn) (xid, low, high uint32) {
	t.Helper()
	_ = conn.SetReadDeadline(time.Now().Add(2 * time.Second))

	reply := make([]byte, 4+progMismatchBodyLen)
	if n, err := io.ReadFull(conn, reply); err != nil {
		t.Fatalf("read reply: %v (got %d bytes)", err, n)
	}

	// word returns the i-th big-endian uint32 of the reply.
	word := func(i int) uint32 {
		return binary.BigEndian.Uint32(reply[4*i : 4*i+4])
	}

	const lastFragment uint32 = 1 << 31
	marker := word(0)
	if marker&lastFragment == 0 {
		t.Fatalf("reply frame missing last-fragment bit: %x", marker)
	}
	if got := marker &^ lastFragment; got != progMismatchBodyLen {
		t.Fatalf("reply body length=%d want %d", got, progMismatchBodyLen)
	}
	if mt := word(2); mt != 1 {
		t.Fatalf("reply msg_type=%d want REPLY(1)", mt)
	}
	if rs := word(3); rs != 0 {
		t.Fatalf("reply reply_stat=%d want MSG_ACCEPTED(0)", rs)
	}
	if as := word(6); as != 2 {
		t.Fatalf("reply accept_stat=%d want PROG_MISMATCH(2)", as)
	}
	return word(1), word(7), word(8)
}
// TestVersionFilterRejectsNFSv4WithProgMismatch sends an NFS program call
// with version 4 through the version filter and checks three things: the
// client receives a PROG_MISMATCH reply echoing its xid and advertising only
// supportedNFSVer, the filter then closes the socket, and the rejected
// connection is never handed to whoever is calling Accept.
func TestVersionFilterRejectsNFSv4WithProgMismatch(t *testing.T) {
	const probeXID uint32 = 0xdeadbeef

	inner, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}
	defer inner.Close()
	filtered := newVersionFilterListener(inner)

	// A real server would hand accepted conns to go-nfs. Here Accept() only
	// needs to be driven so the filter runs; nothing should ever arrive on
	// this channel because the v4 frame is rejected.
	surfaced := make(chan net.Conn, 1)
	go func() {
		for {
			c, acceptErr := filtered.Accept()
			if acceptErr != nil {
				return
			}
			surfaced <- c
		}
	}()

	client, err := net.Dial("tcp", inner.Addr().String())
	if err != nil {
		t.Fatalf("dial: %v", err)
	}
	defer client.Close()

	// NFSv4 NULL: the first probe Linux mount.nfs sends when trying v4.
	if _, err := client.Write(buildRPCCallFrame(probeXID, nfsProgram, 4, 0)); err != nil {
		t.Fatalf("write: %v", err)
	}

	gotXID, low, high := readPROGMismatchReply(t, client)
	if gotXID != probeXID {
		t.Errorf("xid=%x want %x", gotXID, probeXID)
	}
	if !(low == supportedNFSVer && high == supportedNFSVer) {
		t.Errorf("supported range=(%d,%d) want (%d,%d)", low, high, supportedNFSVer, supportedNFSVer)
	}

	// After replying, the filter must close the connection so the client
	// does not send another RPC on it. Only io.EOF counts: accepting any
	// error would let a still-open but silent connection pass through the
	// read deadline, which is exactly the regression to catch.
	_ = client.SetReadDeadline(time.Now().Add(time.Second))
	var probe [1]byte
	n, readErr := client.Read(probe[:])
	if readErr == nil {
		t.Errorf("expected EOF after PROG_MISMATCH but read returned %d bytes", n)
	} else if !errors.Is(readErr, io.EOF) {
		t.Errorf("expected io.EOF after PROG_MISMATCH, got %v (likely a regression where the filter replies but does not close)", readErr)
	}

	select {
	case leaked := <-surfaced:
		leaked.Close()
		t.Error("rejected connection should not be returned to caller")
	case <-time.After(100 * time.Millisecond):
	}
}
// TestVersionFilterRejectsMOUNTv4WithProgMismatch checks that a MOUNT program
// call carrying version 4 is answered with PROG_MISMATCH, echoing the
// caller's xid and advertising supportedNFSVer as both ends of the supported
// range. Any connection the filter does surface is closed straight away by
// the background accept loop.
func TestVersionFilterRejectsMOUNTv4WithProgMismatch(t *testing.T) {
	innerListener, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}
	defer innerListener.Close()
	listener := newVersionFilterListener(innerListener)

	// Keep Accept() running so the filter processes the incoming conn.
	go func() {
		for {
			c, aerr := listener.Accept()
			if aerr != nil {
				return
			}
			c.Close()
		}
	}()

	conn, err := net.Dial("tcp", innerListener.Addr().String())
	if err != nil {
		t.Fatal(err)
	}
	defer conn.Close()

	if _, err := conn.Write(buildRPCCallFrame(42, mountProgram, 4, 0)); err != nil {
		t.Fatal(err)
	}

	xid, low, high := readPROGMismatchReply(t, conn)
	if xid != 42 {
		t.Errorf("xid=%d want 42", xid)
	}
	if low != supportedNFSVer || high != supportedNFSVer {
		// Report the expectation from the same constant the condition uses,
		// so the message stays truthful if supportedNFSVer ever changes.
		t.Errorf("supported range=(%d,%d) want (%d,%d)", low, high, supportedNFSVer, supportedNFSVer)
	}
}
// TestVersionFilterPassesThroughNFSv3 verifies that a version-3 NFS call is
// delivered to the Accept caller with the peeked bytes intact: the handler
// must be able to read back the complete 44-byte frame the client wrote.
func TestVersionFilterPassesThroughNFSv3(t *testing.T) {
	inner, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}
	defer inner.Close()
	filtered := newVersionFilterListener(inner)

	// The handler side reads one full frame and reports it back.
	seen := make(chan []byte, 1)
	go func() {
		c, acceptErr := filtered.Accept()
		if acceptErr != nil {
			return
		}
		defer c.Close()
		payload := make([]byte, 44)
		if _, readErr := io.ReadFull(c, payload); readErr == nil {
			seen <- payload
		}
	}()

	client, err := net.Dial("tcp", inner.Addr().String())
	if err != nil {
		t.Fatal(err)
	}
	defer client.Close()

	sent := buildRPCCallFrame(7, nfsProgram, 3, 0)
	if _, err := client.Write(sent); err != nil {
		t.Fatal(err)
	}

	select {
	case got := <-seen:
		if string(got) != string(sent) {
			t.Error("v3 frame was modified or partially consumed by filter")
		}
	case <-time.After(2 * time.Second):
		t.Fatal("v3 frame not delivered to inner accept handler")
	}
}
// TestVersionFilterPassesThroughUnknownProgram verifies that a call for a
// program other than NFS or MOUNT reaches the Accept caller untouched, even
// when its version field is 4.
func TestVersionFilterPassesThroughUnknownProgram(t *testing.T) {
	// The filter polices only NFS / MOUNT versions. Every other program is
	// left to go-nfs, which already answers PROG_UNAVAIL on its own; if the
	// filter had to know about each new program (e.g. NLM) it would stop
	// being a thin shim.
	inner, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}
	defer inner.Close()
	filtered := newVersionFilterListener(inner)

	passed := make(chan struct{}, 1)
	go func() {
		c, acceptErr := filtered.Accept()
		if acceptErr != nil {
			return
		}
		defer c.Close()
		var frame [44]byte
		if _, readErr := io.ReadFull(c, frame[:]); readErr != nil {
			return
		}
		passed <- struct{}{}
	}()

	client, err := net.Dial("tcp", inner.Addr().String())
	if err != nil {
		t.Fatal(err)
	}
	defer client.Close()

	// Program 100021 is NLM, which weed nfs doesn't run; let go-nfs handle
	// the unsupported-program reply.
	const nlmProgram = 100021
	if _, err := client.Write(buildRPCCallFrame(99, nlmProgram, 4, 0)); err != nil {
		t.Fatal(err)
	}

	select {
	case <-passed:
	case <-time.After(2 * time.Second):
		t.Fatal("unknown-program frame should pass through filter")
	}
}
// transientErrListener is a test double around a real net.Listener. Its
// first `remaining` Accept calls fail with fakeAcceptError; every call after
// that is forwarded to the wrapped listener. It lets tests exercise the
// version filter's retry-on-transient-error path without provoking real
// EMFILE conditions on the host.
type transientErrListener struct {
	inner     net.Listener
	mu        sync.Mutex // guards remaining
	remaining int        // injected failures still to be returned
}

// fakeAcceptError is the error value returned for each injected failure.
type fakeAcceptError struct{}

// Error implements the error interface.
func (fakeAcceptError) Error() string { return "fake transient accept error" }

// Accept returns an injected fakeAcceptError while any remain, otherwise it
// delegates to the wrapped listener. The mutex is released before
// delegating, so a blocking inner Accept never holds it.
func (l *transientErrListener) Accept() (net.Conn, error) {
	if l.takeInjectedFailure() {
		return nil, fakeAcceptError{}
	}
	return l.inner.Accept()
}

// takeInjectedFailure consumes one pending injected failure, reporting
// whether there was one to consume.
func (l *transientErrListener) takeInjectedFailure() bool {
	l.mu.Lock()
	defer l.mu.Unlock()
	if l.remaining <= 0 {
		return false
	}
	l.remaining--
	return true
}

// Close closes the wrapped listener.
func (l *transientErrListener) Close() error {
	return l.inner.Close()
}

// Addr reports the wrapped listener's address.
func (l *transientErrListener) Addr() net.Addr {
	return l.inner.Addr()
}
// TestVersionFilterRetriesTransientAcceptErrors makes the inner listener
// fail Accept three times before behaving normally, then checks that a valid
// v3 connection is still delivered through the filter.
func TestVersionFilterRetriesTransientAcceptErrors(t *testing.T) {
	// Regression test: the accept loop used to exit on any error from the
	// inner listener, so one transient EMFILE / EAGAIN under host resource
	// pressure tore the whole NFS server down. A few fake transient errors
	// are injected here and the next real connection must still come
	// through.
	inner, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}
	defer inner.Close()

	flaky := &transientErrListener{inner: inner, remaining: 3}
	filtered := newVersionFilterListener(flaky)

	recovered := make(chan struct{}, 1)
	go func() {
		c, acceptErr := filtered.Accept()
		if acceptErr != nil {
			return
		}
		defer c.Close()
		var frame [44]byte
		if _, readErr := io.ReadFull(c, frame[:]); readErr != nil {
			return
		}
		recovered <- struct{}{}
	}()

	client, err := net.Dial("tcp", inner.Addr().String())
	if err != nil {
		t.Fatal(err)
	}
	defer client.Close()
	if _, err := client.Write(buildRPCCallFrame(1, nfsProgram, 3, 0)); err != nil {
		t.Fatal(err)
	}

	// 3 transient errors × ~50ms backoff plus normal accept latency. The
	// bound is generous so slow CI doesn't flake, yet still tight enough to
	// catch a regression to "any error is terminal".
	select {
	case <-recovered:
	case <-time.After(2 * time.Second):
		t.Fatal("filter did not retry transient Accept() errors and recover")
	}
}
// TestVersionFilterCloseReturnsPromptlyWithIdlePeekConns opens several
// connections that never send a byte and then measures how long Close() on
// the filter listener takes; it must finish within two seconds.
func TestVersionFilterCloseReturnsPromptlyWithIdlePeekConns(t *testing.T) {
	// Regression test: Close() used to wg.Wait on every handleConn
	// goroutine, and those could sit in filterFirstRPCFrame's Peek() until
	// rpcVersionFilterPeekTimeout (10s) fired. A client that finished the
	// TCP handshake but never wrote anything would therefore stretch
	// shutdown by up to that timeout per conn. Close() now eagerly closes
	// the tracked in-flight raw conns, which makes Peek() return at once and
	// lets handleConn finish.
	//
	// This is a black-box test that only observes Close() latency: roughly
	// 10s with the regression, well under a second with the fix.
	const idleConns = 4

	inner, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}
	filtered := newVersionFilterListener(inner)

	// Drive Accept once so the background accept loop is running.
	go func() {
		_, _ = filtered.Accept()
	}()

	idle := make([]net.Conn, 0, idleConns)
	defer func() {
		for _, c := range idle {
			_ = c.Close()
		}
	}()
	for len(idle) < idleConns {
		c, dialErr := net.Dial("tcp", inner.Addr().String())
		if dialErr != nil {
			t.Fatal(dialErr)
		}
		idle = append(idle, c)
	}

	// Let handleConn reach Peek for every idle conn. Without the pause the
	// test could call Close() before any goroutine is actually peeking and
	// so hide the regression.
	time.Sleep(100 * time.Millisecond)

	// Close() must finish in well under rpcVersionFilterPeekTimeout (10s).
	// 2s is generous yet still separates "the peek was broken by closing the
	// conn" from "the peek deadline was waited out".
	began := time.Now()
	if closeErr := filtered.Close(); closeErr != nil {
		t.Errorf("Close: %v", closeErr)
	}
	if elapsed := time.Since(began); elapsed > 2*time.Second {
		t.Errorf("Close took %v with %d idle pre-peek conns; should be sub-second once they're forcibly closed", elapsed, idleConns)
	}
}
// TestVersionFilterPassesThroughNonV2RPC sends a frame whose rpcvers field
// is 99 but whose program/version words read as NFS + v4. The filter must
// not answer with PROG_MISMATCH and must still deliver the connection to the
// Accept caller.
func TestVersionFilterPassesThroughNonV2RPC(t *testing.T) {
	// Traffic that is not ONC RPC v2 is not the filter's to classify. Even
	// if hdr[16:24] happens to look like nfsProgram + vers=4, synthesizing a
	// PROG_MISMATCH that advertises NFSv3 would be wrong for what may be an
	// entirely different protocol sharing the port.
	inner, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}
	defer inner.Close()
	filtered := newVersionFilterListener(inner)

	passed := make(chan struct{}, 1)
	go func() {
		c, acceptErr := filtered.Accept()
		if acceptErr != nil {
			return
		}
		defer c.Close()
		var frame [44]byte
		if _, readErr := io.ReadFull(c, frame[:]); readErr != nil {
			return
		}
		passed <- struct{}{}
	}()

	client, err := net.Dial("tcp", inner.Addr().String())
	if err != nil {
		t.Fatal(err)
	}
	defer client.Close()

	// Start from a normal CALL frame and overwrite rpcvers with 99. Without
	// the rpcvers guard the filter would still read prog=NFS, vers=4 from
	// the same buffer and reject with PROG_MISMATCH.
	call := buildRPCCallFrame(0xfeedbeef, nfsProgram, 4, 0)
	binary.BigEndian.PutUint32(call[12:16], 99) // bogus rpcvers
	if _, err := client.Write(call); err != nil {
		t.Fatal(err)
	}

	// Look for a PROG_MISMATCH reply under a short deadline. None should
	// arrive, because the filter must not pretend to understand this
	// protocol.
	_ = client.SetReadDeadline(time.Now().Add(500 * time.Millisecond))
	var marker [4]byte
	if _, readErr := io.ReadFull(client, marker[:]); readErr == nil {
		mismatchMarker := uint32(progMismatchBodyLen) | (1 << 31)
		if binary.BigEndian.Uint32(marker[:]) == mismatchMarker {
			t.Fatal("filter sent PROG_MISMATCH for a non-v2 RPC frame")
		}
	}

	// And the connection should reach the inner accept handler.
	select {
	case <-passed:
	case <-time.After(2 * time.Second):
		t.Fatal("non-v2 RPC frame should pass through filter to inner accept")
	}
}
// TestVersionFilterIgnoresShortFirstFragment sends a first fragment whose
// declared body is only 12 bytes, followed by bytes that would read as an
// NFSv4 call if parsed as a header. The filter must not reply with
// PROG_MISMATCH.
func TestVersionFilterIgnoresShortFirstFragment(t *testing.T) {
	// Peek(28) can read past the first fragment's body when that body is
	// shorter than the 24-byte fixed RPC CALL header. Without a length check
	// the prog/vers fields would come from bytes belonging to the *next* RPC
	// (or a syntactic accident), and the filter could reject the connection
	// spuriously.
	inner, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}
	defer inner.Close()
	filtered := newVersionFilterListener(inner)

	// Keep Accept() running; anything the filter surfaces is just closed.
	go func() {
		for {
			c, acceptErr := filtered.Accept()
			if acceptErr != nil {
				return
			}
			c.Close()
		}
	}()

	client, err := net.Dial("tcp", inner.Addr().String())
	if err != nil {
		t.Fatal(err)
	}
	defer client.Close()

	const (
		shortBody    = 12
		lastFragment = 1 << 31
	)
	wire := make([]byte, 4+24)
	binary.BigEndian.PutUint32(wire[0:4], shortBody|lastFragment) // last-fragment, body=12
	// wire[4:16] is the real fragment body: 12 bytes, too short to be a CALL
	// header, so the filter must not treat it as one.
	// wire[16:28] lies beyond the fragment but inside the peek window. A
	// filter that (incorrectly) read prog/vers from hdr[16:24] would see
	// NFS + v4 here.
	binary.BigEndian.PutUint32(wire[16:20], nfsProgram)
	binary.BigEndian.PutUint32(wire[20:24], 4)
	if _, err := client.Write(wire); err != nil {
		t.Fatal(err)
	}

	// A wrongly rejecting filter would send a 36-byte TCP RPC reply (4-byte
	// frag marker + 32-byte PROG_MISMATCH body) within milliseconds. Wait
	// briefly and make sure nothing of that shape came back.
	_ = client.SetReadDeadline(time.Now().Add(500 * time.Millisecond))
	var marker [4]byte
	if _, readErr := io.ReadFull(client, marker[:]); readErr == nil {
		mismatchMarker := uint32(progMismatchBodyLen) | (1 << 31)
		if binary.BigEndian.Uint32(marker[:]) == mismatchMarker {
			t.Fatal("filter sent PROG_MISMATCH on a short fragment whose trailing peek bytes only superficially resembled a v4 call")
		}
	}
	// Any other outcome (timeout, EOF, unrelated bytes) is acceptable: the
	// only requirement is that the short fragment was NOT misclassified.
}
// TestVersionFilterDoesNotHeadOfLineBlockOnSlowConn connects one client that
// never writes and then a second client that immediately sends a valid v3
// call. The second connection must reach the Accept caller within two
// seconds regardless of the first.
func TestVersionFilterDoesNotHeadOfLineBlockOnSlowConn(t *testing.T) {
	// Regression test: the first RPC frame used to be peeked inline in
	// Accept(), so an idle TCP-only connect blocked every later Accept()
	// for up to rpcVersionFilterPeekTimeout. The peek now runs in a per-conn
	// goroutine, so a fast follow-up connect must be delivered well before
	// the slow conn's peek deadline.
	inner, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}
	defer inner.Close()
	filtered := newVersionFilterListener(inner)

	served := make(chan struct{}, 1)
	go func() {
		c, acceptErr := filtered.Accept()
		if acceptErr != nil {
			return
		}
		defer c.Close()
		var frame [44]byte
		if _, readErr := io.ReadFull(c, frame[:]); readErr != nil {
			return
		}
		served <- struct{}{}
	}()

	addr := inner.Addr().String()

	// Slow client: connects and stays silent. It parks a goroutine inside
	// the filter's peek until the deadline but must not hold up the next
	// conn.
	idleClient, err := net.Dial("tcp", addr)
	if err != nil {
		t.Fatal(err)
	}
	defer idleClient.Close()

	// Fast client: writes a valid v3 frame right away and must be delivered
	// to the inner accept handler without waiting for the slow one.
	quickClient, err := net.Dial("tcp", addr)
	if err != nil {
		t.Fatal(err)
	}
	defer quickClient.Close()
	if _, err := quickClient.Write(buildRPCCallFrame(11, nfsProgram, 3, 0)); err != nil {
		t.Fatal(err)
	}

	// The wait is bounded well below rpcVersionFilterPeekTimeout (10s) so a
	// regression to inline peeking clearly times out here.
	select {
	case <-served:
	case <-time.After(2 * time.Second):
		t.Fatal("fast conn should not be head-of-line blocked by slow conn's peek")
	}
}

View File

@@ -1,250 +0,0 @@
package nfs
import (
"context"
"errors"
"fmt"
"net"
"sync"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/seaweedfs/seaweedfs/weed/wdclient"
gonfs "github.com/willscott/go-nfs"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
)
// Option carries the configuration for an NFS Server. NewServer fills in
// defaults for empty fields by writing to the Option it is given, and the
// Server keeps a pointer to that same value rather than a copy.
type Option struct {
	// Filer is the filer address. NewServer passes it to the wdclient filer
	// client, and LookupFn builds chunk proxy URLs from its HTTP address.
	Filer pb.ServerAddress

	// BindIp and Port form the TCP address Start listens on; the same pair
	// is handed to the MOUNT UDP responder.
	BindIp string

	// Port must be positive; NewServer rejects values <= 0.
	Port int

	// FilerRootPath is the filer path to export. Empty defaults to "/";
	// NewServer normalizes it into the server's export root.
	FilerRootPath string

	// ReadOnly is only logged at startup in this file.
	// NOTE(review): enforcement presumably lives in the filesystem/handler
	// code — confirm there.
	ReadOnly bool

	// AllowedClients is passed to newClientAuthorizer; when the resulting
	// authorizer is enabled, serve wraps the listener in an allowlist.
	AllowedClients []string

	// VolumeServerAccess selects how chunk locations are resolved. Empty
	// defaults to "direct". "filerProxy" skips creating a filer client and
	// makes LookupFn return a filer proxy URL; "publicUrl" creates the filer
	// client with wdclient.PreferPublicUrl.
	VolumeServerAccess string

	// GrpcDialOption is the dial option for filer connections. Nil defaults
	// to insecure transport credentials.
	GrpcDialOption grpc.DialOption

	// PortmapBind, when non-empty, enables a built-in portmap v2 responder
	// on <PortmapBind>:111 advertising the NFS v3 and MOUNT v3 services at
	// Port. Empty (the default) disables portmap; clients must then bypass
	// portmap with mount -o port=,mountport=,proto=tcp,mountproto=tcp.
	PortmapBind string
}
// Server is the NFS gateway instance built by NewServer. It holds the export
// identity, the filer client plumbing and the lazily created root
// filesystem shared by the TCP handler and the UDP MOUNT path.
type Server struct {
	// option is the caller-supplied configuration (shared, not copied).
	option *Option

	// exportRoot is Option.FilerRootPath after normalizeExportRoot.
	exportRoot util.FullPath

	// exportID is derived from exportRoot via exportIDForRoot.
	exportID uint32

	// signature is a random int32 chosen in NewServer and passed to both
	// client executors.
	signature int32

	// handleLimit is set to 1<<20 by NewServer.
	// NOTE(review): presumably caps tracked file handles — confirm at the
	// use site.
	handleLimit int

	// clientAuthorizer is built from Option.AllowedClients; when enabled,
	// serve wraps the listener in an allowlistListener.
	clientAuthorizer *clientAuthorizer

	// sharedReaderCache is handed to the root filesystem; if nil at that
	// point, rootFilesystem adopts the root filesystem's own readerCache.
	sharedReaderCache *filer.ReaderCache

	// chunkInvalidator defaults to sharedReaderCache in rootFilesystem when
	// not set beforehand.
	chunkInvalidator chunkInvalidator

	// filerClient is nil when VolumeServerAccess is "filerProxy"; otherwise
	// it backs LookupFn and is closed when serve returns.
	filerClient *wdclient.FilerClient

	// newUploader is the chunk uploader factory (newChunkUploader by
	// default).
	newUploader func() (chunkUploader, error)

	// withFilerClient and withInternalClient are the executors created by
	// newFilerClientExecutor and newInternalClientExecutor.
	withFilerClient filerClientExecutor

	withInternalClient internalClientExecutor

	// rootFSOnce guards the one-time construction of rootFS.
	rootFSOnce sync.Once

	rootFS *seaweedFileSystem
}
// NewServer validates option, fills in its defaults in place and returns a
// Server ready for Start.
//
// It returns an error when option is nil, when option.Port is not positive,
// or when the AllowedClients list is rejected by newClientAuthorizer.
// Defaults applied to option: FilerRootPath "/", VolumeServerAccess
// "direct", and insecure gRPC transport credentials. A wdclient filer client
// is created unless VolumeServerAccess is "filerProxy"; with "publicUrl" it
// is configured to prefer public URLs.
func NewServer(option *Option) (*Server, error) {
	switch {
	case option == nil:
		return nil, errors.New("nfs option is required")
	case option.Port <= 0:
		return nil, fmt.Errorf("nfs port must be positive: %d", option.Port)
	}

	// Defaults are written back into the caller's Option.
	if option.FilerRootPath == "" {
		option.FilerRootPath = "/"
	}
	if option.VolumeServerAccess == "" {
		option.VolumeServerAccess = "direct"
	}
	if option.GrpcDialOption == nil {
		option.GrpcDialOption = grpc.WithTransportCredentials(insecure.NewCredentials())
	}

	authorizer, err := newClientAuthorizer(option.AllowedClients)
	if err != nil {
		return nil, err
	}

	// In filerProxy mode chunk reads go through the filer, so no lookup
	// client is created.
	var lookupClient *wdclient.FilerClient
	if option.VolumeServerAccess != "filerProxy" {
		var clientOpts *wdclient.FilerClientOption
		if option.VolumeServerAccess == "publicUrl" {
			clientOpts = &wdclient.FilerClientOption{UrlPreference: wdclient.PreferPublicUrl}
		}
		filers := []pb.ServerAddress{option.Filer}
		lookupClient = wdclient.NewFilerClient(filers, option.GrpcDialOption, "", clientOpts)
	}

	root := normalizeExportRoot(util.FullPath(option.FilerRootPath))
	sig := util.RandomInt32()

	srv := &Server{
		option:             option,
		exportRoot:         root,
		exportID:           exportIDForRoot(root),
		signature:          sig,
		handleLimit:        1 << 20,
		clientAuthorizer:   authorizer,
		filerClient:        lookupClient,
		newUploader:        newChunkUploader,
		withFilerClient:    newFilerClientExecutor(option, sig),
		withInternalClient: newInternalClientExecutor(option, sig),
	}
	return srv, nil
}
// Start binds the NFS TCP listener on BindIp:Port, starts the MOUNT v3 UDP
// responder on the same port, optionally starts the built-in portmap
// responder, logs a mount hint and then blocks in serve until it returns.
//
// The UDP and portmap responders are closed when Start returns. If either
// fails to start, the TCP listener is closed and the wrapped error is
// returned.
func (s *Server) Start() error {
	// JoinHostPort brackets IPv6 literals ("[::1]:2049"); plain "%s:%d"
	// formatting yields an address net.Listen cannot parse for them. For
	// IPv4 addresses, host names and an empty BindIp the result is unchanged.
	addr := net.JoinHostPort(s.option.BindIp, fmt.Sprint(s.option.Port))
	listener, err := net.Listen("tcp", addr)
	if err != nil {
		return fmt.Errorf("listen nfs on %s: %w", addr, err)
	}

	// MOUNT v3 over UDP runs alongside the TCP NFS listener on the same
	// port. The kernel default for mountproto is UDP in many setups, so
	// without this responder a plain `mount -t nfs <host>:<export> /mnt`
	// gets EPROTONOSUPPORT during the MOUNT phase even though the TCP
	// NFS path is fine.
	mountUDP := newMountUDPServer(s.option.BindIp, s.option.Port, s)
	if err := mountUDP.Start(); err != nil {
		_ = listener.Close()
		return fmt.Errorf("start mount udp: %w", err)
	}
	defer func() {
		_ = mountUDP.Close()
	}()
	glog.V(0).Infof("MOUNT v3 UDP responder listening on %s:%d", s.option.BindIp, s.option.Port)

	if s.option.PortmapBind != "" {
		portmap := newPortmapServer(s.option.PortmapBind, portmapPort, uint32(s.option.Port))
		if pmErr := portmap.Start(); pmErr != nil {
			_ = listener.Close()
			return fmt.Errorf("start portmap: %w", pmErr)
		}
		glog.V(0).Infof("NFS portmap responder listening on %s:%d (NFS v3 tcp=%d, MOUNT v3 tcp=%d, MOUNT v3 udp=%d)",
			s.option.PortmapBind, portmapPort, s.option.Port, s.option.Port, s.option.Port)
		// portmap is always non-nil here, so no nil guard is needed.
		defer func() {
			_ = portmap.Close()
		}()
	}

	s.logMountHint()
	return s.serve(listener)
}
// logMountHint logs a copy-pasteable Linux mount command at startup so
// operators can see how to mount the export from a client.
//
// When -portmap.bind is set, MOUNT is answered over both TCP and UDP, so the
// hint is a plain `mount -t nfs` with no transport overrides. Without it the
// client has to bypass portmap, so the hint spells out the explicit
// port=/mountport=/proto=/mountproto= options and adds a tip about
// -portmap.bind.
func (s *Server) logMountHint() {
	export := string(s.exportRoot)
	port := s.option.Port

	if s.option.PortmapBind == "" {
		glog.V(0).Infof("mount example (bypasses portmap): mount -t nfs -o nfsvers=3,nolock,noacl,port=%d,mountport=%d,proto=tcp,mountproto=tcp <host>:%s <mountpoint>",
			port, port, export)
		glog.V(0).Infof("tip: pass -portmap.bind to enable the built-in portmap responder on port 111 so plain `mount -t nfs host:%s /mnt` works.", export)
		return
	}

	glog.V(0).Infof("mount example: mount -t nfs -o nfsvers=3,nolock <host>:%s <mountpoint>", export)
	glog.V(0).Infof("(MOUNT v3 is served over both TCP and UDP, so no mountproto override is needed.)")
}
// serve runs the NFS service on listener and blocks until gonfs.Serve
// returns. The listener is wrapped first in the client allowlist (only when
// the authorizer is enabled) and then in the RPC version filter. A
// background goroutine runs the metadata invalidation loop for as long as
// serve is active.
//
// Deferred cleanup runs in this order on return: cancel the invalidation
// loop and wait for it to exit, then close the filer client if there is one.
// If the handler cannot be built, the wrapped listener is closed and the
// error returned.
func (s *Server) serve(listener net.Listener) error {
	if fc := s.filerClient; fc != nil {
		defer fc.Close()
	}

	ln := listener
	if auth := s.clientAuthorizer; auth != nil && auth.enabled {
		ln = &allowlistListener{
			Listener:   ln,
			authorizer: auth,
		}
	}
	ln = newVersionFilterListener(ln)

	nfsHandler, err := s.newHandler()
	if err != nil {
		_ = ln.Close()
		return err
	}

	ctx, stop := context.WithCancel(context.Background())
	defer stop()
	loopExited := make(chan struct{})
	go func() {
		defer close(loopExited)
		s.runMetadataInvalidationLoop(ctx)
	}()
	// Registered last, so it runs first: stop the loop and wait for it to
	// finish before the remaining deferred cleanup.
	defer func() {
		stop()
		<-loopExited
	}()

	glog.V(0).Infof("Start Seaweed NFS Server filer=%s bind=%s export=%s exportId=%d readOnly=%t allowedClients=%d volumeServerAccess=%s",
		s.option.Filer,
		ln.Addr(),
		s.exportRoot,
		s.exportID,
		s.option.ReadOnly,
		len(s.option.AllowedClients),
		s.option.VolumeServerAccess,
	)
	return gonfs.Serve(ln, nfsHandler)
}
// newHandler returns a Handler bound to this server and its shared root
// filesystem. It returns an error when called on a nil *Server.
func (s *Server) newHandler() (*Handler, error) {
	if s == nil {
		return nil, errors.New("nfs server is not configured")
	}
	h := new(Handler)
	h.server = s
	h.rootFS = s.rootFilesystem()
	return h, nil
}
// rootFilesystem returns the one seaweedFileSystem rooted at the configured
// export, constructing it on the first call. The TCP handler (through
// newHandler) and the UDP MOUNT path both use this instance, so they share
// the chunk reader cache and no wrapper is rebuilt per request.
//
// During construction, a server without a sharedReaderCache adopts the new
// filesystem's readerCache, and a server without a chunkInvalidator uses the
// shared reader cache for that role.
func (s *Server) rootFilesystem() *seaweedFileSystem {
	s.rootFSOnce.Do(func() {
		// The filesystem is created with whatever cache the server holds at
		// this point, which may be nil.
		fs := newSeaweedFileSystem(s, s.exportRoot, s.sharedReaderCache)
		s.rootFS = fs

		if s.sharedReaderCache == nil {
			s.sharedReaderCache = fs.readerCache
		}
		if s.chunkInvalidator == nil {
			s.chunkInvalidator = s.sharedReaderCache
		}
	})
	return s.rootFS
}
// WithFilerClient runs fn through the server's filer client executor,
// forwarding streamingMode unchanged. It returns an error without calling fn
// when the server is nil or has no executor configured.
func (s *Server) WithFilerClient(streamingMode bool, fn func(filer_pb.SeaweedFilerClient) error) error {
	if s != nil && s.withFilerClient != nil {
		return s.withFilerClient(streamingMode, fn)
	}
	return errors.New("nfs filer client is not configured")
}
// LookupFn returns the function used to resolve a chunk file ID to URLs.
//
// In "filerProxy" mode the returned function yields a single URL on the
// filer's HTTP address with the file ID as the proxyChunkId query parameter.
// Otherwise it returns the filer client's lookup function. The result is nil
// when the server is nil or when neither source is available.
func (s *Server) LookupFn() wdclient.LookupFileIdFunctionType {
	switch {
	case s == nil:
		return nil
	case s.option != nil && s.option.VolumeServerAccess == "filerProxy":
		return func(_ context.Context, chunkID string) ([]string, error) {
			// The filer address is read on every call, not captured once.
			proxyURL := fmt.Sprintf("http://%s/?proxyChunkId=%s", s.option.Filer.ToHttpAddress(), chunkID)
			return []string{proxyURL}, nil
		}
	case s.filerClient != nil:
		return s.filerClient.GetLookupFileIdFunction()
	default:
		return nil
	}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,40 +0,0 @@
package nfs
import (
"io"
"github.com/seaweedfs/seaweedfs/weed/operation"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
)
// chunkUploader is the upload seam used by the NFS server: one method with
// the same signature as the UploadWithRetry method that
// operationChunkUploader forwards to on *operation.Uploader. The Server
// obtains implementations through its newUploader factory.
type chunkUploader interface {
	// UploadWithRetry uploads the bytes read from reader and returns the
	// assigned file ID, the upload result, an error and a data slice.
	// NOTE(review): retry policy and the meaning of the trailing data slice
	// are defined by the operation package — confirm there.
	UploadWithRetry(
		filerClient filer_pb.FilerClient,
		assignRequest *filer_pb.AssignVolumeRequest,
		uploadOption *operation.UploadOption,
		genFileUrlFn func(host, fileId string) string,
		reader io.Reader,
	) (fileId string, uploadResult *operation.UploadResult, err error, data []byte)
}
// operationChunkUploader adapts *operation.Uploader to the chunkUploader
// interface.
type operationChunkUploader struct {
	uploader *operation.Uploader
}

// UploadWithRetry forwards every argument to the wrapped uploader and returns
// its four results unchanged.
func (u operationChunkUploader) UploadWithRetry(
	filerClient filer_pb.FilerClient,
	assignRequest *filer_pb.AssignVolumeRequest,
	uploadOption *operation.UploadOption,
	genFileUrlFn func(host, fileId string) string,
	reader io.Reader,
) (string, *operation.UploadResult, error, []byte) {
	fileID, result, err, data := u.uploader.UploadWithRetry(
		filerClient,
		assignRequest,
		uploadOption,
		genFileUrlFn,
		reader,
	)
	return fileID, result, err, data
}
// newChunkUploader creates an operation.Uploader and returns it wrapped as a
// chunkUploader. If the uploader cannot be created, it returns a nil
// chunkUploader and that error.
func newChunkUploader() (chunkUploader, error) {
	var (
		wrapped operationChunkUploader
		err     error
	)
	if wrapped.uploader, err = operation.NewUploader(); err != nil {
		return nil, err
	}
	return wrapped, nil
}