diff --git a/weed/s3api/iceberg/path_validation.go b/weed/s3api/iceberg/path_validation.go new file mode 100644 index 000000000..faf044881 --- /dev/null +++ b/weed/s3api/iceberg/path_validation.go @@ -0,0 +1,70 @@ +package iceberg + +import ( + "net/http" + "strings" + + "github.com/gorilla/mux" + "github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants" +) + +// validateRequestPath rejects Iceberg REST requests whose captured +// {prefix}/{namespace}/{table} mux vars would produce a parent-directory +// traversal when joined into a filer path. The iceberg router runs with +// SkipClean(true), so `..` survives routing; downstream path.Join calls +// (stageCreateMarkerDir, location builders, etc.) then collapse it and +// escape the table-bucket directory. +// +// {prefix} maps to a table-bucket name; {table} is a single path segment; +// {namespace} is unit-separator (0x1F) joined parts that get flattened into +// a single dotted name for the on-disk layout — each part is validated +// individually. +func validateRequestPath(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + // Use the comma-ok form so vars only checked when the matched route + // actually captures them; when captured, an empty value is itself a + // rejection because downstream path.Join would collapse it. + if prefix, ok := vars["prefix"]; ok { + if prefix == "" || !s3_constants.IsValidBucketName(prefix) { + writeError(w, http.StatusBadRequest, "BadRequest", "invalid prefix") + return + } + } + if table, ok := vars["table"]; ok { + if table == "" || !isValidNameSegment(table) { + writeError(w, http.StatusBadRequest, "BadRequest", "invalid table name") + return + } + } + if ns, ok := vars["namespace"]; ok { + if ns == "" { + writeError(w, http.StatusBadRequest, "BadRequest", "invalid namespace") + return + } + // Reject leading/trailing/consecutive unit separators so distinct + // inputs cannot collapse to the same parsed namespace via + // parseNamespace's empty-part filter. + for _, part := range strings.Split(ns, "\x1F") { + if part == "" || !isValidNameSegment(part) { + writeError(w, http.StatusBadRequest, "BadRequest", "invalid namespace") + return + } + } + } + next.ServeHTTP(w, r) + }) +} + +// isValidNameSegment rejects a single path-segment value (bucket prefix slot, +// table name, or one namespace part) that would be unsafe to embed in a filer +// path: `.`, `..`, embedded slash/backslash, or NUL. +func isValidNameSegment(s string) bool { + if s == "" { + return true + } + if s == "." || s == ".." { + return false + } + return !strings.ContainsAny(s, "/\\\x00") +} diff --git a/weed/s3api/iceberg/path_validation_test.go b/weed/s3api/iceberg/path_validation_test.go new file mode 100644 index 000000000..eaeea0737 --- /dev/null +++ b/weed/s3api/iceberg/path_validation_test.go @@ -0,0 +1,116 @@ +package iceberg + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/gorilla/mux" +) + +func TestValidateRequestPath_RejectsTraversal(t *testing.T) { + tests := []struct { + name string + rawPath string + wantCode int + }{ + {"clean namespace+table passes", "/v1/namespaces/sales/tables/orders", http.StatusOK}, + {"clean prefixed passes", "/v1/wh/namespaces/sales/tables/orders", http.StatusOK}, + {"clean namespace only passes", "/v1/namespaces/sales", http.StatusOK}, + + // SkipClean(true) means raw `..` survives routing — these are the + // realistic traversal shapes the middleware must catch. + {"dotdot as prefix var rejected", "/v1/../namespaces/sales", http.StatusBadRequest}, + {"dotdot as namespace var rejected", "/v1/namespaces/..", http.StatusBadRequest}, + {"dotdot as namespace var prefixed rejected", "/v1/wh/namespaces/..", http.StatusBadRequest}, + {"dotdot as table var rejected", "/v1/namespaces/sales/tables/..", http.StatusBadRequest}, + {"dot as table var rejected", "/v1/namespaces/sales/tables/.", http.StatusBadRequest}, + // Iceberg clients send the 0x1F unit separator percent-encoded; mux + // decodes it before the middleware sees the namespace var. + {"unit-sep namespace with dotdot part rejected", "/v1/namespaces/sales%1F..%1Fevil", http.StatusBadRequest}, + {"leading unit-sep namespace rejected", "/v1/namespaces/%1Fsales", http.StatusBadRequest}, + {"trailing unit-sep namespace rejected", "/v1/namespaces/sales%1F", http.StatusBadRequest}, + {"consecutive unit-sep namespace rejected", "/v1/namespaces/sales%1F%1Fevil", http.StatusBadRequest}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + router := mux.NewRouter().SkipClean(true) + router.Use(validateRequestPath) + handlerCalled := false + pass := func(w http.ResponseWriter, r *http.Request) { + handlerCalled = true + w.WriteHeader(http.StatusOK) + } + router.HandleFunc("/v1/namespaces/{namespace}", pass) + router.HandleFunc("/v1/namespaces/{namespace}/tables/{table}", pass) + router.HandleFunc("/v1/{prefix}/namespaces/{namespace}", pass) + router.HandleFunc("/v1/{prefix}/namespaces/{namespace}/tables/{table}", pass) + + req := httptest.NewRequest(http.MethodGet, tt.rawPath, nil) + rr := httptest.NewRecorder() + router.ServeHTTP(rr, req) + + if rr.Code != tt.wantCode { + t.Fatalf("path %q: got status %d, want %d (body=%q)", tt.rawPath, rr.Code, tt.wantCode, rr.Body.String()) + } + if tt.wantCode == http.StatusBadRequest && handlerCalled { + t.Fatalf("path %q: inner handler reached despite rejection", tt.rawPath) + } + }) + } +} + +// Defense-in-depth: if a future route or middleware ever leaves one of the +// captured vars empty, the middleware must still reject the request. The +// default mux regex won't normally allow this. +func TestValidateRequestPath_RejectsEmptyCapturedVars(t *testing.T) { + tests := []struct { + name string + vars map[string]string + }{ + {"empty prefix", map[string]string{"prefix": "", "namespace": "ns"}}, + {"empty table", map[string]string{"namespace": "ns", "table": ""}}, + {"empty namespace", map[string]string{"namespace": ""}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + handlerCalled := false + h := validateRequestPath(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + handlerCalled = true + })) + req := mux.SetURLVars(httptest.NewRequest(http.MethodGet, "/", nil), tt.vars) + rr := httptest.NewRecorder() + h.ServeHTTP(rr, req) + if handlerCalled { + t.Fatalf("vars %v: inner handler reached despite empty capture", tt.vars) + } + }) + } +} + +func TestIsValidNameSegment(t *testing.T) { + tests := []struct { + name string + input string + want bool + }{ + {"empty ok", "", true}, + {"plain", "orders", true}, + {"with dot inside", "my.table", true}, + {"hidden", ".hidden", true}, + + {"bare dot", ".", false}, + {"bare dotdot", "..", false}, + {"contains slash", "foo/bar", false}, + {"contains backslash", "foo\\bar", false}, + {"contains nul", "foo\x00bar", false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := isValidNameSegment(tt.input); got != tt.want { + t.Errorf("isValidNameSegment(%q) = %v, want %v", tt.input, got, tt.want) + } + }) + } +} diff --git a/weed/s3api/iceberg/server.go b/weed/s3api/iceberg/server.go index 80f83df7d..fe280d829 100644 --- a/weed/s3api/iceberg/server.go +++ b/weed/s3api/iceberg/server.go @@ -71,6 +71,13 @@ func (s *Server) RegisterRoutes(router *mux.Router) { // Add middleware to log all requests/responses router.Use(loggingMiddleware) + // Reject `..`/`.`/NUL in {prefix}/{namespace}/{table} vars before any + // handler runs. The router uses SkipClean(true), so traversal segments + // would otherwise reach path.Join in stage-marker / location builders. + // Registered after loggingMiddleware so rejected requests still get + // audit-logged. + router.Use(validateRequestPath) + // Configuration endpoint - no auth needed for config router.HandleFunc("/v1/config", s.handleConfig).Methods(http.MethodGet) diff --git a/weed/s3api/s3_constants/header.go b/weed/s3api/s3_constants/header.go index 9cfc11f35..97afcb392 100644 --- a/weed/s3api/s3_constants/header.go +++ b/weed/s3api/s3_constants/header.go @@ -177,6 +177,41 @@ func GetBucketAndObject(r *http.Request) (bucket, object string) { return } +// IsValidObjectKey rejects S3 object keys that — after normalization +// (backslash→slash, slash collapse) — contain a `.` or `..` path segment, or +// embed a NUL byte. Such keys are collapsed by filepath.Join inside the filer +// and would escape the bucket directory, so they must not reach the gRPC layer. +// Gorilla mux URL-decodes captured vars before this runs, so `%2e%2e` is +// already `..` here. +func IsValidObjectKey(object string) bool { + if object == "" { + return true + } + if strings.ContainsRune(object, '\x00') { + return false + } + object = strings.ReplaceAll(object, "\\", "/") + for _, seg := range strings.Split(object, "/") { + if seg == "." || seg == ".." { + return false + } + } + return true +} + +// IsValidBucketName rejects bucket names captured from the URL path that are +// unsafe to use in filer path construction (`.`, `..`, contain `/` or `\`, or +// embed NUL). This is a path-safety check, not a full S3 naming-rule check. +func IsValidBucketName(bucket string) bool { + if bucket == "" { + return true + } + if bucket == "." || bucket == ".." { + return false + } + return !strings.ContainsAny(bucket, "/\\\x00") +} + // NormalizeObjectKey normalizes object keys by removing duplicate slashes and converting backslashes. // This normalizes keys from various sources (URL path, form values, etc.) to a consistent format. // It also converts Windows-style backslashes to forward slashes for cross-platform compatibility. diff --git a/weed/s3api/s3_constants/header_test.go b/weed/s3api/s3_constants/header_test.go index 17769c6cd..dc001ec0a 100644 --- a/weed/s3api/s3_constants/header_test.go +++ b/weed/s3api/s3_constants/header_test.go @@ -89,6 +89,65 @@ func TestNormalizeObjectKey(t *testing.T) { } } +func TestIsValidObjectKey(t *testing.T) { + tests := []struct { + name string + input string + want bool + }{ + {"empty", "", true}, + {"plain", "folder/file.txt", true}, + {"leading slash", "/folder/file.txt", true}, + {"trailing slash", "folder/", true}, + {"hidden file ok", ".hidden", true}, + {"dotdot in name ok", "..hidden", true}, + {"double dots inside name", "foo..bar/baz", true}, + + {"bare dotdot", "..", false}, + {"bare dot", ".", false}, + {"leading dotdot segment", "../evil-bucket/test.txt", false}, + {"leading dot-slash", "./evil/test.txt", false}, + {"nested dotdot segment", "good/../evil/test.txt", false}, + {"trailing dotdot segment", "good/..", false}, + {"backslash dotdot", "..\\evil\\test.txt", false}, + {"mixed-slash dotdot", "good\\..\\evil/test.txt", false}, + {"dotdot after duplicate slash", "good//../evil", false}, + {"nul byte", "foo\x00bar", false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := IsValidObjectKey(tt.input); got != tt.want { + t.Errorf("IsValidObjectKey(%q) = %v, want %v", tt.input, got, tt.want) + } + }) + } +} + +func TestIsValidBucketName(t *testing.T) { + tests := []struct { + name string + input string + want bool + }{ + {"empty ok", "", true}, + {"plain", "my-bucket", true}, + {"name containing dots", "my.bucket.name", true}, + + {"bare dot", ".", false}, + {"bare dotdot", "..", false}, + {"with slash", "evil/bucket", false}, + {"with backslash", "evil\\bucket", false}, + {"with nul", "evil\x00bucket", false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := IsValidBucketName(tt.input); got != tt.want { + t.Errorf("IsValidBucketName(%q) = %v, want %v", tt.input, got, tt.want) + } + }) + } +} + func TestRemoveDuplicateSlashes(t *testing.T) { tests := []struct { name string diff --git a/weed/s3api/s3api_path_validation.go b/weed/s3api/s3api_path_validation.go new file mode 100644 index 000000000..0ca22789c --- /dev/null +++ b/weed/s3api/s3api_path_validation.go @@ -0,0 +1,38 @@ +package s3api + +import ( + "net/http" + + "github.com/gorilla/mux" + "github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants" + "github.com/seaweedfs/seaweedfs/weed/s3api/s3err" +) + +// validateRequestPath rejects requests whose captured {bucket}/{object} mux +// vars would normalize to a parent-directory traversal once joined into a +// filer path. The router runs with mux.NewRouter().SkipClean(true), so +// segments like `..` survive routing; the filer's util.JoinPath later collapses +// them via filepath.Join. Without this guard, `GET /bucket-A/../evil-bucket/k` +// matches as bucket=bucket-A, object=../evil-bucket/k, the filer resolves the +// read against evil-bucket, while IAM authorizes against bucket-A. +func validateRequestPath(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + // When a var is in the matched route it must be non-empty: an empty + // bucket would let downstream path.Join collapse it and let the object + // key pick the bucket. + if bucket, ok := vars["bucket"]; ok { + if bucket == "" || !s3_constants.IsValidBucketName(bucket) { + s3err.WriteErrorResponse(w, r, s3err.ErrInvalidRequest) + return + } + } + if object, ok := vars["object"]; ok { + if object == "" || !s3_constants.IsValidObjectKey(object) { + s3err.WriteErrorResponse(w, r, s3err.ErrInvalidRequest) + return + } + } + next.ServeHTTP(w, r) + }) +} diff --git a/weed/s3api/s3api_path_validation_test.go b/weed/s3api/s3api_path_validation_test.go new file mode 100644 index 000000000..feb9ccb16 --- /dev/null +++ b/weed/s3api/s3api_path_validation_test.go @@ -0,0 +1,87 @@ +package s3api + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/gorilla/mux" +) + +func TestValidateRequestPath_RejectsTraversal(t *testing.T) { + tests := []struct { + name string + // rawPath is sent as the Request-URI; net/http.NewRequest does not + // rewrite the path, so `..` segments survive into mux when the router + // is built with SkipClean(true) — matching the production setup in + // weed/command/s3.go. + rawPath string + wantCode int + }{ + {"clean path passes", "/bucket-a/folder/file.txt", http.StatusOK}, + {"bucket only passes", "/bucket-a", http.StatusOK}, + {"trailing slash passes", "/bucket-a/folder/", http.StatusOK}, + + {"leading dotdot rejected", "/bucket-a/../evil-bucket/test.txt", http.StatusBadRequest}, + {"nested dotdot rejected", "/bucket-a/good/../evil/test.txt", http.StatusBadRequest}, + {"backslash dotdot rejected", "/bucket-a/..\\evil\\test.txt", http.StatusBadRequest}, + {"percent-encoded dotdot rejected", "/bucket-a/%2e%2e/evil/test.txt", http.StatusBadRequest}, + {"bare dot object rejected", "/bucket-a/./evil/test.txt", http.StatusBadRequest}, + {"dotdot bucket rejected", "/../buckets/evil", http.StatusBadRequest}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + router := mux.NewRouter().SkipClean(true) + sub := router.PathPrefix("/{bucket}").Subrouter() + sub.Use(validateRequestPath) + handlerCalled := false + pass := func(w http.ResponseWriter, r *http.Request) { + handlerCalled = true + w.WriteHeader(http.StatusOK) + } + // Mirror the production routes: /{bucket}/{object:(?s).+} for + // object-scoped requests, bare /{bucket} for bucket-scoped ones. + sub.Path("/{object:(?s).+}").HandlerFunc(pass) + sub.Path("").HandlerFunc(pass) + + req := httptest.NewRequest(http.MethodGet, tt.rawPath, nil) + rr := httptest.NewRecorder() + router.ServeHTTP(rr, req) + + if rr.Code != tt.wantCode { + t.Fatalf("path %q: got status %d, want %d (body=%q)", tt.rawPath, rr.Code, tt.wantCode, rr.Body.String()) + } + if tt.wantCode == http.StatusBadRequest && handlerCalled { + t.Fatalf("path %q: inner handler reached despite rejection", tt.rawPath) + } + }) + } +} + +// Defense-in-depth: a future router or middleware that captures the {bucket} +// or {object} mux var as an empty string must still be rejected, even though +// mux's default `[^/]+` regex won't match an empty segment from a real URL. +func TestValidateRequestPath_RejectsEmptyCapturedVars(t *testing.T) { + tests := []struct { + name string + vars map[string]string + }{ + {"empty bucket", map[string]string{"bucket": "", "object": "key"}}, + {"empty object", map[string]string{"bucket": "bucket-a", "object": ""}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + handlerCalled := false + h := validateRequestPath(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + handlerCalled = true + })) + req := mux.SetURLVars(httptest.NewRequest(http.MethodGet, "/", nil), tt.vars) + rr := httptest.NewRecorder() + h.ServeHTTP(rr, req) + if handlerCalled { + t.Fatalf("vars %v: inner handler reached despite empty capture", tt.vars) + } + }) + } +} diff --git a/weed/s3api/s3api_server.go b/weed/s3api/s3api_server.go index af4ed6785..fa03ac7c7 100644 --- a/weed/s3api/s3api_server.go +++ b/weed/s3api/s3api_server.go @@ -735,6 +735,11 @@ func (s3a *S3ApiServer) registerRouter(router *mux.Router) { corsMiddleware := s3a.getCORSMiddleware() for _, bucket := range routers { + // Reject `..`/`.`/NUL in {bucket} or {object} vars before any handler + // runs. SkipClean(true) keeps `..` in the matched path; the filer would + // otherwise collapse it via filepath.Join and cross bucket boundaries. + bucket.Use(validateRequestPath) + // Apply CORS middleware to bucket routers for automatic CORS header handling bucket.Use(corsMiddleware.Handler)