Files
versitygw/s3api/utils/multipart_reader.go
niksis02 9f786b3c2c feat: global error refactoring
Fixes #2123
Fixes #2120
Fixes #2116
Fixes #2111
Fixes #2108
Fixes #2086
Fixes #2085
Fixes #2083
Fixes #2081
Fixes #2080
Fixes #2073
Fixes #2072
Fixes #2071
Fixes #2069
Fixes #2044
Fixes #2043
Fixes #2042
Fixes #2041
Fixes #2040
Fixes #2039
Fixes #2036
Fixes #2035
Fixes #2034
Fixes #2028
Fixes #2020
Fixes #1842
Fixes #1810
Fixes #1780
Fixes #1775
Fixes #1736
Fixes #1705
Fixes #1663
Fixes #1645
Fixes #1583
Fixes #1526
Fixes #1514
Fixes #1493
Fixes #1487
Fixes #959
Fixes #779
Closes #823
Closes #85

Refactor global S3 error handling around structured error types and centralized XML response generation.

All S3 errors now share the common APIError base for the fields every error has: Code, HTTP status code, and Message. Non-traditional errors that need AWS-compatible XML fields now have dedicated typed errors in the s3err package. Each typed error implements the shared S3Error behavior so controllers and middleware can handle errors consistently while still emitting error-specific XML fields.

Add a dedicated InvalidArgumentError type because InvalidArgument is used widely across request validation, auth, copy source handling, object lock validation, multipart validation, and header parsing. The new InvalidArgument path uses explicit InvalidArgErrorCode constants with predefined descriptions and ArgumentName values, keeping call sites readable while preserving the correct InvalidArgument XML shape and optional ArgumentValue.

New structured errors added in s3err:
- `AccessForbiddenError`: Method, ResourceType
- `BadDigestError`: CalculatedDigest, ExpectedDigest
- `BucketError`: BucketName
- `ContentSHA256MismatchError`: ClientComputedContentSHA256, S3ComputedContentSHA256
- `EntityTooLargeError`: ProposedSize, MaxSizeAllowed
- `EntityTooSmallError`: ProposedSize, MinSizeAllowed
- `ExpiredPresignedURLError`: ServerTime, XAmzExpires, Expires
- `InvalidAccessKeyIdError`: AWSAccessKeyId
- `InvalidArgumentError`: Description, ArgumentName, ArgumentValue
- `InvalidChunkSizeError`: Chunk, BadChunkSize
- `InvalidDigestError`: ContentMD5
- `InvalidLocationConstraintError`: LocationConstraint
- `InvalidPartError`: UploadId, PartNumber, ETag
- `InvalidRangeError`: RangeRequested, ActualObjectSize
- `InvalidTagError`: TagKey, TagValue
- `KeyTooLongError`: Size, MaxSizeAllowed
- `MetadataTooLargeError`: Size, MaxSizeAllowed
- `MethodNotAllowedError`: Method, ResourceType, AllowedMethods
- `NoSuchUploadError`: UploadId
- `NoSuchVersionError`: Key, VersionId
- `NotImplementedError`: Header, AdditionalMessage
- `PreconditionFailedError`: Condition
- `RequestTimeTooSkewedError`: RequestTime, ServerTime, MaxAllowedSkewMilliseconds
- `SignatureDoesNotMatchError`: AWSAccessKeyId, StringToSign, SignatureProvided, StringToSignBytes, CanonicalRequest, CanonicalRequestBytes

Fix CompleteMultipartUpload validation in the Azure backend so missing or empty `ETag` values return the appropriate S3 error instead of allowing a gateway panic.

Fix presigned authentication expiration validation to compare server time in `UTC`, matching the `UTC` timestamp used by presigned URL signing.

Add request ID and host ID support across S3 requests. Each request now receives AWS S3-like identifiers, returned in response headers as `x-amz-request-id` and `x-amz-id-2` and included in all XML error responses as RequestId and HostId. The generated ID structure is designed to resemble AWS S3 request IDs and host IDs.

The request signature calculation/validation for streaming uploads was previously delayed until the request body was fully read, both for Authorization header authentication and presigned URLs.
Now, the signature is validated immediately in the authorization middlewares without reading the request body, since the signature calculation itself does not depend on the request body. Instead, only the `x-amz-content-sha256` SHA-256 hash calculation is delayed.
2026-05-21 23:49:34 +04:00

351 lines
9.8 KiB
Go

// Copyright 2026 Versity Software
// This file is licensed under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package utils
import (
"bufio"
"bytes"
"fmt"
"io"
"mime"
"net/textproto"
"strings"
"github.com/versity/versitygw/debuglogger"
"github.com/versity/versitygw/s3err"
)
// finalBoundaryDelimiterLen is the number of bytes that surround the boundary
// string in the closing multipart delimiter "\r\n--<boundary>--\r\n". Parse
// subtracts it, together with the boundary length, from the remaining request
// bytes to derive the expected file payload length.
const finalBoundaryDelimiterLen = 8 // len("\r\n--") + len("--\r\n")
// MultipartParser parses S3 browser-based POST multipart/form-data in a streaming way.
// It buffers regular form fields, but it does not buffer the file part.
//
// Instances are created with NewMultipartParser.
type MultipartParser struct {
// br wraps the request body; every line read by the parser goes through it,
// and it is handed to the file reader once the file part is reached.
br *bufio.Reader
// boundary is the whitespace-trimmed boundary value, without the leading "--".
boundary string
// requestContentLength is the length of the whole request body as supplied
// by the caller; it is used to derive the expected file payload length.
requestContentLength int64
// bytesRead is the number of body bytes consumed so far by readLine.
bytesRead int64
}
// NewMultipartParser builds a streaming multipart parser over body.
//
// boundary is the raw boundary value taken from the request Content-Type
// header; surrounding whitespace is stripped before it is stored.
// requestContentLength is the length of the whole request body.
//
// A nil body or a negative requestContentLength yields a plain error, while an
// empty boundary yields the S3 MalformedPOSTRequest error.
func NewMultipartParser(body io.Reader, boundary string, requestContentLength int64) (*MultipartParser, error) {
	switch {
	case body == nil:
		debuglogger.Logf("multipart parser requires non-nil body reader")
		return nil, fmt.Errorf("nil body reader")
	case requestContentLength < 0:
		debuglogger.Logf("invalid multipart request content-length: %d", requestContentLength)
		return nil, fmt.Errorf("invalid request content-length: %d", requestContentLength)
	}

	trimmed := strings.TrimSpace(boundary)
	if len(trimmed) == 0 {
		debuglogger.Logf("multipart boundary is empty")
		return nil, s3err.GetAPIError(s3err.ErrMalformedPOSTRequest)
	}

	parser := &MultipartParser{
		br:                   bufio.NewReader(body),
		boundary:             trimmed,
		requestContentLength: requestContentLength,
	}
	return parser, nil
}
// MpFileReader is the streaming interface for the file part of a multipart POST.
// It extends io.Reader with a Length method that returns the number of file-content
// bytes actually delivered to callers (boundary and delimiter bytes are not counted).
type MpFileReader interface {
// Reader streams the file payload.
io.Reader
// Length reports how many file-content bytes Read has returned so far.
Length() int64
}
// MpParseResult is the value returned by MultipartParser.Parse once the file
// part of the multipart body has been reached.
type MpParseResult struct {
// Fields contains all non-file form fields collected before the file part,
// keyed by lowercased field name.
Fields map[string]string
// FileRdr streams the file payload without buffering the entire part in memory.
FileRdr MpFileReader
// ContentLength is the expected byte length of the file payload only. It is
// derived from the request content length, not measured from the stream.
ContentLength int64
}
// Parse consumes the multipart body up to the first byte of the file part and
// returns:
//   - the non-file form fields seen before the file part, keyed by lowercased
//     field name
//   - a streaming reader over the file payload
//   - the expected file payload length
//
// A part is treated as the file part when its field name is "file" or its
// Content-Disposition carries a filename parameter. Repeated "x-amz-meta-*"
// fields are joined with ","; any other repeated field keeps its last value.
//
// The expected file length is derived from the request content length on the
// assumption that the file part is the last part and is followed only by the
// closing boundary.
//
// Structural problems in the body (missing or invalid Content-Disposition,
// wrong disposition type, missing field name, or a body too short to hold the
// closing boundary) are reported as the S3 MalformedPOSTRequest error. Errors
// from reading headers or field values are returned unchanged.
//
// The returned file reader MUST be read until EOF, otherwise final-boundary
// validation is not triggered.
func (mp *MultipartParser) Parse() (*MpParseResult, error) {
	fields := make(map[string]string)

	if err := mp.expectInitialBoundary(); err != nil {
		return nil, err
	}

	for {
		headers, err := mp.readHeaders()
		if err != nil {
			return nil, err
		}

		cd, ok := headers[textproto.CanonicalMIMEHeaderKey("Content-Disposition")]
		if !ok {
			debuglogger.Logf("multipart part is missing Content-Disposition header")
			return nil, s3err.GetAPIError(s3err.ErrMalformedPOSTRequest)
		}

		disp, params, err := mime.ParseMediaType(cd)
		if err != nil {
			debuglogger.Logf("invalid multipart Content-Disposition header %q: %v", cd, err)
			return nil, s3err.GetAPIError(s3err.ErrMalformedPOSTRequest)
		}
		if disp != "form-data" {
			debuglogger.Logf("unexpected multipart disposition: %s", disp)
			return nil, s3err.GetAPIError(s3err.ErrMalformedPOSTRequest)
		}

		name := strings.ToLower(params["name"])
		if name == "" {
			debuglogger.Logf("multipart part is missing field name")
			return nil, s3err.GetAPIError(s3err.ErrMalformedPOSTRequest)
		}

		// At this point, headers + blank line have already been consumed,
		// so bytesRead points exactly at the first byte of file content.
		if _, hasFilename := params["filename"]; name == "file" || hasFilename {
			fileContentLength := mp.requestContentLength - mp.bytesRead - int64(len(mp.boundary)) - finalBoundaryDelimiterLen
			if fileContentLength < 0 {
				// The body cannot even hold the closing boundary, so the
				// request is truncated or malformed. That is the client's
				// fault, so report it the same way as every other malformed
				// body instead of as an opaque internal error.
				debuglogger.Logf("calculated negative multipart file content-length: %d", fileContentLength)
				return nil, s3err.GetAPIError(s3err.ErrMalformedPOSTRequest)
			}

			return &MpParseResult{
				Fields: fields,
				FileRdr: &finalFileReader{
					r:       mp.br,
					trailer: []byte("\r\n--" + mp.boundary + "--\r\n"),
				},
				ContentLength: fileContentLength,
			}, nil
		}

		value, err := mp.readFieldValue()
		if err != nil {
			return nil, err
		}

		// Repeated user-metadata fields accumulate into one comma-separated value.
		if prev, dup := fields[name]; dup && strings.HasPrefix(name, "x-amz-meta-") {
			value = prev + "," + value
		}
		fields[name] = value
	}
}
// expectInitialBoundary reads the first line of the body and checks that it is
// exactly "--<boundary>". Any read failure or mismatch is reported as the S3
// MalformedPOSTRequest error.
func (mp *MultipartParser) expectInitialBoundary() error {
	got, _, readErr := mp.readLine()
	if readErr != nil {
		return s3err.GetAPIError(s3err.ErrMalformedPOSTRequest)
	}

	if expected := "--" + mp.boundary; got != expected {
		debuglogger.Logf("unexpected initial multipart boundary: got %q want %q", got, expected)
		return s3err.GetAPIError(s3err.ErrMalformedPOSTRequest)
	}
	return nil
}
// readHeaders reads the header lines of one part up to and including the blank
// line that ends them. Keys are stored in canonical MIME form and values are
// whitespace-trimmed; a repeated header keeps its last value. A read failure or
// a line without ":" is reported as the S3 MalformedPOSTRequest error.
func (mp *MultipartParser) readHeaders() (map[string]string, error) {
	parsed := map[string]string{}
	for {
		entry, _, err := mp.readLine()
		if err != nil {
			return nil, s3err.GetAPIError(s3err.ErrMalformedPOSTRequest)
		}

		// An empty line marks the end of the header section.
		if len(entry) == 0 {
			return parsed, nil
		}

		sep := strings.IndexByte(entry, ':')
		if sep < 0 {
			debuglogger.Logf("invalid multipart header line: %q", entry)
			return nil, s3err.GetAPIError(s3err.ErrMalformedPOSTRequest)
		}

		name := textproto.CanonicalMIMEHeaderKey(strings.TrimSpace(entry[:sep]))
		parsed[name] = strings.TrimSpace(entry[sep+1:])
	}
}
// readFieldValue collects the value of a regular form field, reading lines
// until the next "--<boundary>" delimiter line. The value is returned byte for
// byte, minus the single trailing CRLF that belongs to the delimiter.
//
// If the closing "--<boundary>--" line is reached instead, the body ended
// without a file part and an InvalidArgument error is returned. Errors from
// readLine are passed through unchanged.
func (mp *MultipartParser) readFieldValue() (string, error) {
	var (
		delimiter = "--" + mp.boundary
		closing   = delimiter + "--"
		value     bytes.Buffer
	)

	for {
		line, raw, err := mp.readLine()
		if err != nil {
			return "", err
		}

		if line == delimiter {
			trimTrailingCRLF(&value)
			return value.String(), nil
		}
		if line == closing {
			debuglogger.Logf("multipart POST ended before file part was found")
			// S3 returns '0' as ArgumentValue
			return "", s3err.GetInvalidArgumentErr(s3err.InvalidArgPOSTFileRequired, "0")
		}

		value.Write(raw)
	}
}
// readLine reads up to and including the next '\n' and adds the bytes consumed
// to mp.bytesRead. It returns the line with its trailing CRLF removed together
// with the raw bytes including the CRLF.
//
// A read failure, or a line that does not end in CRLF, is reported as the S3
// MalformedPOSTRequest error. Bytes of a line that fails the CRLF check are
// still counted; bytes of a failed read are not.
//
// NOTE(review): ReadString places no upper bound on the line length, so a body
// without any '\n' is buffered in full — confirm an outer size limit exists.
func (mp *MultipartParser) readLine() (string, []byte, error) {
	const crlf = "\r\n"

	raw, err := mp.br.ReadString('\n')
	if err != nil {
		debuglogger.Logf("failed to read multipart line: %v", err)
		return "", nil, s3err.GetAPIError(s3err.ErrMalformedPOSTRequest)
	}
	mp.bytesRead += int64(len(raw))

	// TrimSuffix leaves the string untouched when the suffix is absent, so an
	// unchanged length means the line was not CRLF-terminated.
	line := strings.TrimSuffix(raw, crlf)
	if len(line) == len(raw) {
		debuglogger.Logf("multipart line is not CRLF-terminated: %q", raw)
		return "", nil, s3err.GetAPIError(s3err.ErrMalformedPOSTRequest)
	}
	return line, []byte(raw), nil
}
// trimTrailingCRLF drops one trailing "\r\n" from buf when present and leaves
// buf untouched otherwise.
func trimTrailingCRLF(buf *bytes.Buffer) {
	if bytes.HasSuffix(buf.Bytes(), []byte("\r\n")) {
		buf.Truncate(buf.Len() - 2)
	}
}
// finalFileReader streams file bytes until it reaches the final multipart boundary:
//
// \r\n--<boundary>--\r\n
//
// Any epilogue bytes after that final boundary are ignored.
type finalFileReader struct {
// r is the buffered request-body reader handed over by Parse once the
// headers of the file part have been consumed.
r *bufio.Reader
// trailer is the exact byte sequence that terminates the file part.
trailer []byte
// buf keeps unread bytes plus a trailer-sized lookbehind window so
// boundary bytes split across reads are not emitted as file content.
buf []byte
// bytesRead counts only the file-content bytes delivered to callers.
// Boundary and delimiter bytes are never included in this count.
bytesRead int64
// done is set once the trailer has been found at the start of buf; every
// later Read returns io.EOF.
done bool
// failed holds a sticky error (truncated body or underlying read failure);
// once set, every later Read returns it.
failed error
// eof records that the underlying reader has returned io.EOF, so no
// further data will arrive.
eof bool
}
// Read implements io.Reader over the file part of the multipart body.
//
// It copies file-content bytes into p and holds back the last len(trailer)-1
// buffered bytes until further data shows they are not the start of the
// closing boundary. Once the closing boundary is reached it returns io.EOF,
// and keeps returning io.EOF on later calls. If the underlying stream ends
// before the closing boundary is seen, it returns the S3 MalformedPOSTRequest
// error. That error, and any non-EOF error from the underlying reader, is
// sticky: every later call returns the same error.
//
// The internal buffer is read into directly and compacted in place after each
// delivery, so it is allocated once instead of once per refill.
func (r *finalFileReader) Read(p []byte) (int, error) {
	// readChunk is how many bytes are requested from the underlying reader
	// on each refill.
	const readChunk = 4096

	if r.failed != nil {
		return 0, r.failed
	}
	if r.done {
		return 0, io.EOF
	}
	if len(p) == 0 {
		return 0, nil
	}

	for {
		// If the final boundary is already in the buffer, return only file
		// bytes before it and stop cleanly once the boundary starts at buf[0].
		if idx := bytes.Index(r.buf, r.trailer); idx >= 0 {
			if idx == 0 {
				r.buf = nil
				r.done = true
				return 0, io.EOF
			}
			return r.deliver(p, idx), nil
		}

		// No trailer starts anywhere in buf, so a future match can only begin
		// within the last len(trailer)-1 bytes. Everything before that point
		// is safe to release to the caller.
		if safe := len(r.buf) - len(r.trailer) + 1; safe > 0 {
			return r.deliver(p, safe), nil
		}

		if r.eof {
			// Reaching EOF without finding the expected closing boundary means
			// the multipart body was truncated or malformed.
			debuglogger.Logf("multipart file stream ended before final boundary %q", string(r.trailer))
			r.failed = s3err.GetAPIError(s3err.ErrMalformedPOSTRequest)
			return 0, r.failed
		}

		// Refill. At this point buf holds fewer than len(trailer) bytes, and
		// deliver keeps it anchored at the start of its backing array, so the
		// capacity reserved here is enough for every later refill.
		if cap(r.buf)-len(r.buf) < readChunk {
			grown := make([]byte, len(r.buf), len(r.buf)+len(r.trailer)+readChunk)
			copy(grown, r.buf)
			r.buf = grown
		}

		// Read straight into the spare capacity instead of a scratch slice.
		n, err := r.r.Read(r.buf[len(r.buf) : len(r.buf)+readChunk])
		r.buf = r.buf[:len(r.buf)+n]

		if err == io.EOF {
			// Re-run the checks above on whatever arrived with the EOF.
			r.eof = true
			continue
		}
		if err != nil {
			debuglogger.Logf("failed to read multipart file data: %v", err)
			r.failed = err
			return 0, err
		}
	}
}

// deliver copies up to limit buffered bytes into p, shifts the remaining bytes
// to the front of the buffer so its capacity is kept, and adds the delivered
// count to bytesRead. It returns the number of bytes copied.
func (r *finalFileReader) deliver(p []byte, limit int) int {
	n := copy(p, r.buf[:limit])
	// copy handles overlapping source and destination, so this is a safe
	// in-place shift.
	rest := copy(r.buf, r.buf[n:])
	r.buf = r.buf[:rest]
	r.bytesRead += int64(n)
	return n
}
// Length reports how many bytes of file content Read has handed to callers so
// far. Bytes that belong to the multipart boundary or its delimiters are never
// part of this total.
func (r *finalFileReader) Length() int64 {
	delivered := r.bytesRead
	return delivered
}