diff --git a/backend/posix/posix.go b/backend/posix/posix.go index de4214e..bb99ebe 100644 --- a/backend/posix/posix.go +++ b/backend/posix/posix.go @@ -1352,6 +1352,8 @@ func getPartChecksum(algo types.ChecksumAlgorithm, part types.CompletedPart) str return backend.GetStringFromPtr(part.ChecksumSHA1) case types.ChecksumAlgorithmSha256: return backend.GetStringFromPtr(part.ChecksumSHA256) + case types.ChecksumAlgorithmCrc64nvme: + return backend.GetStringFromPtr(part.ChecksumCRC64NVME) default: return "" } @@ -1418,6 +1420,12 @@ func (p *Posix) CompleteMultipartUpload(ctx context.Context, input *s3.CompleteM last := len(parts) - 1 var totalsize int64 + var composableCRC bool + switch checksums.Type { + case types.ChecksumTypeFullObject: + composableCRC = utils.IsChecksumComposable(checksumAlgorithm) + } + // The initialie values is the lower limit of partNumber: 0 var partNumber int32 for i, part := range parts { @@ -1441,7 +1449,7 @@ func (p *Posix) CompleteMultipartUpload(ctx context.Context, input *s3.CompleteM } totalsize += fi.Size() - // all parts except the last need to be greater, thena + // all parts except the last need to be greater, than or equal to // the minimum allowed size (5 Mib) if i < last && fi.Size() < backend.MinPartSize { return res, "", s3err.GetAPIError(s3err.ErrEntityTooSmall) @@ -1476,9 +1484,11 @@ func (p *Posix) CompleteMultipartUpload(ctx context.Context, input *s3.CompleteM var compositeChecksumRdr *utils.CompositeChecksumReader switch checksums.Type { case types.ChecksumTypeFullObject: - hashRdr, err = utils.NewHashReader(nil, "", utils.HashType(strings.ToLower(string(checksumAlgorithm)))) - if err != nil { - return res, "", fmt.Errorf("initialize hash reader: %w", err) + if !composableCRC { + hashRdr, err = utils.NewHashReader(nil, "", utils.HashType(strings.ToLower(string(checksumAlgorithm)))) + if err != nil { + return res, "", fmt.Errorf("initialize hash reader: %w", err) + } } case types.ChecksumTypeComposite: 
compositeChecksumRdr, err = utils.NewCompositeChecksumReader(utils.HashType(strings.ToLower(string(checksumAlgorithm)))) @@ -1497,22 +1507,46 @@ func (p *Posix) CompleteMultipartUpload(ctx context.Context, input *s3.CompleteM } defer f.cleanup() - for _, part := range parts { + var composableCsum string + for i, part := range parts { partObjPath := filepath.Join(objdir, uploadID, fmt.Sprintf("%v", *part.PartNumber)) fullPartPath := filepath.Join(bucket, partObjPath) pf, err := os.Open(fullPartPath) if err != nil { return res, "", fmt.Errorf("open part %v: %v", *part.PartNumber, err) } + pfi, err := pf.Stat() + if err != nil { + pf.Close() + return res, "", fmt.Errorf("stat part %v: %v", *part.PartNumber, err) + } var rdr io.Reader = pf - if checksums.Type == types.ChecksumTypeFullObject { + switch checksums.Type { + case types.ChecksumTypeFullObject: + if composableCRC { + if i == 0 { + composableCsum = getPartChecksum(checksumAlgorithm, part) + break + } + composableCsum, err = utils.AddCRCChecksum(checksumAlgorithm, + composableCsum, getPartChecksum(checksumAlgorithm, part), + pfi.Size()) + if err != nil { + pf.Close() + return res, "", fmt.Errorf("add part %v checksum: %w", + *part.PartNumber, err) + } + break + } hashRdr.SetReader(rdr) rdr = hashRdr - } else if checksums.Type == types.ChecksumTypeComposite { + case types.ChecksumTypeComposite: err := compositeChecksumRdr.Process(getPartChecksum(checksumAlgorithm, part)) if err != nil { - return res, "", fmt.Errorf("process %v part checksum: %w", *part.PartNumber, err) + pf.Close() + return res, "", fmt.Errorf("process %v part checksum: %w", + *part.PartNumber, err) } } @@ -1621,7 +1655,11 @@ func (p *Posix) CompleteMultipartUpload(ctx context.Context, input *s3.CompleteM case types.ChecksumTypeComposite: sum = compositeChecksumRdr.Sum() case types.ChecksumTypeFullObject: - sum = hashRdr.Sum() + if !composableCRC { + sum = hashRdr.Sum() + } else { + sum = composableCsum + } } switch checksumAlgorithm { diff 
--git a/s3api/utils/crc.go b/s3api/utils/crc.go new file mode 100644 index 0000000..d373050 --- /dev/null +++ b/s3api/utils/crc.go @@ -0,0 +1,180 @@ +// Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. +// +// Jean-loup Gailly Mark Adler +// jloup@gzip.org madler@alumni.caltech.edu + +// Original implementation is from +// https://github.com/vimeo/go-util/blob/8cd4c737f091d9317f72b25df78ce6cf869f7d30/crc32combine/crc32combine.go +// extended for crc64 support. + +// Following is ported from C to Go in 2016 by Justin Ruggles, with minimal alteration. +// Used uint for unsigned long. Used uint32 for input arguments in order to match +// the Go hash/crc32 package. 
// zlib CRC32 combine (https://github.com/madler/zlib)

// crc64NVME is the CRC-64/NVME generator polynomial in the bit-reversed
// (least-significant-bit first) form that hash/crc64.MakeTable expects.
const crc64NVME = 0x9a6c_9329_ac4b_c9b5

// crc64NVMETable is the hash/crc64 lookup table for crc64NVME. It is built
// once at package initialization and shared by all users.
var crc64NVMETable = crc64.MakeTable(crc64NVME)

// gf2MatrixTimes multiplies the GF(2) matrix mat by the bit vector vec and
// returns the resulting vector. Row i of mat is XORed into the result for
// every bit i that is set in vec. vec must not have bits set at positions
// at or beyond len(mat).
func gf2MatrixTimes(mat []uint64, vec uint64) uint64 {
	var sum uint64
	for i := 0; vec != 0; i, vec = i+1, vec>>1 {
		if vec&1 != 0 {
			sum ^= mat[i]
		}
	}
	return sum
}

// gf2MatrixSquare stores mat*mat into square. Both matrices must have the
// same dimension; a mismatch is a programming error and panics.
func gf2MatrixSquare(square, mat []uint64) {
	if len(square) != len(mat) {
		panic("square matrix size mismatch")
	}
	for n := range mat {
		square[n] = gf2MatrixTimes(mat, mat[n])
	}
}

// crcCombine is the shared implementation behind crc32Combine and
// crc64Combine. width is the CRC width in bits (at most 64), poly is the
// reflected generator polynomial, and len2 is the number of bytes covered
// by crc2. For len2 <= 0 it returns crc1 unchanged.
func crcCombine(width int, poly, crc1, crc2 uint64, len2 int64) uint64 {
	// degenerate case (also disallow negative lengths)
	if len2 <= 0 {
		return crc1
	}

	// Fixed-size scratch space for the two operator matrices; only the
	// first width rows are used.
	var evenRows, oddRows [64]uint64
	even, odd := evenRows[:width], oddRows[:width]

	// put operator for one zero bit in odd
	odd[0] = poly
	row := uint64(1)
	for n := 1; n < width; n++ {
		odd[n] = row
		row <<= 1
	}

	// put operator for two zero bits in even
	gf2MatrixSquare(even, odd)

	// put operator for four zero bits in odd
	gf2MatrixSquare(odd, even)

	// Apply len2 zero bytes to crc1. Each squaring doubles the number of
	// zeros the operator appends, so the first pass yields the operator for
	// one zero byte (eight zero bits). The two matrices swap roles on every
	// pass, one pass per bit of len2.
	src, dst := odd, even
	for ; len2 != 0; len2 >>= 1 {
		gf2MatrixSquare(dst, src)
		if len2&1 != 0 {
			crc1 = gf2MatrixTimes(dst, crc1)
		}
		src, dst = dst, src
	}

	// return combined crc
	return crc1 ^ crc2
}

// crc32Combine returns the combined CRC-32 hash value of the two passed CRC-32
// hash values crc1 and crc2. poly represents the generator polynomial
// and len2 specifies the byte length that the crc2 hash covers.
// A len2 that is zero or negative leaves crc1 unchanged.
func crc32Combine(poly uint32, crc1, crc2 uint32, len2 int64) uint32 {
	return uint32(crcCombine(32, uint64(poly), uint64(crc1), uint64(crc2), len2))
}

// crc64Combine returns the combined CRC-64 hash value of the two passed CRC-64
// hash values crc1 and crc2. poly represents the generator polynomial
// and len2 specifies the byte length that the crc2 hash covers.
// A len2 that is zero or negative leaves crc1 unchanged.
func crc64Combine(poly uint64, crc1, crc2 uint64, len2 int64) uint64 {
	return crcCombine(64, poly, crc1, crc2, len2)
}
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package utils + +import ( + "hash/crc32" + "hash/crc64" + "testing" +) + +func TestCRC32Combine(t *testing.T) { + data := []byte("The quick brown fox jumps over the lazy dog") + mid := len(data) / 2 + part1 := data[:mid] + part2 := data[mid:] + + var poly uint32 = crc32.IEEE + tab := crc32.MakeTable(poly) + crc1 := crc32.Checksum(part1, tab) + crc2 := crc32.Checksum(part2, tab) + combined := crc32Combine(poly, crc1, crc2, int64(len(part2))) + full := crc32.Checksum(data, tab) + + if combined != full { + t.Errorf("crc32Combine failed: got %08x, want %08x", combined, full) + } +} + +func TestCRC64Combine(t *testing.T) { + data := []byte("The quick brown fox jumps over the lazy dog") + mid := len(data) / 2 + part1 := data[:mid] + part2 := data[mid:] + + var poly uint64 = crc64NVME + tab := crc64NVMETable + crc1 := crc64.Checksum(part1, tab) + crc2 := crc64.Checksum(part2, tab) + combined := crc64Combine(poly, crc1, crc2, int64(len(part2))) + full := crc64.Checksum(data, tab) + + if combined != full { + t.Errorf("crc64Combine failed: got %016x, want %016x", combined, full) + } +} diff --git a/s3api/utils/csum-reader.go b/s3api/utils/csum-reader.go index ce2e35e..d8fe62d 100644 --- a/s3api/utils/csum-reader.go +++ b/s3api/utils/csum-reader.go @@ -26,7 +26,6 @@ import ( "hash/crc32" "hash/crc64" "io" - "math/bits" "github.com/aws/aws-sdk-go-v2/service/s3/types" "github.com/versity/versitygw/s3err" @@ -89,7 +88,7 @@ func NewHashReader(r io.Reader, expectedSum string, ht HashType) (*HashReader, e case HashTypeCRC32C: hash = 
crc32.New(crc32.MakeTable(crc32.Castagnoli)) case HashTypeCRC64NVME: - hash = crc64.New(crc64.MakeTable(bits.Reverse64(0xad93d23594c93659))) + hash = crc64.New(crc64NVMETable) case HashTypeNone: hash = noop{} default: @@ -185,7 +184,7 @@ func (hr *HashReader) Type() HashType { return hr.hashType } -// Md5SumString converts the hash bytes to the string checksum value +// Base64SumString converts the hash bytes to the b64 encoded string checksum value func Base64SumString(b []byte) string { return base64.StdEncoding.EncodeToString(b) } @@ -198,6 +197,108 @@ func (n noop) Reset() {} func (n noop) Size() int { return 0 } func (n noop) BlockSize() int { return 1 } +// IsChecksumComposable tests if the final foll object crc can be calculated +// based on the part crc values. +func IsChecksumComposable(algo types.ChecksumAlgorithm) bool { + switch algo { + case types.ChecksumAlgorithmCrc32, types.ChecksumAlgorithmCrc32c, types.ChecksumAlgorithmCrc64nvme: + return true + default: + return false + } +} + +// AddCRCChecksum calculates the composite CRC checksum after adding the part crc. +// Only CRC32, CRC32C, and CRC64NVME are supported. The input checksums must be base64-encoded strings. 
+func AddCRCChecksum(algo types.ChecksumAlgorithm, crc, partCrc string, partLen int64) (string, error) { + switch algo { + case types.ChecksumAlgorithmCrc32: + data, err := base64.StdEncoding.DecodeString(partCrc) + if err != nil { + return "", fmt.Errorf("base64 decode partCrc: %w", err) + } + if len(data) != 4 { + return "", fmt.Errorf("invalid crc32 part checksum length: %d", len(data)) + } + currentCRC, err := base64.StdEncoding.DecodeString(crc) + if err != nil { + return "", fmt.Errorf("base64 decode crc: %w", err) + } + if len(currentCRC) != 4 { + return "", fmt.Errorf("invalid crc32 checksum length: %d", len(currentCRC)) + } + + currentVal := uint32(currentCRC[0])<<24 | uint32(currentCRC[1])<<16 | uint32(currentCRC[2])<<8 | uint32(currentCRC[3]) + val := uint32(data[0])<<24 | uint32(data[1])<<16 | uint32(data[2])<<8 | uint32(data[3]) + composite := crc32Combine(crc32.IEEE, currentVal, val, partLen) + + out := []byte{ + byte(composite >> 24), + byte(composite >> 16), + byte(composite >> 8), + byte(composite), + } + return base64.StdEncoding.EncodeToString(out), nil + case types.ChecksumAlgorithmCrc32c: + data, err := base64.StdEncoding.DecodeString(partCrc) + if err != nil { + return "", fmt.Errorf("base64 decode partCrc: %w", err) + } + if len(data) != 4 { + return "", fmt.Errorf("invalid crc32 part checksum length: %d", len(data)) + } + currentCRC, err := base64.StdEncoding.DecodeString(crc) + if err != nil { + return "", fmt.Errorf("base64 decode crc: %w", err) + } + if len(currentCRC) != 4 { + return "", fmt.Errorf("invalid crc32 checksum length: %d", len(currentCRC)) + } + + currentVal := uint32(currentCRC[0])<<24 | uint32(currentCRC[1])<<16 | uint32(currentCRC[2])<<8 | uint32(currentCRC[3]) + val := uint32(data[0])<<24 | uint32(data[1])<<16 | uint32(data[2])<<8 | uint32(data[3]) + composite := crc32Combine(crc32.Castagnoli, currentVal, val, partLen) + + // Convert composite to big-endian bytes + out := []byte{ + byte(composite >> 24), + byte(composite 
>> 16), + byte(composite >> 8), + byte(composite), + } + return base64.StdEncoding.EncodeToString(out), nil + case types.ChecksumAlgorithmCrc64nvme: + data, err := base64.StdEncoding.DecodeString(partCrc) + if err != nil { + return "", fmt.Errorf("base64 decode partCrc: %w", err) + } + if len(data) != 8 { + return "", fmt.Errorf("invalid crc64 part checksum length: %d", len(data)) + } + currentCRC, err := base64.StdEncoding.DecodeString(crc) + if err != nil { + return "", fmt.Errorf("base64 decode crc: %w", err) + } + if len(currentCRC) != 8 { + return "", fmt.Errorf("invalid crc64 checksum length: %d", len(currentCRC)) + } + + currentVal := uint64(currentCRC[0])<<56 | uint64(currentCRC[1])<<48 | uint64(currentCRC[2])<<40 | uint64(currentCRC[3])<<32 | + uint64(currentCRC[4])<<24 | uint64(currentCRC[5])<<16 | uint64(currentCRC[6])<<8 | uint64(currentCRC[7]) + val := uint64(data[0])<<56 | uint64(data[1])<<48 | uint64(data[2])<<40 | uint64(data[3])<<32 | + uint64(data[4])<<24 | uint64(data[5])<<16 | uint64(data[6])<<8 | uint64(data[7]) + composite := crc64Combine(crc64NVME, currentVal, val, partLen) + + out := []byte{ + byte(composite >> 56), byte(composite >> 48), byte(composite >> 40), byte(composite >> 32), + byte(composite >> 24), byte(composite >> 16), byte(composite >> 8), byte(composite), + } + return base64.StdEncoding.EncodeToString(out), nil + default: + return "", fmt.Errorf("composite checksum not supported for algorithm: %v", algo) + } +} + // NewCompositeChecksumReader initializes a composite checksum // processor, which decodes and validates the provided // checksums and returns the final checksum based on diff --git a/s3api/utils/csum-reader_test.go b/s3api/utils/csum-reader_test.go new file mode 100644 index 0000000..e45e981 --- /dev/null +++ b/s3api/utils/csum-reader_test.go @@ -0,0 +1,120 @@ +// Copyright 2025 Versity Software +// This file is licensed under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in 
compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package utils + +import ( + "encoding/base64" + "hash/crc32" + "hash/crc64" + "testing" + + "github.com/aws/aws-sdk-go-v2/service/s3/types" +) + +func TestAddCRCChecksum_CRC32(t *testing.T) { + data := []byte("this is a test buffer for crc32") + mid := len(data) / 2 + part1 := data[:mid] + part2 := data[mid:] + + crc1 := crc32.Checksum(part1, crc32.IEEETable) + crc2 := crc32.Checksum(part2, crc32.IEEETable) + crcFull := crc32.Checksum(data, crc32.IEEETable) + + crc1b := []byte{byte(crc1 >> 24), byte(crc1 >> 16), byte(crc1 >> 8), byte(crc1)} + crc2b := []byte{byte(crc2 >> 24), byte(crc2 >> 16), byte(crc2 >> 8), byte(crc2)} + crc1b64 := base64.StdEncoding.EncodeToString(crc1b) + crc2b64 := base64.StdEncoding.EncodeToString(crc2b) + + combined, err := AddCRCChecksum(types.ChecksumAlgorithmCrc32, crc1b64, crc2b64, int64(len(part2))) + if err != nil { + t.Fatalf("AddCRCChecksum failed: %v", err) + } + combinedBytes, err := base64.StdEncoding.DecodeString(combined) + if err != nil { + t.Fatalf("base64 decode failed: %v", err) + } + combinedVal := uint32(combinedBytes[0])<<24 | uint32(combinedBytes[1])<<16 | uint32(combinedBytes[2])<<8 | uint32(combinedBytes[3]) + if combinedVal != crcFull { + t.Errorf("CRC32 combine mismatch: got %x, want %x", combinedVal, crcFull) + } +} + +func TestAddCRCChecksum_CRC32c(t *testing.T) { + data := []byte("this is a test buffer for crc32c") + mid := len(data) / 2 + part1 := data[:mid] + part2 := data[mid:] + + castagnoli := crc32.MakeTable(crc32.Castagnoli) + crc1 := 
crc32.Checksum(part1, castagnoli) + crc2 := crc32.Checksum(part2, castagnoli) + crcFull := crc32.Checksum(data, castagnoli) + + crc1b := []byte{byte(crc1 >> 24), byte(crc1 >> 16), byte(crc1 >> 8), byte(crc1)} + crc2b := []byte{byte(crc2 >> 24), byte(crc2 >> 16), byte(crc2 >> 8), byte(crc2)} + crc1b64 := base64.StdEncoding.EncodeToString(crc1b) + crc2b64 := base64.StdEncoding.EncodeToString(crc2b) + + combined, err := AddCRCChecksum(types.ChecksumAlgorithmCrc32c, crc1b64, crc2b64, int64(len(part2))) + if err != nil { + t.Fatalf("AddCRCChecksum failed: %v", err) + } + combinedBytes, err := base64.StdEncoding.DecodeString(combined) + if err != nil { + t.Fatalf("base64 decode failed: %v", err) + } + combinedVal := uint32(combinedBytes[0])<<24 | uint32(combinedBytes[1])<<16 | uint32(combinedBytes[2])<<8 | uint32(combinedBytes[3]) + if combinedVal != crcFull { + t.Errorf("CRC32c combine mismatch: got %x, want %x", combinedVal, crcFull) + } +} + +func TestAddCRCChecksum_CRC64NVME(t *testing.T) { + data := []byte("this is a test buffer for crc64nvme") + mid := len(data) / 2 + part1 := data[:mid] + part2 := data[mid:] + + table := crc64NVMETable + crc1 := crc64.Checksum(part1, table) + crc2 := crc64.Checksum(part2, table) + crcFull := crc64.Checksum(data, table) + + crc1b := []byte{ + byte(crc1 >> 56), byte(crc1 >> 48), byte(crc1 >> 40), byte(crc1 >> 32), + byte(crc1 >> 24), byte(crc1 >> 16), byte(crc1 >> 8), byte(crc1), + } + crc2b := []byte{ + byte(crc2 >> 56), byte(crc2 >> 48), byte(crc2 >> 40), byte(crc2 >> 32), + byte(crc2 >> 24), byte(crc2 >> 16), byte(crc2 >> 8), byte(crc2), + } + crc1b64 := base64.StdEncoding.EncodeToString(crc1b) + crc2b64 := base64.StdEncoding.EncodeToString(crc2b) + + combined, err := AddCRCChecksum(types.ChecksumAlgorithmCrc64nvme, crc1b64, crc2b64, int64(len(part2))) + if err != nil { + t.Fatalf("AddCRCChecksum failed: %v", err) + } + combinedBytes, err := base64.StdEncoding.DecodeString(combined) + if err != nil { + t.Fatalf("base64 decode 
failed: %v", err) + } + combinedVal := uint64(combinedBytes[0])<<56 | uint64(combinedBytes[1])<<48 | uint64(combinedBytes[2])<<40 | uint64(combinedBytes[3])<<32 | + uint64(combinedBytes[4])<<24 | uint64(combinedBytes[5])<<16 | uint64(combinedBytes[6])<<8 | uint64(combinedBytes[7]) + if combinedVal != crcFull { + t.Errorf("CRC64NVME combine mismatch: got %x, want %x", combinedVal, crcFull) + } +}