Merge pull request #1348 from versity/ben/crc-mp-complete

feat: calculate full object crc for multi-part uploads for compatible checksums
This commit is contained in:
Ben McClelland
2025-07-04 09:50:21 -07:00
committed by GitHub
5 changed files with 508 additions and 12 deletions

View File

@@ -1352,6 +1352,8 @@ func getPartChecksum(algo types.ChecksumAlgorithm, part types.CompletedPart) str
return backend.GetStringFromPtr(part.ChecksumSHA1)
case types.ChecksumAlgorithmSha256:
return backend.GetStringFromPtr(part.ChecksumSHA256)
case types.ChecksumAlgorithmCrc64nvme:
return backend.GetStringFromPtr(part.ChecksumCRC64NVME)
default:
return ""
}
@@ -1418,6 +1420,12 @@ func (p *Posix) CompleteMultipartUpload(ctx context.Context, input *s3.CompleteM
last := len(parts) - 1
var totalsize int64
var composableCRC bool
switch checksums.Type {
case types.ChecksumTypeFullObject:
composableCRC = utils.IsChecksumComposable(checksumAlgorithm)
}
// The initial value is the lower limit of partNumber: 0
var partNumber int32
for i, part := range parts {
@@ -1441,7 +1449,7 @@ func (p *Posix) CompleteMultipartUpload(ctx context.Context, input *s3.CompleteM
}
totalsize += fi.Size()
// all parts except the last need to be greater, thena
// all parts except the last need to be greater, than or equal to
// the minimum allowed size (5 Mib)
if i < last && fi.Size() < backend.MinPartSize {
return res, "", s3err.GetAPIError(s3err.ErrEntityTooSmall)
@@ -1476,9 +1484,11 @@ func (p *Posix) CompleteMultipartUpload(ctx context.Context, input *s3.CompleteM
var compositeChecksumRdr *utils.CompositeChecksumReader
switch checksums.Type {
case types.ChecksumTypeFullObject:
hashRdr, err = utils.NewHashReader(nil, "", utils.HashType(strings.ToLower(string(checksumAlgorithm))))
if err != nil {
return res, "", fmt.Errorf("initialize hash reader: %w", err)
if !composableCRC {
hashRdr, err = utils.NewHashReader(nil, "", utils.HashType(strings.ToLower(string(checksumAlgorithm))))
if err != nil {
return res, "", fmt.Errorf("initialize hash reader: %w", err)
}
}
case types.ChecksumTypeComposite:
compositeChecksumRdr, err = utils.NewCompositeChecksumReader(utils.HashType(strings.ToLower(string(checksumAlgorithm))))
@@ -1497,22 +1507,46 @@ func (p *Posix) CompleteMultipartUpload(ctx context.Context, input *s3.CompleteM
}
defer f.cleanup()
for _, part := range parts {
var composableCsum string
for i, part := range parts {
partObjPath := filepath.Join(objdir, uploadID, fmt.Sprintf("%v", *part.PartNumber))
fullPartPath := filepath.Join(bucket, partObjPath)
pf, err := os.Open(fullPartPath)
if err != nil {
return res, "", fmt.Errorf("open part %v: %v", *part.PartNumber, err)
}
pfi, err := pf.Stat()
if err != nil {
pf.Close()
return res, "", fmt.Errorf("stat part %v: %v", *part.PartNumber, err)
}
var rdr io.Reader = pf
if checksums.Type == types.ChecksumTypeFullObject {
switch checksums.Type {
case types.ChecksumTypeFullObject:
if composableCRC {
if i == 0 {
composableCsum = getPartChecksum(checksumAlgorithm, part)
break
}
composableCsum, err = utils.AddCRCChecksum(checksumAlgorithm,
composableCsum, getPartChecksum(checksumAlgorithm, part),
pfi.Size())
if err != nil {
pf.Close()
return res, "", fmt.Errorf("add part %v checksum: %w",
*part.PartNumber, err)
}
break
}
hashRdr.SetReader(rdr)
rdr = hashRdr
} else if checksums.Type == types.ChecksumTypeComposite {
case types.ChecksumTypeComposite:
err := compositeChecksumRdr.Process(getPartChecksum(checksumAlgorithm, part))
if err != nil {
return res, "", fmt.Errorf("process %v part checksum: %w", *part.PartNumber, err)
pf.Close()
return res, "", fmt.Errorf("process %v part checksum: %w",
*part.PartNumber, err)
}
}
@@ -1621,7 +1655,11 @@ func (p *Posix) CompleteMultipartUpload(ctx context.Context, input *s3.CompleteM
case types.ChecksumTypeComposite:
sum = compositeChecksumRdr.Sum()
case types.ChecksumTypeFullObject:
sum = hashRdr.Sum()
if !composableCRC {
sum = hashRdr.Sum()
} else {
sum = composableCsum
}
}
switch checksumAlgorithm {

180
s3api/utils/crc.go Normal file
View File

@@ -0,0 +1,180 @@
// Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgment in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
//
// Jean-loup Gailly Mark Adler
// jloup@gzip.org madler@alumni.caltech.edu
// Original implementation is from
// https://github.com/vimeo/go-util/blob/8cd4c737f091d9317f72b25df78ce6cf869f7d30/crc32combine/crc32combine.go
// extended for crc64 support.
// Following is ported from C to Go in 2016 by Justin Ruggles, with minimal alteration.
// Used uint for unsigned long. Used uint32 for input arguments in order to match
// the Go hash/crc32 package. zlib CRC32 combine (https://github.com/madler/zlib)
package utils
import (
"hash/crc64"
)
// crc64NVME is the CRC-64/NVME generator polynomial in bit-reversed form
// (the bit reversal of 0xad93d23594c93659). It is the value handed to
// crc64.MakeTable below and the poly argument expected by crc64Combine
// for this checksum.
const crc64NVME = 0x9a6c_9329_ac4b_c9b5
// crc64NVMETable is the lookup table for the crc64NVME polynomial, built
// once when the package is initialized.
var crc64NVMETable = crc64.MakeTable(crc64NVME)
// gf2MatrixTimes multiplies the GF(2) matrix mat by the bit vector vec and
// returns the resulting vector. Element i of mat is XORed into the result
// when bit i of vec is set. mat must have an element for every set bit of
// vec; otherwise the function panics with an index out of range.
func gf2MatrixTimes(mat []uint64, vec uint64) uint64 {
	var sum uint64
	for i := 0; vec != 0; i, vec = i+1, vec>>1 {
		if vec&1 != 0 {
			sum ^= mat[i]
		}
	}
	return sum
}

// gf2MatrixSquare stores mat*mat (over GF(2)) into square. Both slices must
// have the same length and must not alias each other; a length mismatch is
// a programming error and panics.
func gf2MatrixSquare(square, mat []uint64) {
	if len(square) != len(mat) {
		panic("square matrix size mismatch")
	}
	for n := range mat {
		square[n] = gf2MatrixTimes(mat, mat[n])
	}
}

// crcCombine is the width-independent core shared by crc32Combine and
// crc64Combine. width is the CRC size in bits (at most 64), poly is the
// generator polynomial in the same bit order the callers pass in, and len2
// is the number of bytes covered by crc2. It returns crc1 unchanged when
// len2 is zero or negative.
func crcCombine(width int, poly, crc1, crc2 uint64, len2 int64) uint64 {
	// degenerate case (also disallow negative lengths)
	if len2 <= 0 {
		return crc1
	}

	// Fixed-size backing arrays keep the scratch matrices off the heap.
	var evenBuf, oddBuf [64]uint64
	even, odd := evenBuf[:width], oddBuf[:width]

	// put operator for one zero bit in odd
	odd[0] = poly
	row := uint64(1)
	for n := 1; n < width; n++ {
		odd[n] = row
		row <<= 1
	}

	// put operator for two zero bits in even
	gf2MatrixSquare(even, odd)
	// put operator for four zero bits in odd
	gf2MatrixSquare(odd, even)

	// Apply len2 zero bytes to crc1. Each pass squares the current operator
	// (the first pass yields the operator for one zero byte), applies it if
	// the matching bit of len2 is set, and then swaps the two matrices so
	// the freshly squared operator is the input of the next pass.
	for len2 != 0 {
		gf2MatrixSquare(even, odd)
		if len2&1 != 0 {
			crc1 = gf2MatrixTimes(even, crc1)
		}
		len2 >>= 1
		even, odd = odd, even
	}

	// return combined crc
	return crc1 ^ crc2
}

// crc32Combine returns the combined CRC-32 hash value of the two passed CRC-32
// hash values crc1 and crc2. poly represents the generator polynomial
// and len2 specifies the byte length that the crc2 hash covers.
// A zero or negative len2 yields crc1 unchanged.
func crc32Combine(poly uint32, crc1, crc2 uint32, len2 int64) uint32 {
	return uint32(crcCombine(32, uint64(poly), uint64(crc1), uint64(crc2), len2))
}

// crc64Combine returns the combined CRC-64 hash value of the two passed CRC-64
// hash values crc1 and crc2. poly represents the generator polynomial
// and len2 specifies the byte length that the crc2 hash covers.
// A zero or negative len2 yields crc1 unchanged.
func crc64Combine(poly uint64, crc1, crc2 uint64, len2 int64) uint64 {
	return crcCombine(64, poly, crc1, crc2, len2)
}

57
s3api/utils/crc_test.go Normal file
View File

@@ -0,0 +1,57 @@
// Copyright 2025 Versity Software
// This file is licensed under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package utils
import (
"hash/crc32"
"hash/crc64"
"testing"
)
// TestCRC32Combine splits a buffer in half, checksums each half with the
// IEEE polynomial, and verifies that crc32Combine of the two halves equals
// the checksum of the whole buffer.
func TestCRC32Combine(t *testing.T) {
	const polynomial uint32 = crc32.IEEE

	payload := []byte("The quick brown fox jumps over the lazy dog")
	split := len(payload) / 2
	head, tail := payload[:split], payload[split:]

	table := crc32.MakeTable(polynomial)
	headSum := crc32.Checksum(head, table)
	tailSum := crc32.Checksum(tail, table)
	want := crc32.Checksum(payload, table)

	got := crc32Combine(polynomial, headSum, tailSum, int64(len(tail)))
	if got != want {
		t.Errorf("crc32Combine failed: got %08x, want %08x", got, want)
	}
}
// TestCRC64Combine splits a buffer in half, checksums each half with the
// package's CRC-64/NVME table, and verifies that crc64Combine of the two
// halves equals the checksum of the whole buffer.
func TestCRC64Combine(t *testing.T) {
	const polynomial uint64 = crc64NVME

	payload := []byte("The quick brown fox jumps over the lazy dog")
	split := len(payload) / 2
	head, tail := payload[:split], payload[split:]

	table := crc64NVMETable
	headSum := crc64.Checksum(head, table)
	tailSum := crc64.Checksum(tail, table)
	want := crc64.Checksum(payload, table)

	got := crc64Combine(polynomial, headSum, tailSum, int64(len(tail)))
	if got != want {
		t.Errorf("crc64Combine failed: got %016x, want %016x", got, want)
	}
}

View File

@@ -26,7 +26,6 @@ import (
"hash/crc32"
"hash/crc64"
"io"
"math/bits"
"github.com/aws/aws-sdk-go-v2/service/s3/types"
"github.com/versity/versitygw/s3err"
@@ -89,7 +88,7 @@ func NewHashReader(r io.Reader, expectedSum string, ht HashType) (*HashReader, e
case HashTypeCRC32C:
hash = crc32.New(crc32.MakeTable(crc32.Castagnoli))
case HashTypeCRC64NVME:
hash = crc64.New(crc64.MakeTable(bits.Reverse64(0xad93d23594c93659)))
hash = crc64.New(crc64NVMETable)
case HashTypeNone:
hash = noop{}
default:
@@ -185,7 +184,7 @@ func (hr *HashReader) Type() HashType {
return hr.hashType
}
// Md5SumString converts the hash bytes to the string checksum value
// Base64SumString returns the standard (padded) base64 encoding of the raw
// checksum bytes b.
func Base64SumString(b []byte) string {
	encoded := make([]byte, base64.StdEncoding.EncodedLen(len(b)))
	base64.StdEncoding.Encode(encoded, b)
	return string(encoded)
}
@@ -198,6 +197,108 @@ func (n noop) Reset() {}
// Size reports the number of checksum bytes produced by the no-op hash,
// which is always 0.
func (noop) Size() int {
	return 0
}
// BlockSize reports the block size of the no-op hash, which is always 1.
func (noop) BlockSize() int {
	return 1
}
// IsChecksumComposable reports whether the final full object checksum for
// algo can be calculated from the per-part checksum values alone. This is
// the case for CRC32, CRC32C and CRC64NVME; every other algorithm yields
// false.
func IsChecksumComposable(algo types.ChecksumAlgorithm) bool {
	return algo == types.ChecksumAlgorithmCrc32 ||
		algo == types.ChecksumAlgorithmCrc32c ||
		algo == types.ChecksumAlgorithmCrc64nvme
}
// AddCRCChecksum calculates the composite CRC checksum after adding the part crc.
// Only CRC32, CRC32C, and CRC64NVME are supported. The input checksums must be
// base64-encoded strings.
//
// crc is the running checksum of all data seen so far, partCrc is the
// checksum of the part being appended, and partLen is the byte length of
// that part. Both checksums must decode to exactly 4 bytes (CRC32, CRC32C)
// or 8 bytes (CRC64NVME), interpreted as big-endian integers. The result is
// the base64 encoding of the combined checksum in the same layout.
//
// An error is returned when algo is not one of the supported algorithms,
// when either input is not valid base64, or when a decoded checksum has the
// wrong length. partCrc is validated before crc.
func AddCRCChecksum(algo types.ChecksumAlgorithm, crc, partCrc string, partLen int64) (string, error) {
	// family is only used in error messages; CRC32C is reported as "crc32"
	// because both share the same 4-byte width.
	var (
		family string
		size   int
	)
	switch algo {
	case types.ChecksumAlgorithmCrc32, types.ChecksumAlgorithmCrc32c:
		family, size = "crc32", 4
	case types.ChecksumAlgorithmCrc64nvme:
		family, size = "crc64", 8
	default:
		return "", fmt.Errorf("composite checksum not supported for algorithm: %v", algo)
	}

	data, err := base64.StdEncoding.DecodeString(partCrc)
	if err != nil {
		return "", fmt.Errorf("base64 decode partCrc: %w", err)
	}
	if len(data) != size {
		return "", fmt.Errorf("invalid %s part checksum length: %d", family, len(data))
	}

	currentCRC, err := base64.StdEncoding.DecodeString(crc)
	if err != nil {
		return "", fmt.Errorf("base64 decode crc: %w", err)
	}
	if len(currentCRC) != size {
		return "", fmt.Errorf("invalid %s checksum length: %d", family, len(currentCRC))
	}

	currentVal := crcFromBigEndian(currentCRC)
	val := crcFromBigEndian(data)

	var composite uint64
	switch algo {
	case types.ChecksumAlgorithmCrc32:
		composite = uint64(crc32Combine(crc32.IEEE, uint32(currentVal), uint32(val), partLen))
	case types.ChecksumAlgorithmCrc32c:
		composite = uint64(crc32Combine(crc32.Castagnoli, uint32(currentVal), uint32(val), partLen))
	default:
		// Only CRC64NVME can reach this point; everything else was
		// rejected by the first switch.
		composite = crc64Combine(crc64NVME, currentVal, val, partLen)
	}

	return base64.StdEncoding.EncodeToString(crcToBigEndian(composite, size)), nil
}

// crcFromBigEndian interprets b (at most 8 bytes) as a big-endian unsigned
// integer.
func crcFromBigEndian(b []byte) uint64 {
	var v uint64
	for _, octet := range b {
		v = v<<8 | uint64(octet)
	}
	return v
}

// crcToBigEndian returns the size low-order bytes of v in big-endian order.
func crcToBigEndian(v uint64, size int) []byte {
	out := make([]byte, size)
	for i := size - 1; i >= 0; i-- {
		out[i] = byte(v)
		v >>= 8
	}
	return out
}
// NewCompositeChecksumReader initializes a composite checksum
// processor, which decodes and validates the provided
// checksums and returns the final checksum based on

View File

@@ -0,0 +1,120 @@
// Copyright 2025 Versity Software
// This file is licensed under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package utils
import (
"encoding/base64"
"hash/crc32"
"hash/crc64"
"testing"
"github.com/aws/aws-sdk-go-v2/service/s3/types"
)
// TestAddCRCChecksum_CRC32 checks that combining the base64-encoded CRC32
// checksums of two halves of a buffer through AddCRCChecksum yields the
// CRC32 checksum of the whole buffer.
func TestAddCRCChecksum_CRC32(t *testing.T) {
	payload := []byte("this is a test buffer for crc32")
	split := len(payload) / 2
	head, tail := payload[:split], payload[split:]

	// encode renders a CRC32 value as base64 of its 4 big-endian bytes.
	encode := func(sum uint32) string {
		return base64.StdEncoding.EncodeToString([]byte{
			byte(sum >> 24), byte(sum >> 16), byte(sum >> 8), byte(sum),
		})
	}

	headSum := crc32.Checksum(head, crc32.IEEETable)
	tailSum := crc32.Checksum(tail, crc32.IEEETable)
	want := crc32.Checksum(payload, crc32.IEEETable)

	result, err := AddCRCChecksum(types.ChecksumAlgorithmCrc32,
		encode(headSum), encode(tailSum), int64(len(tail)))
	if err != nil {
		t.Fatalf("AddCRCChecksum failed: %v", err)
	}

	raw, err := base64.StdEncoding.DecodeString(result)
	if err != nil {
		t.Fatalf("base64 decode failed: %v", err)
	}

	got := uint32(raw[0])<<24 | uint32(raw[1])<<16 | uint32(raw[2])<<8 | uint32(raw[3])
	if got != want {
		t.Errorf("CRC32 combine mismatch: got %x, want %x", got, want)
	}
}
// TestAddCRCChecksum_CRC32c checks that combining the base64-encoded CRC32C
// (Castagnoli) checksums of two halves of a buffer through AddCRCChecksum
// yields the CRC32C checksum of the whole buffer.
func TestAddCRCChecksum_CRC32c(t *testing.T) {
	payload := []byte("this is a test buffer for crc32c")
	split := len(payload) / 2
	head, tail := payload[:split], payload[split:]

	// encode renders a CRC32C value as base64 of its 4 big-endian bytes.
	encode := func(sum uint32) string {
		return base64.StdEncoding.EncodeToString([]byte{
			byte(sum >> 24), byte(sum >> 16), byte(sum >> 8), byte(sum),
		})
	}

	table := crc32.MakeTable(crc32.Castagnoli)
	headSum := crc32.Checksum(head, table)
	tailSum := crc32.Checksum(tail, table)
	want := crc32.Checksum(payload, table)

	result, err := AddCRCChecksum(types.ChecksumAlgorithmCrc32c,
		encode(headSum), encode(tailSum), int64(len(tail)))
	if err != nil {
		t.Fatalf("AddCRCChecksum failed: %v", err)
	}

	raw, err := base64.StdEncoding.DecodeString(result)
	if err != nil {
		t.Fatalf("base64 decode failed: %v", err)
	}

	got := uint32(raw[0])<<24 | uint32(raw[1])<<16 | uint32(raw[2])<<8 | uint32(raw[3])
	if got != want {
		t.Errorf("CRC32c combine mismatch: got %x, want %x", got, want)
	}
}
// TestAddCRCChecksum_CRC64NVME checks that combining the base64-encoded
// CRC64NVME checksums of two halves of a buffer through AddCRCChecksum
// yields the CRC64NVME checksum of the whole buffer.
func TestAddCRCChecksum_CRC64NVME(t *testing.T) {
	payload := []byte("this is a test buffer for crc64nvme")
	split := len(payload) / 2
	head, tail := payload[:split], payload[split:]

	// encode renders a CRC64 value as base64 of its 8 big-endian bytes.
	encode := func(sum uint64) string {
		return base64.StdEncoding.EncodeToString([]byte{
			byte(sum >> 56), byte(sum >> 48), byte(sum >> 40), byte(sum >> 32),
			byte(sum >> 24), byte(sum >> 16), byte(sum >> 8), byte(sum),
		})
	}

	nvmeTable := crc64NVMETable
	headSum := crc64.Checksum(head, nvmeTable)
	tailSum := crc64.Checksum(tail, nvmeTable)
	want := crc64.Checksum(payload, nvmeTable)

	result, err := AddCRCChecksum(types.ChecksumAlgorithmCrc64nvme,
		encode(headSum), encode(tailSum), int64(len(tail)))
	if err != nil {
		t.Fatalf("AddCRCChecksum failed: %v", err)
	}

	raw, err := base64.StdEncoding.DecodeString(result)
	if err != nil {
		t.Fatalf("base64 decode failed: %v", err)
	}

	upper := uint64(raw[0])<<56 | uint64(raw[1])<<48 | uint64(raw[2])<<40 | uint64(raw[3])<<32
	lower := uint64(raw[4])<<24 | uint64(raw[5])<<16 | uint64(raw[6])<<8 | uint64(raw[7])
	got := upper | lower
	if got != want {
		t.Errorf("CRC64NVME combine mismatch: got %x, want %x", got, want)
	}
}