Files
versitygw/s3api/utils/csum-reader_test.go
niksis02 d2fa265fb8 feat: support sha512, md5, xxhash3, xxhash64, xxhash128 data integrity checksums
Integrate the new S3 checksum types in the gateway, including `SHA512`, `MD5`, `XXHASH64`, `XXHASH3`, and `XXHASH128`. This adds checksum calculation, validation, schema handling, and test coverage for the expanded checksum support.

These external packages have been used:
- `github.com/zeebo/xxh3` for `XXHASH3` and `XXHASH128`
- `github.com/cespare/xxhash/v2` for `XXHASH64`

Adjust integration tests because `aws-sdk-go-v2/service/s3` does not support automatic checksum calculation for the new checksum algorithms and returns an SDK-level error when only the checksum algorithm is provided. Only precalculated checksum values are acceptable for these checksum types.

References:
- `https://github.com/aws/aws-sdk-go-v2/issues/3404`
- `https://github.com/aws/aws-sdk-go-v2/issues/3403`
2026-05-04 08:50:39 -07:00

220 lines
7.7 KiB
Go

// Copyright 2025 Versity Software
// This file is licensed under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package utils
import (
"bytes"
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"crypto/sha512"
"encoding/base64"
"hash"
"hash/crc32"
"hash/crc64"
"io"
"testing"
"github.com/aws/aws-sdk-go-v2/service/s3/types"
"github.com/cespare/xxhash/v2"
"github.com/stretchr/testify/assert"
"github.com/zeebo/xxh3"
)
// TestAddCRCChecksum_CRC32 splits a buffer in two, computes the CRC32 (IEEE)
// of each half, and checks that AddCRCChecksum combines the two part checksums
// into the checksum of the whole buffer.
//
// Part checksums are handed to AddCRCChecksum as base64 of the 4-byte
// big-endian value, and the result is decoded the same way.
func TestAddCRCChecksum_CRC32(t *testing.T) {
	data := []byte("this is a test buffer for crc32")
	mid := len(data) / 2
	part1, part2 := data[:mid], data[mid:]

	// bigEndian renders a CRC32 value as 4 big-endian bytes.
	bigEndian := func(v uint32) []byte {
		return []byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)}
	}

	crc1b64 := base64.StdEncoding.EncodeToString(bigEndian(crc32.Checksum(part1, crc32.IEEETable)))
	crc2b64 := base64.StdEncoding.EncodeToString(bigEndian(crc32.Checksum(part2, crc32.IEEETable)))
	want := bigEndian(crc32.Checksum(data, crc32.IEEETable))

	combined, err := AddCRCChecksum(types.ChecksumAlgorithmCrc32, crc1b64, crc2b64, int64(len(part2)))
	if err != nil {
		t.Fatalf("AddCRCChecksum failed: %v", err)
	}

	got, err := base64.StdEncoding.DecodeString(combined)
	if err != nil {
		t.Fatalf("base64 decode failed: %v", err)
	}

	// Compare whole byte slices rather than indexing into the result, so a
	// checksum of unexpected length fails the test instead of panicking.
	if !bytes.Equal(got, want) {
		t.Errorf("CRC32 combine mismatch: got %x, want %x", got, want)
	}
}
// TestAddCRCChecksum_CRC32c splits a buffer in two, computes the CRC32C
// (Castagnoli) checksum of each half, and checks that AddCRCChecksum combines
// the two part checksums into the checksum of the whole buffer.
//
// Part checksums are handed to AddCRCChecksum as base64 of the 4-byte
// big-endian value, and the result is decoded the same way.
func TestAddCRCChecksum_CRC32c(t *testing.T) {
	data := []byte("this is a test buffer for crc32c")
	mid := len(data) / 2
	part1, part2 := data[:mid], data[mid:]

	castagnoli := crc32.MakeTable(crc32.Castagnoli)

	// bigEndian renders a CRC32C value as 4 big-endian bytes.
	bigEndian := func(v uint32) []byte {
		return []byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)}
	}

	crc1b64 := base64.StdEncoding.EncodeToString(bigEndian(crc32.Checksum(part1, castagnoli)))
	crc2b64 := base64.StdEncoding.EncodeToString(bigEndian(crc32.Checksum(part2, castagnoli)))
	want := bigEndian(crc32.Checksum(data, castagnoli))

	combined, err := AddCRCChecksum(types.ChecksumAlgorithmCrc32c, crc1b64, crc2b64, int64(len(part2)))
	if err != nil {
		t.Fatalf("AddCRCChecksum failed: %v", err)
	}

	got, err := base64.StdEncoding.DecodeString(combined)
	if err != nil {
		t.Fatalf("base64 decode failed: %v", err)
	}

	// Compare whole byte slices rather than indexing into the result, so a
	// checksum of unexpected length fails the test instead of panicking.
	if !bytes.Equal(got, want) {
		t.Errorf("CRC32c combine mismatch: got %x, want %x", got, want)
	}
}
// TestAddCRCChecksum_CRC64NVME splits a buffer in two, computes the CRC64
// checksum of each half using the package's crc64NVMETable, and checks that
// AddCRCChecksum combines the two part checksums into the checksum of the
// whole buffer.
//
// Part checksums are handed to AddCRCChecksum as base64 of the 8-byte
// big-endian value, and the result is decoded the same way.
func TestAddCRCChecksum_CRC64NVME(t *testing.T) {
	data := []byte("this is a test buffer for crc64nvme")
	mid := len(data) / 2
	part1, part2 := data[:mid], data[mid:]

	table := crc64NVMETable

	// bigEndian renders a CRC64 value as 8 big-endian bytes.
	bigEndian := func(v uint64) []byte {
		return []byte{
			byte(v >> 56), byte(v >> 48), byte(v >> 40), byte(v >> 32),
			byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v),
		}
	}

	crc1b64 := base64.StdEncoding.EncodeToString(bigEndian(crc64.Checksum(part1, table)))
	crc2b64 := base64.StdEncoding.EncodeToString(bigEndian(crc64.Checksum(part2, table)))
	want := bigEndian(crc64.Checksum(data, table))

	combined, err := AddCRCChecksum(types.ChecksumAlgorithmCrc64nvme, crc1b64, crc2b64, int64(len(part2)))
	if err != nil {
		t.Fatalf("AddCRCChecksum failed: %v", err)
	}

	got, err := base64.StdEncoding.DecodeString(combined)
	if err != nil {
		t.Fatalf("base64 decode failed: %v", err)
	}

	// Compare whole byte slices rather than indexing into the result, so a
	// checksum of unexpected length fails the test instead of panicking.
	if !bytes.Equal(got, want) {
		t.Errorf("CRC64NVME combine mismatch: got %x, want %x", got, want)
	}
}
// base64HashForTest writes data into h and returns the resulting digest
// encoded with standard base64. The hasher is used as given (it is not reset
// first), so callers should pass a fresh one.
func base64HashForTest(h hash.Hash, data []byte) string {
	// hash.Hash documents that Write never returns an error.
	_, _ = h.Write(data)
	digest := h.Sum(nil)
	return base64.StdEncoding.EncodeToString(digest)
}
// TestNewHashReader_NewChecksumAlgorithms runs one subtest per hash type.
// Each subtest computes the expected base64 digest of a fixed payload with a
// reference hasher, passes that digest to NewHashReader along with the
// payload, drains the reader, and asserts that reading succeeds and that
// Sum() reports the same digest.
func TestNewHashReader_NewChecksumAlgorithms(t *testing.T) {
	payload := []byte("checksum payload")

	type testCase struct {
		name     string
		hashType HashType
		hasher   hash.Hash // reference implementation, consumed once by its subtest
	}

	cases := []testCase{
		{"md5", HashTypeMd5, md5.New()},
		{"sha1", HashTypeSha1, sha1.New()},
		{"sha256", HashTypeSha256, sha256.New()},
		{"sha512", HashTypeSha512, sha512.New()},
		{"crc32", HashTypeCRC32, crc32.NewIEEE()},
		{"crc32c", HashTypeCRC32C, crc32.New(crc32.MakeTable(crc32.Castagnoli))},
		{"crc64nvme", HashTypeCRC64NVME, crc64.New(crc64NVMETable)},
		{"xxhash64", HashTypeXXHASH64, xxhash.New()},
		{"xxhash3", HashTypeXXHASH3, xxh3.New()},
		{"xxhash128", HashTypeXXHASH128, xxh3.New128()},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			want := base64HashForTest(tc.hasher, payload)

			reader, err := NewHashReader(bytes.NewReader(payload), want, tc.hashType)
			if ok := assert.NoError(t, err); !ok {
				// No reader to exercise; the failure is already recorded.
				return
			}

			_, copyErr := io.Copy(io.Discard, reader)
			assert.NoError(t, copyErr)
			assert.Equal(t, want, reader.Sum())
		})
	}
}
// TestNewCompositeChecksumReader_NewChecksumAlgorithms runs one subtest per
// hash type. Each subtest hashes two parts separately, feeds the two base64
// part digests to a composite checksum reader via Process, and asserts that
// Sum() equals the base64 digest of the concatenated raw part digests
// computed with a reference hasher.
func TestNewCompositeChecksumReader_NewChecksumAlgorithms(t *testing.T) {
	firstPart := []byte("part one")
	secondPart := []byte("part two")

	type testCase struct {
		name      string
		hashType  HashType
		newHasher func() hash.Hash // returns a fresh reference hasher
	}

	cases := []testCase{
		{"md5", HashTypeMd5, md5.New},
		{"sha1", HashTypeSha1, sha1.New},
		{"sha256", HashTypeSha256, sha256.New},
		{"sha512", HashTypeSha512, sha512.New},
		{"crc32", HashTypeCRC32, func() hash.Hash { return crc32.NewIEEE() }},
		{"crc32c", HashTypeCRC32C, func() hash.Hash { return crc32.New(crc32.MakeTable(crc32.Castagnoli)) }},
		{"xxhash64", HashTypeXXHASH64, func() hash.Hash { return xxhash.New() }},
		{"xxhash3", HashTypeXXHASH3, func() hash.Hash { return xxh3.New() }},
		{"xxhash128", HashTypeXXHASH128, func() hash.Hash { return xxh3.New128() }},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			partSums := []string{
				base64HashForTest(tc.newHasher(), firstPart),
				base64HashForTest(tc.newHasher(), secondPart),
			}

			reader, err := NewCompositeChecksumReader(tc.hashType)
			if ok := assert.NoError(t, err); !ok {
				return
			}
			for _, sum := range partSums {
				assert.NoError(t, reader.Process(sum))
			}

			// Expected composite: hash of the raw (decoded) part digests,
			// concatenated in order.
			wantHasher := tc.newHasher()
			for _, sum := range partSums {
				raw, decodeErr := base64.StdEncoding.DecodeString(sum)
				if ok := assert.NoError(t, decodeErr); !ok {
					return
				}
				wantHasher.Write(raw)
			}
			want := base64.StdEncoding.EncodeToString(wantHasher.Sum(nil))

			assert.Equal(t, want, reader.Sum())
		})
	}
}