Streaming bitrot verification support (#7004)

This commit is contained in:
Krishna Srinivas
2019-01-17 04:58:18 -08:00
committed by Harshavardhana
parent 94c52e3816
commit 98c950aacd
29 changed files with 882 additions and 571 deletions

View File

@@ -20,6 +20,7 @@ import (
"context"
"errors"
"hash"
"io"
"github.com/minio/highwayhash"
"github.com/minio/minio/cmd/logger"
@@ -38,19 +39,22 @@ const (
SHA256 BitrotAlgorithm = 1 + iota
// HighwayHash256 represents the HighwayHash-256 hash function
HighwayHash256
// HighwayHash256S represents the Streaming HighwayHash-256 hash function
HighwayHash256S
// BLAKE2b512 represents the BLAKE2b-512 hash function
BLAKE2b512
)
// DefaultBitrotAlgorithm is the default algorithm used for bitrot protection.
const (
DefaultBitrotAlgorithm = HighwayHash256
DefaultBitrotAlgorithm = HighwayHash256S
)
var bitrotAlgorithms = map[BitrotAlgorithm]string{
SHA256: "sha256",
BLAKE2b512: "blake2b",
HighwayHash256: "highwayhash256",
SHA256: "sha256",
BLAKE2b512: "blake2b",
HighwayHash256: "highwayhash256",
HighwayHash256S: "highwayhash256S",
}
// New returns a new hash.Hash calculating the given bitrot algorithm.
@@ -64,6 +68,9 @@ func (a BitrotAlgorithm) New() hash.Hash {
case HighwayHash256:
hh, _ := highwayhash.New(magicHighwayHash256Key) // New will never return error since key is 256 bit
return hh
case HighwayHash256S:
hh, _ := highwayhash.New(magicHighwayHash256Key) // New will never return error since key is 256 bit
return hh
default:
logger.CriticalIf(context.Background(), errors.New("Unsupported bitrot algorithm"))
return nil
@@ -109,86 +116,71 @@ func BitrotAlgorithmFromString(s string) (a BitrotAlgorithm) {
return
}
// bitrotReader serves bit-rot verified data for the file identified by
// disk, volume and filePath.
type bitrotReader struct {
	disk             StorageAPI
	volume, filePath string

	// verifier holds the bit-rot info handed to disk.ReadFile.
	verifier *BitrotVerifier
	// endOffset bounds the amount of data requested from disk.ReadFile; the
	// requested length depends on the offset of the read.
	endOffset int64
	// buf holds bit-rot verified data.
	buf []byte
}
// newBitrotReader returns bitrotReader.
// Note that the buffer is allocated later in Read(). This is because we will know the buffer length only
// during the bitrotReader.Read(). Depending on when parallelReader fails-over, the buffer length can be different.
func newBitrotReader(disk StorageAPI, volume, filePath string, algo BitrotAlgorithm, endOffset int64, sum []byte) *bitrotReader {
return &bitrotReader{
disk: disk,
volume: volume,
filePath: filePath,
verifier: &BitrotVerifier{algo, sum},
endOffset: endOffset,
buf: nil,
// newBitrotWriter returns the bitrot writer matching algo: a streaming
// bitrot writer for HighwayHash256S and a whole-file bitrot writer for every
// other algorithm. All arguments are forwarded unchanged to the chosen
// constructor.
func newBitrotWriter(disk StorageAPI, volume, filePath string, length int64, algo BitrotAlgorithm, shardSize int64) io.Writer {
	switch algo {
	case HighwayHash256S:
		return newStreamingBitrotWriter(disk, volume, filePath, length, algo, shardSize)
	default:
		return newWholeBitrotWriter(disk, volume, filePath, length, algo, shardSize)
	}
}
// ReadChunk returns requested data.
func (b *bitrotReader) ReadChunk(offset int64, length int64) ([]byte, error) {
if b.buf == nil {
b.buf = make([]byte, b.endOffset-offset)
if _, err := b.disk.ReadFile(b.volume, b.filePath, offset, b.buf, b.verifier); err != nil {
ctx := context.Background()
logger.GetReqInfo(ctx).AppendTags("disk", b.disk.String())
logger.LogIf(ctx, err)
return nil, err
// newBitrotReader returns the bitrot reader matching algo: a streaming
// bitrot reader for HighwayHash256S and a whole-file bitrot reader for every
// other algorithm. The expected sum is only passed to the whole-file reader;
// shardSize is only passed to the streaming reader.
func newBitrotReader(disk StorageAPI, bucket string, filePath string, tillOffset int64, algo BitrotAlgorithm, sum []byte, shardSize int64) io.ReaderAt {
	switch algo {
	case HighwayHash256S:
		return newStreamingBitrotReader(disk, bucket, filePath, tillOffset, algo, shardSize)
	default:
		return newWholeBitrotReader(disk, bucket, filePath, algo, tillOffset, sum)
	}
}
// Close all the readers.
func closeBitrotReaders(rs []io.ReaderAt) {
for _, r := range rs {
if br, ok := r.(*streamingBitrotReader); ok {
br.Close()
}
}
if int64(len(b.buf)) < length {
logger.LogIf(context.Background(), errLessData)
return nil, errLessData
}
retBuf := b.buf[:length]
b.buf = b.buf[length:]
return retBuf, nil
}
// bitrotWriter computes the bit-rot hash of the data written to the file
// identified by disk, volume and filePath.
type bitrotWriter struct {
	disk             StorageAPI
	volume, filePath string
	// h accumulates the hash of the written data.
	h hash.Hash
}
// newBitrotWriter returns bitrotWriter.
func newBitrotWriter(disk StorageAPI, volume, filePath string, algo BitrotAlgorithm) *bitrotWriter {
return &bitrotWriter{
disk: disk,
volume: volume,
filePath: filePath,
h: algo.New(),
// closeBitrotWriters closes every streaming bitrot writer in ws. Writers of
// any other concrete type are skipped, and the value returned by Close is
// not inspected.
func closeBitrotWriters(ws []io.Writer) {
	for i := range ws {
		streaming, isStreaming := ws[i].(*streamingBitrotWriter)
		if !isStreaming {
			continue
		}
		streaming.Close()
	}
}
// Append appends the data while calculating the hash.
func (b *bitrotWriter) Append(buf []byte) error {
n, err := b.h.Write(buf)
if err != nil {
return err
}
if n != len(buf) {
logger.LogIf(context.Background(), errUnexpected)
return errUnexpected
}
if err = b.disk.AppendFile(b.volume, b.filePath, buf); err != nil {
logger.LogIf(context.Background(), err)
return err
// bitrotWriterSum returns the hash sum when w is a whole-file bitrot writer.
// For any other writer (such as a streaming bitrot writer) it returns nil.
func bitrotWriterSum(w io.Writer) []byte {
	whole, isWhole := w.(*wholeBitrotWriter)
	if !isWhole {
		return nil
	}
	return whole.Sum(nil)
}
// Sum returns bit-rot sum.
func (b *bitrotWriter) Sum() []byte {
return b.h.Sum(nil)
// bitrotCheckFile verifies whether the file at volume/filePath on disk has a
// bitrot error.
//
// For whole-file algorithms a single disk.ReadFile call is issued with a
// verifier built from algo and sum. For HighwayHash256S the file is read
// through a streaming bitrot reader, at most shardSize bytes at a time,
// until tillOffset bytes have been consumed.
//
// It returns the first error reported by the read path, or nil when every
// read succeeds.
func bitrotCheckFile(disk StorageAPI, volume string, filePath string, tillOffset int64, algo BitrotAlgorithm, sum []byte, shardSize int64) (err error) {
	buf := make([]byte, shardSize)

	if algo != HighwayHash256S {
		// Whole-file bitrot: no need to read the entire file here, because the
		// verification happens on the server side even for a small buffer.
		_, err = disk.ReadFile(volume, filePath, 0, buf, NewBitrotVerifier(algo, sum))
		return err
	}

	reader := newStreamingBitrotReader(disk, volume, filePath, tillOffset, algo, shardSize)
	defer closeBitrotReaders([]io.ReaderAt{reader})

	for offset := int64(0); offset != tillOffset; {
		// Trim the final read so it never asks for bytes beyond tillOffset.
		chunk := buf
		if remaining := tillOffset - offset; int64(len(chunk)) > remaining {
			chunk = chunk[:remaining]
		}

		n, readErr := reader.ReadAt(chunk, offset)
		if readErr != nil {
			return readErr
		}
		offset += int64(n)
	}
	return nil
}