Files
scylladb/utils/stream_compressor.hh
Michał Chojnowski 185a032044 utils/stream_compressor: allocate memory for zstd compressors externally
The default and recommended way to use zstd compressors is to let
zstd allocate and free memory for compressors on its own.

That's what we did for zstd compressors used in RPC compression.
But it turns out that it generates allocation patterns we dislike.

We expected zstd not to generate allocations after the context object
is initialized, but it turns out that it tries to downsize the context
sometimes (by reallocation). We don't want that because the allocations
generated by zstd are large (1 MiB with the parameters we use),
so repeating them periodically stresses the reclaimer.

We can avoid this by using the "static context" API of zstd,
in which the memory for context is allocated manually by the user
of the library. In this mode, zstd doesn't allocate anything
on its own.

The implementation details of this patch add a consideration for
forward compatibility: later versions of Scylla can't use a
window size greater than the one we hardcoded in this patch
when talking to the old version of the decompressor.

(This is not a problem, since those compressors are only used
for RPC compression at the moment, where cross-version communication
can be prevented by bumping COMPRESSOR_NAME. But it's something
that the developer who changes the window size must _remember_ to do).

Fixes #24160
Fixes #24183

Closes scylladb/scylladb#24161
2025-05-27 12:43:11 +03:00

188 lines
8.5 KiB
C++

/*
* Copyright (C) 2023-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#pragma once
#include <seastar/rpc/rpc_types.hh>
#include <vector>
#include <memory>
#include <span>
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>
#define LZ4_STATIC_LINKING_ONLY
#include <lz4.h>
namespace utils {
// Common streaming (de)compression interface.
//
// It is implemented by the pairs (zstd_cstream, zstd_dstream), (lz4_cstream, lz4_dstream)
// and (raw_stream, raw_stream). Its semantics follow the streaming interface of zstd,
// which is why zstd_cstream and zstd_dstream are only thin wrappers and why every
// implementation takes ZSTD_inBuffer/ZSTD_outBuffer arguments.
// The main departure from zstd's own interface: errors are reported by exception,
// not by error code.
struct stream_compressor {
    // Compresses data from `in` into `out`; `end` is zstd's end directive.
    // NOTE(review): the meaning of the returned size_t is not visible in this header;
    // presumably it mirrors the return value of zstd's streaming compression call — confirm.
    virtual size_t compress(ZSTD_outBuffer* out, ZSTD_inBuffer* in, ZSTD_EndDirective end) = 0;
    // Once compress() has thrown, the compressor is in an undefined state and
    // must not be used for compression. Calling reset() clears the internal state,
    // after which the compressor can be recycled.
    virtual void reset() noexcept = 0;
    virtual ~stream_compressor() = default;
};
// Decompression half of the streaming interface; errors are reported by exception.
struct stream_decompressor {
    // Decompresses data from `in` into `out`.
    // NOTE(review): the exact contract of `end_of_frame` is defined by the
    // implementations, which are not visible in this header.
    virtual void decompress(ZSTD_outBuffer* out, ZSTD_inBuffer* in, bool end_of_frame) = 0;
    // Once decompress() has thrown, the decompressor is in an undefined state and
    // must not be used for decompression. Calling reset() clears the internal state,
    // after which the decompressor can be recycled.
    virtual void reset() noexcept = 0;
    virtual ~stream_decompressor() = default;
};
// A pass-through codec: it offers a streaming interface similar to
// ZSTD_CStream/ZSTD_DStream, but its "compression" is nothing more than a memcpy.
// A single type serves as both the compressor and the decompressor.
struct raw_stream final : stream_compressor, stream_decompressor {
    // NOTE(review): presumably the shared memcpy primitive behind both
    // compress() and decompress(); the definition is not visible here — confirm.
    static size_t copy(ZSTD_outBuffer* out, ZSTD_inBuffer* in);
    void decompress(ZSTD_outBuffer* out, ZSTD_inBuffer* in, bool /*end_of_frame*/) override;
    size_t compress(ZSTD_outBuffer* out, ZSTD_inBuffer* in, ZSTD_EndDirective end) override;
    void reset() noexcept override;
};
// Thin wrapper over ZSTD_DStream.
class zstd_dstream final : public stream_decompressor {
    // Releases the context's memory with free().
    // NOTE(review): this requires the context to live in a buffer obtained from a
    // free()-compatible allocation; the allocation happens in the constructor,
    // which is not visible in this header — confirm there.
    struct ctx_deleter {
        void operator()(ZSTD_DStream* stream) const noexcept {
            free(stream);
        }
    };
    std::unique_ptr<ZSTD_DStream, ctx_deleter> _ctx;
    // Non-owning pointer to the current dictionary; null means "no dictionary".
    // Initialized to null so that it never holds an indeterminate value before
    // the first set_dict() call.
    const ZSTD_DDict* _dict = nullptr;
    // NOTE(review): presumably tracks whether a frame is partially decoded;
    // its use is in the out-of-line definitions — confirm.
    bool _in_progress = false;
public:
    zstd_dstream();
    void reset() noexcept override;
    void decompress(ZSTD_outBuffer* out, ZSTD_inBuffer* in, bool end_of_frame) override;
    // The passed dict must live until it is unset by another set_dict()
    // (or until the decompressor is destroyed).
    // The pointer can be null; this unsets the current dict.
    void set_dict(const ZSTD_DDict* dict);
};
// Thin wrapper over ZSTD_CStream.
class zstd_cstream final : public stream_compressor {
    // Releases the context's memory with free().
    // NOTE(review): this requires the context to live in a buffer obtained from a
    // free()-compatible allocation; the allocation happens in the constructor,
    // which is not visible in this header — confirm there.
    struct ctx_deleter {
        void operator()(ZSTD_CStream* stream) const noexcept {
            free(stream);
        }
    };
    std::unique_ptr<ZSTD_CStream, ctx_deleter> _ctx;
    // Non-owning pointer to the current dictionary; null means "no dictionary".
    // Initialized to null so that it never holds an indeterminate value before
    // the first set_dict() call.
    const ZSTD_CDict* _dict = nullptr;
public:
    zstd_cstream();
    size_t compress(ZSTD_outBuffer* out, ZSTD_inBuffer* in, ZSTD_EndDirective end) override;
    void reset() noexcept override;
    // The passed dict must live until it is unset by another set_dict()
    // (or until the compressor is destroyed).
    // The pointer can be null; this unsets the current dict.
    void set_dict(const ZSTD_CDict* dict);
};
// Out-of-line helpers operating on seastar RPC buffers.
// NOTE(review): the notes below are inferred from names and signatures only;
// the definitions are not visible in this header — confirm against them.

// Presumably feeds `data` through `compressor` and returns the compressed buffer.
seastar::rpc::snd_buf compress_impl(
        size_t head_space,
        const seastar::rpc::snd_buf& data,
        stream_compressor& compressor,
        bool end_of_frame,
        size_t chunk_size);

// Presumably feeds `data` through `decompressor` and returns the decompressed buffer.
seastar::rpc::rcv_buf decompress_impl(
        const seastar::rpc::rcv_buf& data,
        stream_decompressor& decompressor,
        bool end_of_frame,
        size_t chunk_size);

// Presumably a CRC over the contents of `data`; one overload per buffer type.
uint32_t crc_impl(const seastar::rpc::snd_buf& data) noexcept;
uint32_t crc_impl(const seastar::rpc::rcv_buf& data) noexcept;
// Size of the history buffer maintained by both sides of the compressed connection.
// Governs the memory usage and effectiveness of streaming LZ4 compression.
//
// There is no value in making it greater than 64 KiB, because LZ4 doesn't support
// greater history sizes, at least in the official releases of LZ4.
// It may be set smaller to reduce memory usage, at the cost of weaker compression.
//
// If LZ4 streaming compression turns out to be effective, we should make this
// live-updatable and check the effectiveness of various sizes in practice.
//
// Must be equal on both sides of the connection. Currently this is achieved by making it
// a constant. If we want to make it live-updatable, changes in window size will have to
// be made a part of lz4_cstream's/lz4_dstream's internal protocol.
//
// Declared `inline` so that all translation units including this header share
// one definition instead of each getting its own internal-linkage copy.
inline constexpr size_t max_lz4_window_size = 64 * 1024;
// An LZ4-based streaming compressor with an interface similar to ZSTD_CStream.
class lz4_cstream final : public stream_compressor {
    // Ring buffer holding the recent history of the stream.
    //
    // For streaming compression, LZ4 does not copy the history into a buffer of its own.
    // We keep the history buffer ourselves, and the LZ4 compressor only stores a view
    // of the most recent contiguous 64 kiB chunk of that buffer.
    //
    // As a consequence, the "contiguity" seen during decompression has to match the
    // "contiguity" seen during compression: for every block, the contiguous sum
    // (up to 64 kiB) of the most recent views passed to LZ4_decompress_safe_continue
    // must be at least as long as the contiguous sum (up to 64 kiB) of the most recent
    // views passed to LZ4_compress_fast_continue for that same block.
    //
    // Maintaining the history buffer therefore has to follow certain rules.
    //
    // The scheme used here is the one LZ4 calls "synchronized": the history ring
    // buffers on the two sides of the stream move in lockstep. Every compression call
    // that uses the compressor's _buf as source is matched by a decompression call that
    // uses the decompressor's _buf as target, with the same length and the same offset
    // in _buf.
    std::vector<char> _buf;
    // Current position in the ring buffer _buf; new input is appended here.
    size_t _buf_pos = 0;
    // Together these describe the compressed data in `_lz4_scratch` that is still
    // pending output. (`_lz4_scratch` is not a member of this class; it is declared
    // elsewhere.) The pending data has to be copied out before new data can be
    // compressed into the scratch buffer.
    size_t _scratch_beg = 0;
    size_t _scratch_end = 0;
    LZ4_stream_t _ctx;
    const LZ4_stream_t* _dict = nullptr;
public:
    lz4_cstream(size_t window_size = max_lz4_window_size);
    void reset() noexcept override;
    // NOTE(review): presumably a cheaper variant of reset(); the definition is
    // not visible in this header — confirm.
    void resetFast() noexcept;
    // Processing scheme: when new data arrives in `in`, an arbitrary amount of it is
    // copied to `_buf` (arbitrary, except that it has to fit contiguously in `_buf`),
    // the new block is compressed from `_buf` to `_lz4_scratch`, and then everything
    // in `_lz4_scratch` is copied to `out`. This repeats until `in` is empty.
    size_t compress(ZSTD_outBuffer* out, ZSTD_inBuffer* in, ZSTD_EndDirective end) override;
    // The passed dict must live until it is unset by another set_dict()
    // (or until the compressor is destroyed).
    // The pointer can be null; this unsets the current dict.
    void set_dict(const LZ4_stream_t* dict);
};
// Implements a streaming decompression interface similar to ZSTD_DStream, on top of LZ4.
class lz4_dstream final : public stream_decompressor {
    // History ring buffer. It is kept in lockstep with the `_buf` of the
    // lz4_cstream on the other side of the stream (see the `_buf` comment in lz4_cstream).
    std::vector<char> _buf;
    // The write position in `_buf`. New input will be decompressed to this offset.
    // It's updated in lockstep with `_buf_pos` of the compressor.
    size_t _buf_end = 0;
    // The read position in `_buf`. The chunk between `_buf_beg` and `_buf_end` is the data
    // that was decompressed, but hasn't been copied to the caller's `out` yet.
    // We have to copy it out before we can overwrite it with new decompressed data.
    size_t _buf_beg = 0;
    // The amount of data accumulated in `_lz4_scratch`. Data accumulates in `_lz4_scratch`
    // until a full LZ4 block (with prepended length) is available — only then can we
    // decompress it to `_buf`.
    size_t _scratch_pos = 0;
    LZ4_streamDecode_t _ctx;
    // Non-owning view of the current dictionary; empty means "no dictionary".
    std::span<const std::byte> _dict;
public:
    lz4_dstream(size_t window_size = max_lz4_window_size);
    void reset() noexcept override;
    // The third parameter is named `end_of_frame` to match
    // stream_decompressor::decompress(): callers going through the base interface
    // pass that flag, so a name of opposite polarity here would be misleading.
    void decompress(ZSTD_outBuffer* out, ZSTD_inBuffer* in, bool end_of_frame) override;
    // The passed dict must live until it is unset by another set_dict()
    // (or until the decompressor is destroyed).
    // The span can be empty; this unsets the current dict.
    void set_dict(std::span<const std::byte> dict);
};
} // namespace utils