Files
scylladb/utils/shared_dict.hh
Michał Chojnowski 2bd393849c utils/hashers: add get_sha256()
Add a helper function which computes the SHA256 for a blob.
We will use it to compute identifiers for SSTable compression
dictionaries later.
2025-04-01 00:07:28 +02:00

57 lines
2.0 KiB
C++

/*
* Copyright (C) 2023-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#pragma once
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>
#define LZ4_STATIC_LINKING_ONLY
#include <lz4.h>
#include "utils/UUID.hh"
#include <memory>
namespace utils {
// For performance reasons (cache pressure), it is desirable to have only
// one instance of a particular dictionary on a node.
//
// `shared_dict` takes a raw dictionary buffer (which preferably contains
// a dictionary in zstd format, but any content is fine), and wraps
// it in compressor-specific dictionary types. (Each compressor attaches
// some algorithm-specific hash indices and entropy tables to it.)
//
// This way different compressors and decompressors can share the same
// raw dictionary buffer.
//
// Dictionaries are always read-only, so it's fine (and strongly preferable)
// to share this object between shards.
struct shared_dict {
    // Identifies a dictionary. Two ids are equal iff all three fields
    // compare equal (defaulted member-wise comparison).
    struct dict_id {
        uint64_t timestamp{0};
        UUID origin_node{};
        // 32 bytes, i.e. the size of a SHA-256 digest.
        // NOTE(review): presumably the digest of the dictionary contents,
        // filled in by the out-of-line constructor -- confirm.
        std::array<std::byte, 32> content_sha256{};

        bool operator==(const dict_id& other) const = default;
    };

    // Deliberately one byte short of 64 KiB: the original author once hit
    // an LZ4 bug (`<` used instead of `<=`) when dealing with prefixes of
    // exactly 64 KiB, and picked 64 KiB - 1 to stay clear of that boundary.
    // 64 KiB would probably work for this use case too.
    static constexpr size_t max_lz4_dict_size = (64 * 1024) - 1;

    // Default state: zero-initialized id, empty buffer, every
    // compressor-specific handle null, empty LZ4 view.
    shared_dict() = default;

    // Builds a shared dictionary from the raw buffer `d`.
    // Declared here only; the definition lives outside this header.
    // `zstd_compression_level` defaults to 1.
    shared_dict(std::span<const std::byte> d, uint64_t timestamp, UUID origin_node, int zstd_compression_level = 1);

    // NOTE: the data members below keep their original declaration order,
    // since that order determines object layout and initialization order.

    dict_id id{};

    // The raw dictionary buffer that the compressor-specific members
    // are built around.
    std::vector<std::byte> data;

    // Owning handles to the zstd decompression / compression dictionaries,
    // released through the matching zstd free function.
    std::unique_ptr<ZSTD_DDict, decltype(&ZSTD_freeDDict)> zstd_ddict{nullptr, &ZSTD_freeDDict};
    std::unique_ptr<ZSTD_CDict, decltype(&ZSTD_freeCDict)> zstd_cdict{nullptr, &ZSTD_freeCDict};

    // Owning handle to the LZ4 stream used as the compression dictionary,
    // released through LZ4_freeStream.
    std::unique_ptr<LZ4_stream_t, decltype(&LZ4_freeStream)> lz4_cdict{nullptr, &LZ4_freeStream};

    // Non-owning view of the bytes used as the LZ4 decompression dictionary.
    // NOTE(review): presumably a sub-range of `data` of at most
    // `max_lz4_dict_size` bytes -- confirm against the constructor.
    std::span<const std::byte> lz4_ddict;
};
} // namespace utils