Files
scylladb/sstables/compressor.hh
Nikos Dragazis 76b2d0f961 db: config: Add accessor for sstable_compression_user_table_options
The `sstable_compression_user_table_options` config option determines
the default compression settings for user tables.

In patch 2fc812a1b9, the default value of this option was changed from
LZ4 to LZ4WithDicts, and fallback logic was introduced during startup
that temporarily reverts the option to LZ4 until the dictionary
compression feature is enabled.

Replace this fallback logic with an accessor that returns the correct
settings depending on the feature flag. This is cleaner and more
consistent with the way we handle the `sstable_format` option, where the
same problem appears (see `get_preferred_sstable_version()`).

As a consequence, the configuration option must always be accessed
through this accessor. Add a comment to point this out.

Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
2026-01-13 18:30:38 +02:00

150 lines
5.0 KiB
C++

/*
* Copyright (C) 2015-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#pragma once
#include <cstdint>
#include <iosfwd>
#include <map>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <seastar/core/future.hh>
#include <seastar/core/sstring.hh>
#include <seastar/util/bool_class.hh>
#include "seastarx.hh"
class compression_parameters;
class compressor {
public:
// The set of (de)compression algorithms this interface can represent.
// The *_with_dicts variants are the dictionary-based counterparts of
// lz4/zstd (see compression_parameters::non_dict_equivalent()).
enum class algorithm {
lz4,
lz4_with_dicts,
zstd,
zstd_with_dicts,
snappy,
deflate,
none,
};
// Polymorphic base: virtual destructor so deleting through a
// compressor pointer (e.g. ptr_type) is well-defined.
virtual ~compressor() = default;
/**
* Unpacks data in "input" to output. If output_len is of insufficient size,
* exception is thrown. I.e. you should keep track of the uncompressed size.
*/
virtual size_t uncompress(const char* input, size_t input_len, char* output,
size_t output_len) const = 0;
/**
* Packs data in "input" to output. If output_len is of insufficient size,
* exception is thrown. Maximum required size is obtained via "compress_max_size"
*/
virtual size_t compress(const char* input, size_t input_len, char* output,
size_t output_len) const = 0;
/**
* Returns the maximum output size for compressing data on "input_len" size.
*/
virtual size_t compress_max_size(size_t input_len) const = 0;
/**
* Returns metadata which must be written together with the compressed
* data and used to construct a corresponding decompressor.
*/
virtual std::map<sstring, sstring> options() const;
// Whether the given option name is a "hidden" (internal) one.
// Defined in the implementation file.
static bool is_hidden_option_name(std::string_view sv);
// Textual name of this compressor; defined in the implementation file.
std::string name() const;
// The algorithm this compressor instance implements.
virtual algorithm get_algorithm() const = 0;
// Test-only introspection hook; default defined in the implementation file.
virtual std::optional<unsigned> get_dict_owner_for_test() const;
// Owning handle type for compressor instances.
using ptr_type = std::unique_ptr<compressor>;
};
// Convenience alias for the owning compressor handle.
using compressor_ptr = compressor::ptr_type;
// Factory returning a plain LZ4 sstable compressor; intended for tests.
compressor_ptr make_lz4_sstable_compressor_for_tests();
// Per-table compression options, parsed and validated.
//
// Compression options are configured through the JSON-like `compression` entry in the schema.
// The CQL layer parses the text of that entry to a `map<string, string>`.
// A `compression_parameters` object is constructed from this map,
// and the passed keys and values are parsed and validated in the constructor.
// This object can then be used to create `compressor` objects for sstable readers and writers.
class compression_parameters {
public:
using algorithm = compressor::algorithm;
static constexpr std::string_view name_prefix = "org.apache.cassandra.io.compress.";
static constexpr int32_t DEFAULT_CHUNK_LENGTH = 4 * 1024;
static constexpr double DEFAULT_CRC_CHECK_CHANCE = 1.0;
static const sstring SSTABLE_COMPRESSION;
static const sstring CHUNK_LENGTH_KB;
static const sstring CHUNK_LENGTH_KB_ERR;
static const sstring CRC_CHECK_CHANCE;
private:
algorithm _algorithm;
std::optional<int> _chunk_length;
std::optional<double> _crc_check_chance;
std::optional<int> _zstd_compression_level;
public:
compression_parameters();
compression_parameters(algorithm);
compression_parameters(const std::map<sstring, sstring>& options);
~compression_parameters();
// Configured chunk length, or DEFAULT_CHUNK_LENGTH when not set explicitly.
int32_t chunk_length() const { return _chunk_length ? *_chunk_length : DEFAULT_CHUNK_LENGTH; }
// Configured CRC check chance, or DEFAULT_CRC_CHECK_CHANCE when not set explicitly.
double crc_check_chance() const { return _crc_check_chance ? *_crc_check_chance : DEFAULT_CRC_CHECK_CHANCE; }
// The selected compression algorithm.
algorithm get_algorithm() const { return _algorithm; }
// Compression level for zstd, if one was configured.
std::optional<int> zstd_compression_level() const { return _zstd_compression_level; }
using dicts_feature_enabled = bool_class<struct dicts_feature_enabled_tag>;
void validate(dicts_feature_enabled) const;
std::map<sstring, sstring> get_options() const;
// Any algorithm other than `none` means compression is on.
bool compression_enabled() const {
return _algorithm != algorithm::none;
}
// True for the dictionary-based algorithm variants.
bool uses_dictionary_compressor() const {
switch (_algorithm) {
case algorithm::lz4_with_dicts:
case algorithm::zstd_with_dicts:
return true;
default:
return false;
}
}
// Parameters describing a table with compression disabled.
static compression_parameters no_compression() {
return compression_parameters(algorithm::none);
}
bool operator==(const compression_parameters&) const = default;
static std::string_view algorithm_to_name(algorithm);
static std::string algorithm_to_qualified_name(algorithm);
// Maps a dictionary-based algorithm to its plain counterpart;
// every other algorithm is returned unchanged.
static algorithm non_dict_equivalent(algorithm algo) {
if (algo == algorithm::lz4_with_dicts) {
return algorithm::lz4;
}
if (algo == algorithm::zstd_with_dicts) {
return algorithm::zstd;
}
return algo;
}
private:
static void validate_options(const std::map<sstring, sstring>&);
static algorithm name_to_algorithm(std::string_view name);
};
// Extraction operator so compression_parameters can be used as a
// boost::program_options option value type; defined in the implementation file.
std::istream& operator>>(std::istream& is, compression_parameters& cp);
// Makes compression_parameters formattable with fmt, rendered as its option map.
template <>
struct fmt::formatter<compression_parameters> : fmt::formatter<std::string_view> {
auto format(const compression_parameters& cp, fmt::format_context& ctx) const -> decltype(ctx.out()) {
const auto opts = cp.get_options();
return fmt::format_to(ctx.out(), "{}", opts);
}
};