db/config: add an option that disables dict-aware sstable compressors in DDL statements

In some deployments, we want to be able to disallow dictionary-aware
sstable compressors.

This patch adds a config option for that:
`sstable_compression_dictionaries_allow_in_ddl`. When the option is set to
false, dictionary-aware compressors are rejected in the validation stage of
CREATE and ALTER statements.

Closes scylladb/scylladb#24355
This commit is contained in:
Michał Chojnowski
2025-06-02 18:02:10 +02:00
committed by Avi Kivity
parent 2d716f3ffe
commit 7d26d3c7cb
6 changed files with 110 additions and 10 deletions

View File

@@ -536,13 +536,17 @@ compression_parameters::compression_parameters(const std::map<sstring, sstring>&
}
}
void compression_parameters::validate(const gms::feature_service& fs) {
if (!fs.sstable_compression_dicts) {
if (_algorithm == algorithm::zstd_with_dicts || _algorithm == algorithm::lz4_with_dicts) {
void compression_parameters::validate(dicts_feature_enabled dicts_enabled, dicts_usage_allowed dicts_allowed) {
if (_algorithm == algorithm::zstd_with_dicts || _algorithm == algorithm::lz4_with_dicts) {
if (!dicts_enabled) {
throw std::runtime_error(std::format("sstable_compression {} can't be used before "
"all nodes are upgraded to a versions which supports it",
algorithm_to_name(_algorithm)));
}
if (!dicts_allowed) {
throw std::runtime_error(std::format("sstable_compression {} has been disabled by `sstable_compression_dictionaries_allow_in_ddl: false`",
algorithm_to_name(_algorithm)));
}
}
if (_chunk_length) {
auto chunk_length = _chunk_length.value();

View File

@@ -13,12 +13,9 @@
#include <seastar/core/future.hh>
#include <seastar/core/sstring.hh>
#include <seastar/util/bool_class.hh>
#include "seastarx.hh"
namespace gms {
class feature_service;
} // namespace gms
class compression_parameters;
class compressor {
@@ -108,7 +105,10 @@ public:
algorithm get_algorithm() const { return _algorithm; }
std::optional<int> zstd_compression_level() const { return _zstd_compression_level; }
void validate(const gms::feature_service&);
using dicts_feature_enabled = bool_class<struct dicts_feature_enabled_tag>;
using dicts_usage_allowed = bool_class<struct dicts_usage_allowed_tag>;
void validate(dicts_feature_enabled, dicts_usage_allowed);
std::map<sstring, sstring> get_options() const;
bool compression_enabled() const {

View File

@@ -23,6 +23,7 @@
#include "db/per_partition_rate_limit_options.hh"
#include "db/tablet_options.hh"
#include "utils/bloom_calculations.hh"
#include "db/config.hh"
#include <boost/algorithm/string/predicate.hpp>
@@ -135,7 +136,9 @@ void cf_prop_defs::validate(const data_dictionary::database db, sstring ks_name,
throw exceptions::configuration_exception(sstring("Missing sub-option '") + compression_parameters::SSTABLE_COMPRESSION + "' for the '" + KW_COMPRESSION + "' option.");
}
compression_parameters cp(*compression_options);
cp.validate(db.features());
cp.validate(
compression_parameters::dicts_feature_enabled(bool(db.features().sstable_compression_dicts)),
compression_parameters::dicts_usage_allowed(db.get_config().sstable_compression_dictionaries_allow_in_ddl()));
}
auto per_partition_rate_limit_options = get_per_partition_rate_limit_options(schema_extensions);

View File

@@ -1243,6 +1243,13 @@ db::config::config(std::shared_ptr<db::extensions> exts)
, enable_sstables_mc_format(this, "enable_sstables_mc_format", value_status::Unused, true, "Enable SSTables 'mc' format to be used as the default file format. Deprecated, please use \"sstable_format\" instead.")
, enable_sstables_md_format(this, "enable_sstables_md_format", value_status::Unused, true, "Enable SSTables 'md' format to be used as the default file format. Deprecated, please use \"sstable_format\" instead.")
, sstable_format(this, "sstable_format", value_status::Used, "me", "Default sstable file format", {"md", "me"})
, sstable_compression_dictionaries_allow_in_ddl(this, "sstable_compression_dictionaries_allow_in_ddl", liveness::LiveUpdate, value_status::Used, true,
"Allows for configuring tables to use SSTable compression with shared dictionaries. "
"If the option is disabled, Scylla will reject CREATE and ALTER statements which try to set dictionary-based sstable compressors.\n"
"This is only enforced when this node validates a new DDL statement; disabling the option won't disable dictionary-based compression "
"on tables which already have it configured, and won't do anything to existing sstables.\n"
"To affect existing tables, you can ALTER them to a non-dictionary compressor, or disable dictionary compression "
"for the whole node through `sstable_compression_dictionaries_enable_writing`.")
, sstable_compression_dictionaries_enable_writing(this, "sstable_compression_dictionaries_enable_writing", liveness::LiveUpdate, value_status::Used, true,
"Enables SSTable compression with shared dictionaries (for tables which opt in). If set to false, this node won't write any new SSTables using dictionary compression.\n"
"Option meant not for regular usage, but for unforeseen problems that call for disabling dictionaries without modifying table schema.")

View File

@@ -436,6 +436,7 @@ public:
named_value<bool> enable_sstables_mc_format;
named_value<bool> enable_sstables_md_format;
named_value<sstring> sstable_format;
named_value<bool> sstable_compression_dictionaries_allow_in_ddl;
named_value<bool> sstable_compression_dictionaries_enable_writing;
named_value<float> sstable_compression_dictionaries_memory_budget_fraction;
named_value<float> sstable_compression_dictionaries_retrain_period_in_seconds;

View File

@@ -8,9 +8,13 @@ import logging
import pytest
import itertools
import time
import contextlib
from test.pylib.manager_client import ManagerClient, ServerInfo
from test.pylib.rest_client import read_barrier, ScyllaMetrics
from test.pylib.rest_client import read_barrier, ScyllaMetrics, HTTPError
from cassandra.cluster import ConsistencyLevel, Session as CassandraSession
from cassandra.policies import FallthroughRetryPolicy, ConstantReconnectionPolicy
from cassandra.protocol import ServerError
from cassandra.query import SimpleStatement
from test.pylib.util import wait_for_cql_and_get_hosts
logger = logging.getLogger(__name__)
@@ -465,3 +469,84 @@ async def test_sstable_compression_dictionaries_enable_writing(manager: ManagerC
for algo in nondict_algorithms:
assert (await get_compressor_names(algo)) == {name_prefix + f"{algo}Compressor"}
assert (await get_compressor_names(no_compression)) == set()
async def test_sstable_compression_dictionaries_allow_in_ddl(manager: ManagerClient):
    """
    Tests the sstable_compression_dictionaries_allow_in_ddl option.
    When it's disabled, ALTER and CREATE statements should not be allowed
    to configure tables to use compression dictionaries for sstables.

    The server is started with the option set to false. Then, for each
    dictionary-aware algorithm, the test checks in order that:
      1. CREATE TABLE with the dict compressor is rejected,
      2. the same CREATE succeeds once the option is live-updated to true,
      3. after compression is removed from the table and the option is set
         back to false, ALTER TABLE to the dict compressor is rejected,
      4. the same ALTER succeeds once the option is true again.
    """
    # Bootstrap cluster and configure server
    logger.info("Bootstrapping cluster")

    servers = (await manager.servers_add(1, cmdline=[
        *common_debug_cli_options,
        "--sstable-compression-dictionaries-allow-in-ddl=false",
    ], auto_rack_dc="dc1"))

    @contextlib.asynccontextmanager
    async def with_expect_server_error(msg):
        # Passes only if the wrapped body raises a ServerError whose message
        # is exactly `msg`. A ServerError with a different message is re-raised,
        # and a body that raises nothing fails the test.
        try:
            yield
        except ServerError as e:
            if e.message != msg:
                raise
        else:
            raise Exception('Expected a ServerError, got no exceptions')

    cql = manager.get_cql()
    hosts = await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60)

    await cql.run_async("""
        CREATE KEYSPACE test
        WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1}
    """)

    for new_algo in ['LZ4WithDicts', 'ZstdWithDicts']:
        logger.info(f"Tested algorithm: {new_algo}")
        table_name = f"test.{new_algo}"
        # The rejection message expected for both the CREATE and the ALTER case.
        expected_error = f"sstable_compression {new_algo}Compressor has been disabled by `sstable_compression_dictionaries_allow_in_ddl: false`"

        # NOTE(review): FallthroughRetryPolicy is presumably used so that the
        # server's error reaches the test instead of being retried by the
        # driver -- confirm against the driver documentation.
        logger.info("Check that disabled sstable_compression_dictionaries_allow_in_ddl prevents CREATE with dict compression")
        async with with_expect_server_error(expected_error):
            await cql.run_async(SimpleStatement(f'''
                CREATE TABLE {table_name} (pk int PRIMARY KEY, c blob)
                WITH COMPRESSION = {{'sstable_compression': '{new_algo}Compressor'}};
            ''', retry_policy=FallthroughRetryPolicy()), host=hosts[0])

        logger.info("Enable the config option")
        await live_update_config(manager, servers, 'sstable_compression_dictionaries_allow_in_ddl', "true")

        logger.info("CREATE the table with dict compression")
        await cql.run_async(SimpleStatement(f'''
            CREATE TABLE {table_name} (pk int PRIMARY KEY, c blob)
            WITH COMPRESSION = {{'sstable_compression': '{new_algo}Compressor'}};
        ''', retry_policy=FallthroughRetryPolicy()), host=hosts[0])

        # Remove compression first, so that the ALTER statements below really
        # do try to switch the table to the dict compressor.
        logger.info("Disable compression on the table")
        await cql.run_async(SimpleStatement(f'''
            ALTER TABLE {table_name}
            WITH COMPRESSION = {{'sstable_compression': ''}};
        ''', retry_policy=FallthroughRetryPolicy()), host=hosts[0])

        logger.info("Disable the config option again")
        await live_update_config(manager, servers, 'sstable_compression_dictionaries_allow_in_ddl', "false")

        logger.info("Check that disabled sstable_compression_dictionaries_allow_in_ddl prevents ALTER with dict compression")
        async with with_expect_server_error(expected_error):
            await cql.run_async(SimpleStatement(f'''
                ALTER TABLE {table_name}
                WITH COMPRESSION = {{'sstable_compression': '{new_algo}Compressor'}};
            ''', retry_policy=FallthroughRetryPolicy()), host=hosts[0])

        logger.info("Enable the config option again")
        await live_update_config(manager, servers, 'sstable_compression_dictionaries_allow_in_ddl', "true")

        logger.info("ALTER the table with dict compression")
        await cql.run_async(SimpleStatement(f'''
            ALTER TABLE {table_name}
            WITH COMPRESSION = {{'sstable_compression': '{new_algo}Compressor'}};
        ''', retry_policy=FallthroughRetryPolicy()), host=hosts[0])

        # Return to the state the server was started in (option == false), so
        # the next algorithm's iteration begins with dict compressors rejected.
        logger.info("Disable the config option for the next test")
        await live_update_config(manager, servers, 'sstable_compression_dictionaries_allow_in_ddl', "false")