From 042825247f838203c14a770b753ff26a23e6836b Mon Sep 17 00:00:00 2001 From: Piotr Smaron Date: Mon, 19 Aug 2024 13:37:12 +0200 Subject: [PATCH 1/8] cql: remove unused helper function from test_tablets `change_default_rf` is not used anywhere, moreover it uses `replication_factor` tag, which is forbidden in ALTER tablets KS statement. --- test/cql-pytest/test_tablets.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/cql-pytest/test_tablets.py b/test/cql-pytest/test_tablets.py index 64b47aabec..12d2acf098 100644 --- a/test/cql-pytest/test_tablets.py +++ b/test/cql-pytest/test_tablets.py @@ -306,8 +306,6 @@ def test_alter_tablet_keyspace(cql, this_dc): cql.execute(f"ALTER KEYSPACE {keyspace} WITH REPLICATION = {{ 'class' : 'NetworkTopologyStrategy', '{rf_opt}' : {new_rf} }}") def change_dc_rf(new_rf): change_opt_rf(this_dc, new_rf) - def change_default_rf(new_rf): - change_opt_rf("replication_factor", new_rf) change_dc_rf(2) change_dc_rf(3) From adf453af3f391dbd10f787eb16aa8c8ea61f32d5 Mon Sep 17 00:00:00 2001 From: Piotr Smaron Date: Mon, 19 Aug 2024 13:39:19 +0200 Subject: [PATCH 2/8] cql: refactor test_tablets::test_alter_tablet_keyspace 1. Renamed the testcase to emphasize that it only focuses on testing changing RF - there are other tests that test ALTER tablets KS in general. 2. Fixed whitespaces according to PEP8 --- test/cql-pytest/test_tablets.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/cql-pytest/test_tablets.py b/test/cql-pytest/test_tablets.py index 12d2acf098..2aec920cae 100644 --- a/test/cql-pytest/test_tablets.py +++ b/test/cql-pytest/test_tablets.py @@ -299,11 +299,13 @@ def test_lwt_support_with_tablets(cql, test_keyspace, skip_without_tablets): # We want to ensure that we can only change the RF of any DC by at most 1 at a time # if we use tablets. That provides us with the guarantee that the old and the new QUORUM # overlap by at least one node. -def test_alter_tablet_keyspace(cql, this_dc): +def test_alter_tablet_keyspace_rf(cql, this_dc): with new_test_keyspace(cql, f"WITH REPLICATION = {{ 'class' : 'NetworkTopologyStrategy', '{this_dc}' : 1 }} " f"AND TABLETS = {{ 'enabled': true, 'initial': 128 }}") as keyspace: def change_opt_rf(rf_opt, new_rf): - cql.execute(f"ALTER KEYSPACE {keyspace} WITH REPLICATION = {{ 'class' : 'NetworkTopologyStrategy', '{rf_opt}' : {new_rf} }}") + cql.execute(f"ALTER KEYSPACE {keyspace} " + f"WITH REPLICATION = {{ 'class' : 'NetworkTopologyStrategy', '{rf_opt}' : {new_rf} }}") + def change_dc_rf(new_rf): change_opt_rf(this_dc, new_rf) From 9c5950533fb92ca7e44152dda761fa3a212da526 Mon Sep 17 00:00:00 2001 From: Piotr Smaron Date: Mon, 19 Aug 2024 13:49:30 +0200 Subject: [PATCH 3/8] cql: extend test_alter_tablet_keyspace_rf Added cases to also test decreasing RF and setting the same RF. Also added extra explanatory comments. --- test/cql-pytest/test_tablets.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/test/cql-pytest/test_tablets.py b/test/cql-pytest/test_tablets.py index 2aec920cae..163469b783 100644 --- a/test/cql-pytest/test_tablets.py +++ b/test/cql-pytest/test_tablets.py @@ -309,12 +309,17 @@ def test_alter_tablet_keyspace_rf(cql, this_dc): def change_dc_rf(new_rf): change_opt_rf(this_dc, new_rf) - change_dc_rf(2) - change_dc_rf(3) + change_dc_rf(2) # increase RF by 1 should be OK + change_dc_rf(3) # increase RF by 1 again should be OK + change_dc_rf(3) # setting the same RF shouldn't cause problems + change_dc_rf(4) # increase RF by 1 again should be OK + change_dc_rf(3) # decrease RF by 1 should be OK with pytest.raises(InvalidRequest): - change_dc_rf(5) + change_dc_rf(5) # increase RF by 2 should fail with pytest.raises(InvalidRequest): - change_dc_rf(1) + change_dc_rf(1) # decrease RF by 2 should fail with pytest.raises(InvalidRequest): - change_dc_rf(10) + change_dc_rf(10) # increase RF by 2+ should fail + with pytest.raises(InvalidRequest): + change_dc_rf(0) # decrease RF by 2+ should fail From 47acdc1f9847a89fcf5a4336d8e79b3d1fdeb671 Mon Sep 17 00:00:00 2001 From: Piotr Smaron Date: Mon, 19 Aug 2024 15:01:52 +0200 Subject: [PATCH 4/8] cql: validate RF change for new DCs in ALTER tablets KS ALTER tablets KS validated if RF is not changed by more than 1 for DCs that already had replicas, but not for DCs that didn't have them yet, so specifying an RF jump from 0 to 2 was possible when listing a new DC in ALTER tablets KS statement, which violated internal invariants of tablets load balancer. This PR fixes that bug and adds a multi-dc testcases to check if adding replicas to a new DC and removing replicas from a DC is honoring the RF change constraints. Refs: #20039 --- cql3/statements/alter_keyspace_statement.cc | 3 +- test/topology_custom/test_tablets.py | 31 ++++++++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/cql3/statements/alter_keyspace_statement.cc b/cql3/statements/alter_keyspace_statement.cc index ad910e4293..8f47ae9f57 100644 --- a/cql3/statements/alter_keyspace_statement.cc +++ b/cql3/statements/alter_keyspace_statement.cc @@ -87,7 +87,8 @@ void cql3::statements::alter_keyspace_statement::validate(query_processor& qp, c const std::map& current_rfs = ks.metadata()->strategy_options(); for (const auto& [new_dc, new_rf] : _attrs->get_replication_options()) { auto it = current_rfs.find(new_dc); - if (it != current_rfs.end() && !validate_rf_difference(it->second, new_rf)) { + sstring old_rf = it != current_rfs.end() ? it->second : "0"; + if (!validate_rf_difference(old_rf, new_rf)) { throw exceptions::invalid_request_exception("Cannot modify replication factor of any DC by more than 1 at a time."); } } diff --git a/test/topology_custom/test_tablets.py b/test/topology_custom/test_tablets.py index 594769d0aa..661edec46f 100644 --- a/test/topology_custom/test_tablets.py +++ b/test/topology_custom/test_tablets.py @@ -3,7 +3,7 @@ # # SPDX-License-Identifier: AGPL-3.0-or-later # -from cassandra.protocol import ConfigurationException +from cassandra.protocol import ConfigurationException, InvalidRequest from cassandra.query import SimpleStatement, ConsistencyLevel from test.pylib.manager_client import ManagerClient from test.pylib.rest_client import HTTPError, read_barrier @@ -190,6 +190,35 @@ async def test_tablet_mutation_fragments_unowned_partition(manager: ManagerClien await cql.run_async(f"SELECT partition_region FROM MUTATION_FRAGMENTS(test.test) WHERE pk={k}", host=host[0]) +# ALTER tablets KS cannot change RF of any DC by more than 1 at a time. +# In a multi-dc environment, we can create replicas in a DC that didn't have replicas before, +# but the above requirement should still be honoured, because we'd be changing RF from 0 to N in the new DC. +# Reproduces https://github.com/scylladb/scylladb/issues/20039#issuecomment-2271365060 +# See also cql-pytest/test_tablets.py::test_alter_tablet_keyspace_rf for basic scenarios tested +@pytest.mark.asyncio +async def test_multidc_alter_tablets_rf(request: pytest.FixtureRequest, manager: ManagerClient) -> None: + config = {"endpoint_snitch": "GossipingPropertyFileSnitch", "enable_tablets": "true"} + + logger.info("Creating a new cluster of 1 node in 1st DC and 2 nodes in 2nd DC") + await manager.server_add(config=config, property_file={'dc': f'dc1', 'rack': 'myrack'}) + await manager.server_add(config=config, property_file={'dc': f'dc2', 'rack': 'myrack'}) + await manager.server_add(config=config, property_file={'dc': f'dc2', 'rack': 'myrack'}) + + conn = manager.get_cql() + conn.execute("create keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc1': 1};") + conn.execute("create table ks.t (pk int primary key);") # need a table to not only change schema, but also replicas + with pytest.raises(InvalidRequest, match="Cannot modify replication factor of any DC by more than 1 at a time."): + # changing RF of dc2 from 0 to 2 should fail + conn.execute("alter keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc2': 2};") + + # changing RF of dc2 from 0 to 1 should succeed + conn.execute("alter keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc2': 1};") + # also check that we can safely remove all replicas by changing RF of dc2 from 1 to 0 back again + conn.execute("alter keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc2': 0};") + # we may also remove all the replicas from the cluster, i.e. change RF of dc1 from 1 to 0 as well: + conn.execute("alter keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc1': 0};") + + # Reproducer for https://github.com/scylladb/scylladb/issues/18110 # Check that an existing cached read, will be cleaned up when the tablet it reads # from is migrated away. From 93d61d7031b59a4338599a798ebc20febb2633d2 Mon Sep 17 00:00:00 2001 From: Piotr Smaron Date: Fri, 23 Aug 2024 10:00:34 +0200 Subject: [PATCH 5/8] cql: harden `alter_keyspace_statement.cc::validate_rf_difference` This function assumed that strings passed as arguments will be of integer types, but that wasn't the case, and we missed that because this function didn't have any validation, so this change adds proper validation and error logging. Arguments passed to this function were forwarded from a call to `ks_prop_defs::get_replication_options`, which, among rf-per-dc mapping, returns also `class:replication_strategy` pair. Second pair's member has been casted into an `int` type and somehow the code was still running fine, but only extra testing added later discovered a bug in here. --- cql3/statements/alter_keyspace_statement.cc | 24 ++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/cql3/statements/alter_keyspace_statement.cc b/cql3/statements/alter_keyspace_statement.cc index 8f47ae9f57..cf799b9587 100644 --- a/cql3/statements/alter_keyspace_statement.cc +++ b/cql3/statements/alter_keyspace_statement.cc @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "alter_keyspace_statement.hh" #include "prepared_statement.hh" @@ -43,18 +44,17 @@ future<> cql3::statements::alter_keyspace_statement::check_access(query_processo return state.has_keyspace_access(_name, auth::permission::ALTER); } -static bool validate_rf_difference(const std::string_view curr_rf, const std::string_view new_rf) { - auto to_number = [] (const std::string_view rf) { - int result; - // We assume the passed string view represents a valid decimal number, - // so we don't need the error code. - (void) std::from_chars(rf.begin(), rf.end(), result); - return result; - }; - - // We want to ensure that each DC's RF is going to change by at most 1 - // because in that case the old and new quorums must overlap. - return std::abs(to_number(curr_rf) - to_number(new_rf)) <= 1; +// We want to ensure that each DC's RF is going to change by at most 1 +static bool validate_rf_difference(const std::string& curr_rf, const std::string& new_rf) { + try { + return std::abs(std::stoi(curr_rf) - std::stoi(new_rf)) <= 1; + } catch (std::invalid_argument const& ex) { + on_internal_error(mylogger, format("validate_rf_difference expects integer arguments, " + "but got curr_rf:{} and new_rf:{}", curr_rf, new_rf)); + } catch (std::out_of_range const& ex) { + on_internal_error(mylogger, format("validate_rf_difference expects integer arguments to fit into `int` type, " + "but got curr_rf:{} and new_rf:{}", curr_rf, new_rf)); + } } void cql3::statements::alter_keyspace_statement::validate(query_processor& qp, const service::client_state& state) const { From 6676e473711161333ec4686fef4167d8eda89b3f Mon Sep 17 00:00:00 2001 From: Piotr Smaron Date: Fri, 23 Aug 2024 12:01:01 +0200 Subject: [PATCH 6/8] cql: fix validation of ALTERing RFs in tablets KS The validation has been corrected with: 1. Checking if a DC specified in ALTER exists. 2. Removing `REPLICATION_STRATEGY_CLASS_KEY` key from a map of RFs that needs their RFs to be validated. --- cql3/statements/alter_keyspace_statement.cc | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/cql3/statements/alter_keyspace_statement.cc b/cql3/statements/alter_keyspace_statement.cc index cf799b9587..cfbdbb14fc 100644 --- a/cql3/statements/alter_keyspace_statement.cc +++ b/cql3/statements/alter_keyspace_statement.cc @@ -84,10 +84,21 @@ void cql3::statements::alter_keyspace_statement::validate(query_processor& qp, c auto new_ks = _attrs->as_ks_metadata_update(ks.metadata(), *qp.proxy().get_token_metadata_ptr(), qp.proxy().features()); if (ks.get_replication_strategy().uses_tablets()) { - const std::map& current_rfs = ks.metadata()->strategy_options(); - for (const auto& [new_dc, new_rf] : _attrs->get_replication_options()) { - auto it = current_rfs.find(new_dc); - sstring old_rf = it != current_rfs.end() ? it->second : "0"; + const std::map& current_rf_per_dc = ks.metadata()->strategy_options(); + auto new_rf_per_dc = _attrs->get_replication_options(); + new_rf_per_dc.erase(ks_prop_defs::REPLICATION_STRATEGY_CLASS_KEY); + for (const auto& [new_dc, new_rf] : new_rf_per_dc) { + sstring old_rf = "0"; + if (auto new_dc_in_current_mapping = current_rf_per_dc.find(new_dc); + new_dc_in_current_mapping != current_rf_per_dc.end()) { + old_rf = new_dc_in_current_mapping->second; + } else if (!qp.proxy().get_token_metadata_ptr()->get_topology().get_datacenters().contains(new_dc)) { + // This means that the DC listed in ALTER doesn't exist. This error will be reported later, + // during validation in abstract_replication_strategy::validate_replication_strategy. + // We can't report this error now, because it'd change the order of errors reported: + // first we need to report non-existing DCs, then if RFs aren't changed by too much. + continue; + } if (!validate_rf_difference(old_rf, new_rf)) { throw exceptions::invalid_request_exception("Cannot modify replication factor of any DC by more than 1 at a time."); } From 2aabe7f09cad0639532b8b98c65539e64cfb6140 Mon Sep 17 00:00:00 2001 From: Piotr Smaron Date: Fri, 23 Aug 2024 17:25:16 +0200 Subject: [PATCH 7/8] cql: join new and old KS options in ALTER tablets KS A bug has been discovered while trying to ALTER tablets KS and specifying only 1 out of 2 DCs - the not specified DC's RF has been zeroed. This is because ALTER tablets KS updated the KS only with the RF-per-DC mapping specified in the ALTER tablets KS statement, so if a DC was ommitted, it was assigned a value of RF=0. This commit fixes that plus additionally passes all the KS options, not only the replication options, to the topology coordinator, where the KS update is performed. `initial_tablets` is a special case, which requires a special handling in the source code, as we cannot simply update old initial_tablet's settings with the new ones, because if only ` and TABLETS = {'enabled': true}` is specified in the ALTER tablets KS statement, we should not zero the `initial_tablets`, but rather keep the old value - this is tested by the `test_alter_preserves_tablets_if_initial_tablets_skipped` testcase. Other than that, the above mentioned testcase started to fail with these changes, and it appeared to be an issue with the test not waiting until ALTER is completed, and thus reading the old value, hence the test's body has been modified to wait for ALTER to complete before performing validation. --- cql3/statements/alter_keyspace_statement.cc | 66 ++++++++++++++++++++- cql3/statements/ks_prop_defs.cc | 16 ----- cql3/statements/ks_prop_defs.hh | 1 - cql3/statements/property_definitions.hh | 4 +- test/topology_custom/test_tablets.py | 25 ++++---- 5 files changed, 80 insertions(+), 32 deletions(-) diff --git a/cql3/statements/alter_keyspace_statement.cc b/cql3/statements/alter_keyspace_statement.cc index cfbdbb14fc..1e7636a8cb 100644 --- a/cql3/statements/alter_keyspace_statement.cc +++ b/cql3/statements/alter_keyspace_statement.cc @@ -130,6 +130,63 @@ bool cql3::statements::alter_keyspace_statement::changes_tablets(query_processor return ks.get_replication_strategy().uses_tablets() && !_attrs->get_replication_options().empty(); } +namespace { +// These functions are used to flatten all the options in the keyspace definition into a single-level map. +// (Currently options are stored in a nested structure that looks more like a map>). +// Flattening is simply joining the keys of maps from both levels with a colon ':' character, +// or in other words: prefixing the keys in the output map with the option type, e.g. 'replication', 'storage', etc., +// so that the output map contains entries like: "replication:dc1" -> "3". +// This is done to avoid key conflicts and to be able to de-flatten the map back into the original structure. + +void add_prefixed_key(const sstring& prefix, const std::map& in, std::map& out) { + for (const auto& [in_key, in_value]: in) { + out[prefix + ":" + in_key] = in_value; + } +}; + +std::map get_current_options_flattened(const shared_ptr& ks, + bool include_tablet_options, + const gms::feature_service& feat) { + std::map all_options; + + add_prefixed_key(ks->KW_REPLICATION, ks->get_replication_options(), all_options); + add_prefixed_key(ks->KW_STORAGE, ks->get_storage_options().to_map(), all_options); + // if no tablet options are specified in ATLER KS statement, + // we want to preserve the old ones and hence cannot overwrite them with defaults + if (include_tablet_options) { + auto initial_tablets = ks->get_initial_tablets(std::nullopt); + add_prefixed_key(ks->KW_TABLETS, + {{"enabled", initial_tablets ? "true" : "false"}, + {"initial", std::to_string(initial_tablets.value_or(0))}}, + all_options); + } + add_prefixed_key(ks->KW_DURABLE_WRITES, + {{sstring(ks->KW_DURABLE_WRITES), to_sstring(ks->get_boolean(ks->KW_DURABLE_WRITES, true))}}, + all_options); + + return all_options; +} + +std::map get_old_options_flattened(const data_dictionary::keyspace& ks, bool include_tablet_options) { + std::map all_options; + + using namespace cql3::statements; + add_prefixed_key(ks_prop_defs::KW_REPLICATION, ks.get_replication_strategy().get_config_options(), all_options); + add_prefixed_key(ks_prop_defs::KW_STORAGE, ks.metadata()->get_storage_options().to_map(), all_options); + if (include_tablet_options) { + add_prefixed_key(ks_prop_defs::KW_TABLETS, + {{"enabled", ks.metadata()->initial_tablets() ? "true" : "false"}, + {"initial", std::to_string(ks.metadata()->initial_tablets().value_or(0))}}, + all_options); + } + add_prefixed_key(ks_prop_defs::KW_DURABLE_WRITES, + {{sstring(ks_prop_defs::KW_DURABLE_WRITES), to_sstring(ks.metadata()->durable_writes())}}, + all_options); + + return all_options; +} +} // namespace + future, cql3::cql_warnings_vec>> cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_processor& qp, service::query_state& state, const query_options& options, service::group0_batch& mc) const { using namespace cql_transport; @@ -142,11 +199,18 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce auto ks_md_update = _attrs->as_ks_metadata_update(ks_md, tm, feat); std::vector muts; std::vector warnings; - auto ks_options = _attrs->get_all_options_flattened(feat); + bool include_tablet_options = _attrs->get_map(_attrs->KW_TABLETS).has_value(); + auto old_ks_options = get_old_options_flattened(ks, include_tablet_options); + auto ks_options = get_current_options_flattened(_attrs, include_tablet_options, feat); + ks_options.merge(old_ks_options); + auto ts = mc.write_timestamp(); auto global_request_id = mc.new_group0_state_id(); // we only want to run the tablets path if there are actually any tablets changes, not only schema changes + // TODO: the current `if (changes_tablets(qp))` is insufficient: someone may set the same RFs as before, + // and we'll unnecessarily trigger the processing path for ALTER tablets KS, + // when in reality nothing or only schema is being changed if (changes_tablets(qp)) { if (!qp.topology_global_queue_empty()) { return make_exception_future, cql3::cql_warnings_vec>>( diff --git a/cql3/statements/ks_prop_defs.cc b/cql3/statements/ks_prop_defs.cc index 00e0623ad6..518916b69c 100644 --- a/cql3/statements/ks_prop_defs.cc +++ b/cql3/statements/ks_prop_defs.cc @@ -185,22 +185,6 @@ bool ks_prop_defs::get_durable_writes() const { return get_boolean(KW_DURABLE_WRITES, true); } -std::map ks_prop_defs::get_all_options_flattened(const gms::feature_service& feat) const { - std::map all_options; - - auto ingest_flattened_options = [&all_options](const std::map& options, const sstring& prefix) { - for (auto& option: options) { - all_options[prefix + ":" + option.first] = option.second; - } - }; - ingest_flattened_options(get_replication_options(), KW_REPLICATION); - ingest_flattened_options(get_storage_options().to_map(), KW_STORAGE); - ingest_flattened_options(get_map(KW_TABLETS).value_or(std::map{}), KW_TABLETS); - ingest_flattened_options({{sstring(KW_DURABLE_WRITES), to_sstring(get_boolean(KW_DURABLE_WRITES, true))}}, KW_DURABLE_WRITES); - - return all_options; -} - lw_shared_ptr ks_prop_defs::as_ks_metadata(sstring ks_name, const locator::token_metadata& tm, const gms::feature_service& feat) { auto sc = get_replication_strategy_class().value(); // if tablets options have not been specified, but tablets are globally enabled, set the value to 0 for N.T.S. only diff --git a/cql3/statements/ks_prop_defs.hh b/cql3/statements/ks_prop_defs.hh index da0ba82913..207f24b416 100644 --- a/cql3/statements/ks_prop_defs.hh +++ b/cql3/statements/ks_prop_defs.hh @@ -58,7 +58,6 @@ public: std::optional get_initial_tablets(std::optional default_value) const; data_dictionary::storage_options get_storage_options() const; bool get_durable_writes() const; - std::map get_all_options_flattened(const gms::feature_service& feat) const; lw_shared_ptr as_ks_metadata(sstring ks_name, const locator::token_metadata&, const gms::feature_service&); lw_shared_ptr as_ks_metadata_update(lw_shared_ptr old, const locator::token_metadata&, const gms::feature_service&); }; diff --git a/cql3/statements/property_definitions.hh b/cql3/statements/property_definitions.hh index c115af9ebb..8e05682e32 100644 --- a/cql3/statements/property_definitions.hh +++ b/cql3/statements/property_definitions.hh @@ -46,14 +46,14 @@ public: protected: std::optional get_simple(const sstring& name) const; - std::optional> get_map(const sstring& name) const; - void remove_from_map_if_exists(const sstring& name, const sstring& key) const; public: bool has_property(const sstring& name) const; std::optional get(const sstring& name) const; + std::optional> get_map(const sstring& name) const; + sstring get_string(sstring key, sstring default_value) const; // Return a property value, typed as a Boolean diff --git a/test/topology_custom/test_tablets.py b/test/topology_custom/test_tablets.py index 661edec46f..4c216217b7 100644 --- a/test/topology_custom/test_tablets.py +++ b/test/topology_custom/test_tablets.py @@ -200,23 +200,24 @@ async def test_multidc_alter_tablets_rf(request: pytest.FixtureRequest, manager: config = {"endpoint_snitch": "GossipingPropertyFileSnitch", "enable_tablets": "true"} logger.info("Creating a new cluster of 1 node in 1st DC and 2 nodes in 2nd DC") - await manager.server_add(config=config, property_file={'dc': f'dc1', 'rack': 'myrack'}) - await manager.server_add(config=config, property_file={'dc': f'dc2', 'rack': 'myrack'}) - await manager.server_add(config=config, property_file={'dc': f'dc2', 'rack': 'myrack'}) + await manager.servers_add(3, config=config, property_file={'dc': f'dc1', 'rack': 'myrack'}) + await manager.servers_add(2, config=config, property_file={'dc': f'dc2', 'rack': 'myrack'}) + # await manager.server_add(config=config, property_file={'dc': f'dc2', 'rack': 'myrack'}) - conn = manager.get_cql() - conn.execute("create keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc1': 1};") - conn.execute("create table ks.t (pk int primary key);") # need a table to not only change schema, but also replicas + cql = manager.get_cql() + await cql.run_async("create keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc1': 1}") + # need to create a table to not change only the schema, but also tablets replicas + await cql.run_async("create table ks.t (pk int primary key)") with pytest.raises(InvalidRequest, match="Cannot modify replication factor of any DC by more than 1 at a time."): # changing RF of dc2 from 0 to 2 should fail - conn.execute("alter keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc2': 2};") + await cql.run_async("alter keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc2': 2}") # changing RF of dc2 from 0 to 1 should succeed - conn.execute("alter keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc2': 1};") - # also check that we can safely remove all replicas by changing RF of dc2 from 1 to 0 back again - conn.execute("alter keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc2': 0};") - # we may also remove all the replicas from the cluster, i.e. change RF of dc1 from 1 to 0 as well: - conn.execute("alter keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc1': 0};") + await cql.run_async("alter keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc2': 1}") + # we want to ensure that RFs of both DCs are equal to 1 now + res = await cql.run_async("SELECT * FROM system_schema.keyspaces WHERE keyspace_name = 'ks'") + assert res[0].replication['dc1'] == '1' + assert res[0].replication['dc2'] == '1' # Reproducer for https://github.com/scylladb/scylladb/issues/18110 From ee56bbfe6136a4f49be92705bca8c5df8e96bcc8 Mon Sep 17 00:00:00 2001 From: Piotr Smaron Date: Fri, 23 Aug 2024 21:52:22 +0200 Subject: [PATCH 8/8] cql: sum of abs RFs diffs cannot exceed 1 in ALTER tablets KS Tablets load balancer is unable to process more than a single pending replica, thus ALTER tablets KS cannot accept an ALTER statement which would result in creating 2+ pending replicas, hence it has to validate if the sum of absoulte differences of RFs specified in the statement is not greter than 1. --- cql3/statements/alter_keyspace_statement.cc | 18 ++++++------- test/topology_custom/test_tablets.py | 28 +++++++++++++++++---- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/cql3/statements/alter_keyspace_statement.cc b/cql3/statements/alter_keyspace_statement.cc index 1e7636a8cb..429b5ea21c 100644 --- a/cql3/statements/alter_keyspace_statement.cc +++ b/cql3/statements/alter_keyspace_statement.cc @@ -44,16 +44,15 @@ future<> cql3::statements::alter_keyspace_statement::check_access(query_processo return state.has_keyspace_access(_name, auth::permission::ALTER); } -// We want to ensure that each DC's RF is going to change by at most 1 -static bool validate_rf_difference(const std::string& curr_rf, const std::string& new_rf) { +static unsigned get_abs_rf_diff(const std::string& curr_rf, const std::string& new_rf) { try { - return std::abs(std::stoi(curr_rf) - std::stoi(new_rf)) <= 1; + return std::abs(std::stoi(curr_rf) - std::stoi(new_rf)); } catch (std::invalid_argument const& ex) { - on_internal_error(mylogger, format("validate_rf_difference expects integer arguments, " - "but got curr_rf:{} and new_rf:{}", curr_rf, new_rf)); + on_internal_error(mylogger, fmt::format("get_abs_rf_diff expects integer arguments, " + "but got curr_rf:{} and new_rf:{}", curr_rf, new_rf)); } catch (std::out_of_range const& ex) { - on_internal_error(mylogger, format("validate_rf_difference expects integer arguments to fit into `int` type, " - "but got curr_rf:{} and new_rf:{}", curr_rf, new_rf)); + on_internal_error(mylogger, fmt::format("get_abs_rf_diff expects integer arguments to fit into `int` type, " + "but got curr_rf:{} and new_rf:{}", curr_rf, new_rf)); } } @@ -87,6 +86,7 @@ void cql3::statements::alter_keyspace_statement::validate(query_processor& qp, c const std::map& current_rf_per_dc = ks.metadata()->strategy_options(); auto new_rf_per_dc = _attrs->get_replication_options(); new_rf_per_dc.erase(ks_prop_defs::REPLICATION_STRATEGY_CLASS_KEY); + unsigned total_abs_rfs_diff = 0; for (const auto& [new_dc, new_rf] : new_rf_per_dc) { sstring old_rf = "0"; if (auto new_dc_in_current_mapping = current_rf_per_dc.find(new_dc); @@ -99,8 +99,8 @@ void cql3::statements::alter_keyspace_statement::validate(query_processor& qp, c // first we need to report non-existing DCs, then if RFs aren't changed by too much. continue; } - if (!validate_rf_difference(old_rf, new_rf)) { - throw exceptions::invalid_request_exception("Cannot modify replication factor of any DC by more than 1 at a time."); + if (total_abs_rfs_diff += get_abs_rf_diff(old_rf, new_rf); total_abs_rfs_diff >= 2) { + throw exceptions::invalid_request_exception("Only one DC's RF can be changed at a time and not by more than 1"); } } } diff --git a/test/topology_custom/test_tablets.py b/test/topology_custom/test_tablets.py index 4c216217b7..594d6f6868 100644 --- a/test/topology_custom/test_tablets.py +++ b/test/topology_custom/test_tablets.py @@ -199,26 +199,44 @@ async def test_tablet_mutation_fragments_unowned_partition(manager: ManagerClien async def test_multidc_alter_tablets_rf(request: pytest.FixtureRequest, manager: ManagerClient) -> None: config = {"endpoint_snitch": "GossipingPropertyFileSnitch", "enable_tablets": "true"} - logger.info("Creating a new cluster of 1 node in 1st DC and 2 nodes in 2nd DC") - await manager.servers_add(3, config=config, property_file={'dc': f'dc1', 'rack': 'myrack'}) + logger.info("Creating a new cluster of 2 nodes in 1st DC and 2 nodes in 2nd DC") + # we have to have at least 2 nodes in each DC if we want to try setting RF to 2 in each DC + await manager.servers_add(2, config=config, property_file={'dc': f'dc1', 'rack': 'myrack'}) await manager.servers_add(2, config=config, property_file={'dc': f'dc2', 'rack': 'myrack'}) - # await manager.server_add(config=config, property_file={'dc': f'dc2', 'rack': 'myrack'}) cql = manager.get_cql() await cql.run_async("create keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc1': 1}") # need to create a table to not change only the schema, but also tablets replicas await cql.run_async("create table ks.t (pk int primary key)") - with pytest.raises(InvalidRequest, match="Cannot modify replication factor of any DC by more than 1 at a time."): + with pytest.raises(InvalidRequest, match="Only one DC's RF can be changed at a time and not by more than 1"): # changing RF of dc2 from 0 to 2 should fail await cql.run_async("alter keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc2': 2}") # changing RF of dc2 from 0 to 1 should succeed await cql.run_async("alter keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc2': 1}") - # we want to ensure that RFs of both DCs are equal to 1 now + # ensure that RFs of both DCs are equal to 1 now, i.e. that omitting dc1 in above command didn't change it res = await cql.run_async("SELECT * FROM system_schema.keyspaces WHERE keyspace_name = 'ks'") assert res[0].replication['dc1'] == '1' assert res[0].replication['dc2'] == '1' + # incrementing RF of 2 DCs at once should NOT succeed, because it'd leave 2 pending tablets replicas + with pytest.raises(InvalidRequest, match="Only one DC's RF can be changed at a time and not by more than 1"): + await cql.run_async("alter keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc1': 2, 'dc2': 2}") + # as above, but decrementing + with pytest.raises(InvalidRequest, match="Only one DC's RF can be changed at a time and not by more than 1"): + await cql.run_async("alter keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc1': 0, 'dc2': 0}") + # as above, but decrement 1 RF and increment the other + with pytest.raises(InvalidRequest, match="Only one DC's RF can be changed at a time and not by more than 1"): + await cql.run_async("alter keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc1': 2, 'dc2': 0}") + # as above, but RFs are swapped + with pytest.raises(InvalidRequest, match="Only one DC's RF can be changed at a time and not by more than 1"): + await cql.run_async("alter keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc1': 0, 'dc2': 2}") + + # check that we can remove all replicas from dc2 by changing RF from 1 to 0 + await cql.run_async("alter keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc2': 0}") + # check that we can remove all replicas from the cluster, i.e. change RF of dc1 from 1 to 0 as well: + await cql.run_async("alter keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'dc1': 0}") + # Reproducer for https://github.com/scylladb/scylladb/issues/18110 # Check that an existing cached read, will be cleaned up when the tablet it reads