mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-29 11:10:40 +00:00
schema_mutations, migration_manager: Ignore empty partitions in per-table digest
Schema digest is calculated by querying for mutations of all schema
tables, then compacting them so that all tombstones in them are
dropped. However, even if the mutation becomes empty after compaction,
we still feed its partition key. If the same mutations were compacted
prior to the query, because the tombstones expire, we won't get any
mutation at all and won't feed the partition key. So schema digest
will change once an empty partition of some schema table is compacted
away.
Tombstones expire 7 days after schema change which introduces them. If
one of the nodes is restarted after that, it will compute a different
table schema digest on boot. This may cause performance problems. When
sending a request from coordinator to replica, the replica needs
schema_ptr of exact schema version request by the coordinator. If it
doesn't know that version, it will request it from the coordinator and
perform a full schema merge. This adds latency to every such request.
Schema versions which are not referenced are currently kept in cache
for only 1 second, so if request flow has low-enough rate, this
situation results in perpetual schema pulls.
After ae8d2a550d, it is more liekly to
run into this situation, because table creation generates tombstones
for all schema tables relevant to the table, even the ones which
will be otherwise empty for the new table (e.g. computed_columns).
This change inroduces a cluster feature which when enabled will change
digest calculation to be insensitive to expiry by ignoring empty
partitions in digest calculation. When the feature is enabled,
schema_ptrs are reloaded so that the window of discrepancy during
transition is short and no rolling restart is required.
A similar problem was fixed for per-node digest calculation in
18f484cc753d17d1e3658bcb5c73ed8f319d32e8. Per-table digest calculation
was not fixed at that time because we didn't persist enabled features
and they were not enabled early-enough on boot for us to depend on
them in digest calculation. Now they are enabled before non-system
tables are loaded so digest calculation can rely on cluster features.
Fixes #4485.
This commit is contained in:
@@ -881,6 +881,8 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
||||
, uuid_sstable_identifiers_enabled(this,
|
||||
"uuid_sstable_identifiers_enabled", liveness::LiveUpdate, value_status::Used, true, "If set to true, each newly created sstable will have a UUID "
|
||||
"based generation identifier, and such files are not readable by previous Scylla versions.")
|
||||
, table_digest_insensitive_to_expiry(this, "table_digest_insensitive_to_expiry", liveness::MustRestart, value_status::Used, true,
|
||||
"When enabled, per-table schema digest calculation ignores empty partitions.")
|
||||
, enable_dangerous_direct_import_of_cassandra_counters(this, "enable_dangerous_direct_import_of_cassandra_counters", value_status::Used, false, "Only turn this option on if you want to import tables from Cassandra containing counters, and you are SURE that no counters in that table were created in a version earlier than Cassandra 2.1."
|
||||
" It is not enough to have ever since upgraded to newer versions of Cassandra. If you EVER used a version earlier than 2.1 in the cluster where these SSTables come from, DO NOT TURN ON THIS OPTION! You will corrupt your data. You have been warned.")
|
||||
, enable_shard_aware_drivers(this, "enable_shard_aware_drivers", value_status::Used, true, "Enable native transport drivers to use connection-per-shard for better performance")
|
||||
|
||||
@@ -357,6 +357,7 @@ public:
|
||||
named_value<bool> enable_sstables_md_format;
|
||||
named_value<sstring> sstable_format;
|
||||
named_value<bool> uuid_sstable_identifiers_enabled;
|
||||
named_value<bool> table_digest_insensitive_to_expiry;
|
||||
named_value<bool> enable_dangerous_direct_import_of_cassandra_counters;
|
||||
named_value<bool> enable_shard_aware_drivers;
|
||||
named_value<bool> enable_ipv6_dns_lookup;
|
||||
|
||||
@@ -24,6 +24,10 @@ enum class schema_feature {
|
||||
PER_TABLE_PARTITIONERS,
|
||||
SCYLLA_KEYSPACES,
|
||||
SCYLLA_AGGREGATES,
|
||||
|
||||
// When enabled, schema_mutations::digest() will skip empty mutations (with only tombstones),
|
||||
// so that the digest remains the same after schema tables are compacted.
|
||||
TABLE_DIGEST_INSENSITIVE_TO_EXPIRY,
|
||||
};
|
||||
|
||||
using schema_features = enum_set<super_enum<schema_feature,
|
||||
@@ -33,7 +37,8 @@ using schema_features = enum_set<super_enum<schema_feature,
|
||||
schema_feature::CDC_OPTIONS,
|
||||
schema_feature::PER_TABLE_PARTITIONERS,
|
||||
schema_feature::SCYLLA_KEYSPACES,
|
||||
schema_feature::SCYLLA_AGGREGATES
|
||||
schema_feature::SCYLLA_AGGREGATES,
|
||||
schema_feature::TABLE_DIGEST_INSENSITIVE_TO_EXPIRY
|
||||
>>;
|
||||
|
||||
}
|
||||
|
||||
@@ -102,8 +102,10 @@ namespace {
|
||||
});
|
||||
}
|
||||
|
||||
schema_ctxt::schema_ctxt(const db::config& cfg, std::shared_ptr<data_dictionary::user_types_storage> uts, replica::database* db)
|
||||
schema_ctxt::schema_ctxt(const db::config& cfg, std::shared_ptr<data_dictionary::user_types_storage> uts,
|
||||
const gms::feature_service& features, replica::database* db)
|
||||
: _db(db)
|
||||
, _features(features)
|
||||
, _extensions(cfg.extensions())
|
||||
, _murmur3_partitioner_ignore_msb_bits(cfg.murmur3_partitioner_ignore_msb_bits())
|
||||
, _schema_registry_grace_period(cfg.schema_registry_grace_period())
|
||||
@@ -111,7 +113,7 @@ schema_ctxt::schema_ctxt(const db::config& cfg, std::shared_ptr<data_dictionary:
|
||||
{}
|
||||
|
||||
schema_ctxt::schema_ctxt(replica::database& db)
|
||||
: schema_ctxt(db.get_config(), db.as_user_types_storage(), &db)
|
||||
: schema_ctxt(db.get_config(), db.as_user_types_storage(), db.features(), &db)
|
||||
{}
|
||||
|
||||
schema_ctxt::schema_ctxt(distributed<replica::database>& db)
|
||||
@@ -3127,7 +3129,7 @@ schema_ptr create_table_from_mutations(const schema_ctxt& ctxt, schema_mutations
|
||||
if (version) {
|
||||
builder.with_version(*version);
|
||||
} else {
|
||||
builder.with_version(sm.digest());
|
||||
builder.with_version(sm.digest(ctxt.features().cluster_schema_features()));
|
||||
}
|
||||
|
||||
if (auto partitioner = sm.partitioner()) {
|
||||
@@ -3347,7 +3349,7 @@ view_ptr create_view_from_mutations(const schema_ctxt& ctxt, schema_mutations sm
|
||||
if (version) {
|
||||
builder.with_version(*version);
|
||||
} else {
|
||||
builder.with_version(sm.digest());
|
||||
builder.with_version(sm.digest(ctxt.features().cluster_schema_features()));
|
||||
}
|
||||
|
||||
auto base_id = table_id(row.get_nonnull<utils::UUID>("base_table_id"));
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include "schema/schema_fwd.hh"
|
||||
#include "schema_features.hh"
|
||||
#include "utils/hashing.hh"
|
||||
#include "gms/feature_service.hh"
|
||||
#include "schema_mutations.hh"
|
||||
#include "types/map.hh"
|
||||
#include "query-result-set.hh"
|
||||
@@ -66,7 +67,8 @@ class config;
|
||||
|
||||
class schema_ctxt {
|
||||
public:
|
||||
schema_ctxt(const config&, std::shared_ptr<data_dictionary::user_types_storage> uts, replica::database* = nullptr);
|
||||
schema_ctxt(const config&, std::shared_ptr<data_dictionary::user_types_storage> uts, const gms::feature_service&,
|
||||
replica::database* = nullptr);
|
||||
schema_ctxt(replica::database&);
|
||||
schema_ctxt(distributed<replica::database>&);
|
||||
schema_ctxt(distributed<service::storage_proxy>&);
|
||||
@@ -87,11 +89,16 @@ public:
|
||||
return *_user_types;
|
||||
}
|
||||
|
||||
replica::database* get_db() {
|
||||
const gms::feature_service& features() const {
|
||||
return _features;
|
||||
}
|
||||
|
||||
replica::database* get_db() const {
|
||||
return _db;
|
||||
}
|
||||
private:
|
||||
replica::database* _db;
|
||||
const gms::feature_service& _features;
|
||||
const db::extensions& _extensions;
|
||||
const unsigned _murmur3_partitioner_ignore_msb_bits;
|
||||
const uint32_t _schema_registry_grace_period;
|
||||
|
||||
@@ -75,6 +75,9 @@ feature_config feature_config_from_db_config(const db::config& cfg, std::set<sst
|
||||
if (!cfg.uuid_sstable_identifiers_enabled()) {
|
||||
fcfg._disabled_features.insert("UUID_SSTABLE_IDENTIFIERS"s);
|
||||
}
|
||||
if (!cfg.table_digest_insensitive_to_expiry()) {
|
||||
fcfg._disabled_features.insert("TABLE_DIGEST_INSENSITIVE_TO_EXPIRY"s);
|
||||
}
|
||||
|
||||
if (!utils::get_local_injector().enter("features_enable_test_feature")) {
|
||||
fcfg._disabled_features.insert("TEST_ONLY_FEATURE"s);
|
||||
@@ -181,6 +184,7 @@ db::schema_features feature_service::cluster_schema_features() const {
|
||||
f.set_if<db::schema_feature::PER_TABLE_PARTITIONERS>(per_table_partitioners);
|
||||
f.set_if<db::schema_feature::SCYLLA_KEYSPACES>(keyspace_storage_options);
|
||||
f.set_if<db::schema_feature::SCYLLA_AGGREGATES>(aggregate_storage_options);
|
||||
f.set_if<db::schema_feature::TABLE_DIGEST_INSENSITIVE_TO_EXPIRY>(table_digest_insensitive_to_expiry);
|
||||
return f;
|
||||
}
|
||||
|
||||
|
||||
@@ -118,6 +118,7 @@ public:
|
||||
gms::feature secondary_indexes_on_static_columns { *this, "SECONDARY_INDEXES_ON_STATIC_COLUMNS"sv };
|
||||
gms::feature tablets { *this, "TABLETS"sv };
|
||||
gms::feature uuid_sstable_identifiers { *this, "UUID_SSTABLE_IDENTIFIERS"sv };
|
||||
gms::feature table_digest_insensitive_to_expiry { *this, "TABLE_DIGEST_INSENSITIVE_TO_EXPIRY"sv };
|
||||
|
||||
// A feature just for use in tests. It must not be advertised unless
|
||||
// the "features_enable_test_feature" injection is enabled.
|
||||
|
||||
@@ -49,7 +49,7 @@ void schema_mutations::copy_to(std::vector<mutation>& dst) const {
|
||||
}
|
||||
}
|
||||
|
||||
table_schema_version schema_mutations::digest() const {
|
||||
table_schema_version schema_mutations::digest(db::schema_features sf) const {
|
||||
if (_scylla_tables) {
|
||||
auto rs = query::result_set(*_scylla_tables);
|
||||
if (!rs.empty()) {
|
||||
@@ -62,16 +62,19 @@ table_schema_version schema_mutations::digest() const {
|
||||
}
|
||||
|
||||
md5_hasher h;
|
||||
db::schema_features sf = db::schema_features::full();
|
||||
|
||||
// Disable this feature so that the digest remains compactible with Scylla
|
||||
// versions prior to this feature.
|
||||
// This digest affects the table schema version calculation and it's important
|
||||
// that all nodes arrive at the same table schema version to avoid needless schema version
|
||||
// pulls. Table schema versions are calculated on boot when we don't yet
|
||||
// know all the cluster features, so we could get different table versions after reboot
|
||||
// in an already upgraded cluster.
|
||||
sf.remove<db::schema_feature::DIGEST_INSENSITIVE_TO_EXPIRY>();
|
||||
if (!sf.contains<db::schema_feature::TABLE_DIGEST_INSENSITIVE_TO_EXPIRY>()) {
|
||||
// Disable this feature so that the digest remains compactible with Scylla
|
||||
// versions prior to this feature.
|
||||
// This digest affects the table schema version calculation and it's important
|
||||
// that all nodes arrive at the same table schema version to avoid needless schema version
|
||||
// pulls. It used to be the case that when table schema versions were calculated on boot we
|
||||
// didn't yet know all the cluster features, so we could get different table versions after reboot
|
||||
// in an already upgraded cluster. However, they are now available, and if
|
||||
// TABLE_DIGEST_INSENSITIVE_TO_EXPIRY is enabled, we can compute with DIGEST_INSENSITIVE_TO_EXPIRY
|
||||
// enabled.
|
||||
sf.remove<db::schema_feature::DIGEST_INSENSITIVE_TO_EXPIRY>();
|
||||
}
|
||||
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, _columnfamilies, sf);
|
||||
db::schema_tables::feed_hash_for_schema_digest(h, _columns, sf);
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "mutation/mutation.hh"
|
||||
#include "schema/schema_fwd.hh"
|
||||
#include "mutation/canonical_mutation.hh"
|
||||
#include "db/schema_features.hh"
|
||||
|
||||
// Commutative representation of table schema
|
||||
// Equality ignores tombstones.
|
||||
@@ -124,7 +125,7 @@ public:
|
||||
|
||||
bool is_view() const;
|
||||
|
||||
table_schema_version digest() const;
|
||||
table_schema_version digest(db::schema_features) const;
|
||||
std::optional<sstring> partitioner() const;
|
||||
|
||||
bool operator==(const schema_mutations&) const;
|
||||
|
||||
@@ -109,6 +109,14 @@ void migration_manager::init_messaging_service()
|
||||
_feature_listeners.push_back(_feat.cdc.when_enabled(update_schema));
|
||||
_feature_listeners.push_back(_feat.per_table_partitioners.when_enabled(update_schema));
|
||||
_feature_listeners.push_back(_feat.computed_columns.when_enabled(update_schema));
|
||||
|
||||
if (!_feat.table_digest_insensitive_to_expiry) {
|
||||
_feature_listeners.push_back(_feat.table_digest_insensitive_to_expiry.when_enabled([this] {
|
||||
(void) with_gate(_background_tasks, [this] {
|
||||
return reload_schema();
|
||||
});
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
_messaging.register_definitions_update([this] (const rpc::client_info& cinfo, std::vector<frozen_mutation> fm, rpc::optional<std::vector<canonical_mutation>> cm) {
|
||||
|
||||
@@ -812,8 +812,6 @@ future<> test_schema_digest_does_not_change_with_disabled_features(sstring data_
|
||||
|
||||
expect_digest(sf, expected_digests[4]);
|
||||
|
||||
// FIXME: schema_mutations::digest() is still sensitive to expiry, so we can check versions only after forward_jump_clocks()
|
||||
// otherwise the results would not be stable.
|
||||
expect_version("tests", "table1", expected_digests[5]);
|
||||
expect_version("ks", "tbl", expected_digests[6]);
|
||||
expect_version("ks", "tbl_view", expected_digests[7]);
|
||||
@@ -840,7 +838,8 @@ SEASTAR_TEST_CASE(test_schema_digest_does_not_change) {
|
||||
utils::UUID("de49e92f-a00d-3f24-8779-d07de26708cb"),
|
||||
};
|
||||
return test_schema_digest_does_not_change_with_disabled_features("./test/resource/sstables/schema_digest_test",
|
||||
std::set<sstring>{"COMPUTED_COLUMNS", "CDC", "KEYSPACE_STORAGE_OPTIONS"}, std::move(expected_digests), [] (cql_test_env& e) {});
|
||||
std::set<sstring>{"COMPUTED_COLUMNS", "CDC", "KEYSPACE_STORAGE_OPTIONS", "TABLE_DIGEST_INSENSITIVE_TO_EXPIRY"},
|
||||
std::move(expected_digests), [] (cql_test_env& e) {});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_schema_digest_does_not_change_after_computed_columns) {
|
||||
@@ -857,7 +856,7 @@ SEASTAR_TEST_CASE(test_schema_digest_does_not_change_after_computed_columns) {
|
||||
utils::UUID("94606636-ae43-3e0a-b238-e7f0e33ef600"),
|
||||
};
|
||||
return test_schema_digest_does_not_change_with_disabled_features("./test/resource/sstables/schema_digest_test_computed_columns",
|
||||
std::set<sstring>{"CDC", "KEYSPACE_STORAGE_OPTIONS"}, std::move(expected_digests), [] (cql_test_env& e) {});
|
||||
std::set<sstring>{"CDC", "KEYSPACE_STORAGE_OPTIONS", "TABLE_DIGEST_INSENSITIVE_TO_EXPIRY"}, std::move(expected_digests), [] (cql_test_env& e) {});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_schema_digest_does_not_change_with_functions) {
|
||||
@@ -875,7 +874,7 @@ SEASTAR_TEST_CASE(test_schema_digest_does_not_change_with_functions) {
|
||||
};
|
||||
return test_schema_digest_does_not_change_with_disabled_features(
|
||||
"./test/resource/sstables/schema_digest_with_functions_test",
|
||||
std::set<sstring>{"CDC", "KEYSPACE_STORAGE_OPTIONS"},
|
||||
std::set<sstring>{"CDC", "KEYSPACE_STORAGE_OPTIONS", "TABLE_DIGEST_INSENSITIVE_TO_EXPIRY"},
|
||||
std::move(expected_digests),
|
||||
[] (cql_test_env& e) {
|
||||
e.execute_cql("create function twice(val int) called on null input returns int language lua as 'return 2 * val';").get();
|
||||
@@ -900,7 +899,7 @@ SEASTAR_TEST_CASE(test_schema_digest_does_not_change_with_cdc_options) {
|
||||
};
|
||||
return test_schema_digest_does_not_change_with_disabled_features(
|
||||
"./test/resource/sstables/schema_digest_test_cdc_options",
|
||||
std::set<sstring>{"KEYSPACE_STORAGE_OPTIONS"},
|
||||
std::set<sstring>{"KEYSPACE_STORAGE_OPTIONS", "TABLE_DIGEST_INSENSITIVE_TO_EXPIRY"},
|
||||
std::move(expected_digests),
|
||||
[] (cql_test_env& e) {
|
||||
e.execute_cql("create table tests.table_cdc (pk int primary key, c1 int, c2 int) with cdc = {'enabled':'true'};").get();
|
||||
@@ -923,7 +922,7 @@ SEASTAR_TEST_CASE(test_schema_digest_does_not_change_with_keyspace_storage_optio
|
||||
};
|
||||
return test_schema_digest_does_not_change_with_disabled_features(
|
||||
"./test/resource/sstables/schema_digest_test_keyspace_storage_options",
|
||||
std::set<sstring>{},
|
||||
std::set<sstring>{"TABLE_DIGEST_INSENSITIVE_TO_EXPIRY"},
|
||||
std::move(expected_digests),
|
||||
[] (cql_test_env& e) {
|
||||
e.execute_cql("create keyspace tests_s3 with replication = { 'class' : 'NetworkTopologyStrategy', 'replication_factor' : 1 }"
|
||||
|
||||
@@ -34,10 +34,12 @@ static schema_ptr random_schema() {
|
||||
|
||||
struct dummy_init {
|
||||
std::unique_ptr<db::config> config;
|
||||
gms::feature_service fs;
|
||||
|
||||
dummy_init() {
|
||||
config = std::make_unique<db::config>();
|
||||
local_schema_registry().init(db::schema_ctxt(*config,std::make_shared<data_dictionary::dummy_user_types_storage>()));
|
||||
dummy_init()
|
||||
: config(std::make_unique<db::config>())
|
||||
, fs(gms::feature_config_from_db_config(*config)) {
|
||||
local_schema_registry().init(db::schema_ctxt(*config, std::make_shared<data_dictionary::dummy_user_types_storage>(), fs));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -534,7 +534,9 @@ schema_ptr do_load_schema_from_schema_tables(std::filesystem::path scylla_data_p
|
||||
|
||||
db::config dbcfg;
|
||||
auto user_type_storage = std::make_shared<single_keyspace_user_types_storage>(std::move(utm));
|
||||
db::schema_ctxt ctxt(dbcfg, user_type_storage);
|
||||
gms::feature_service features(gms::feature_config_from_db_config(dbcfg));
|
||||
db::schema_ctxt ctxt(dbcfg, user_type_storage, features);
|
||||
|
||||
schema_mutations muts(std::move(*tables), std::move(*columns), std::move(view_virtual_columns), std::move(computed_columns), std::move(indexes),
|
||||
std::move(dropped_columns), std::move(scylla_tables));
|
||||
return db::schema_tables::create_table_from_mutations(ctxt, muts);
|
||||
|
||||
Reference in New Issue
Block a user