From e99a0c7b893c19a3c91dbbe21afa0aeb1a25d126 Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Thu, 10 Sep 2020 00:28:12 +0200 Subject: [PATCH] schema: Fix race in schema version recalculation leading to stale schema version in gossip Migration manager installs several feature change listeners: if (this_shard_id() == 0) { _feature_listeners.push_back(_feat.cluster_supports_view_virtual_columns().when_enabled(update_schema)); _feature_listeners.push_back(_feat.cluster_supports_digest_insensitive_to_expiry().when_enabled(update_schema)); _feature_listeners.push_back(_feat.cluster_supports_cdc().when_enabled(update_schema)); _feature_listeners.push_back(_feat.cluster_supports_per_table_partitioners().when_enabled(update_schema)); } They will call update_schema_version_and_announce() when features are enabled, which does this: return update_schema_version(proxy, features).then([] (utils::UUID uuid) { return announce_schema_version(uuid); }); So it first updates the schema version and then publishes it via gossip in announce_schema_version(). It is possible that the announce_schema_version() part of the first schema change will be deferred and will execute after the other four calls to update_schema_version_and_announce(). It will install the old schema version in gossip instead of the more recent one. The fix is to serialize schema digest calculation and publishing. Fixes #7200 (cherry picked from commit 1a57d641d18b66a0e5ebbda8c4d758b94da85e8c) --- db/schema_tables.cc | 8 ++++++++ db/schema_tables.hh | 7 +++++++ service/migration_manager.cc | 2 +- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/db/schema_tables.cc b/db/schema_tables.cc index bf9c6ac04d..5d94b54992 100644 --- a/db/schema_tables.cc +++ b/db/schema_tables.cc @@ -822,6 +822,14 @@ future<> merge_schema(distributed& proxy, gms::feature_s }); } +future<> recalculate_schema_version(distributed& proxy, gms::feature_service& feat) { + return merge_lock().then([&proxy, &feat] { + return update_schema_version_and_announce(proxy, feat.cluster_schema_features()); + }).finally([] { + return merge_unlock(); + }); +} + future<> merge_schema(distributed& proxy, std::vector mutations, bool do_flush) { return merge_lock().then([&proxy, mutations = std::move(mutations), do_flush] () mutable { diff --git a/db/schema_tables.hh b/db/schema_tables.hh index f828c00c45..51345cf6a9 100644 --- a/db/schema_tables.hh +++ b/db/schema_tables.hh @@ -170,6 +170,13 @@ future<> merge_schema(distributed& proxy, gms::feature_s future<> merge_schema(distributed& proxy, std::vector mutations, bool do_flush); +// Recalculates the local schema version and publishes it in gossip. +// +// It is safe to call concurrently with recalculate_schema_version() and merge_schema() in which case it +// is guaranteed that the schema version we end up with after all calls will reflect the most recent state +// of feature_service and schema tables. +future<> recalculate_schema_version(distributed& proxy, gms::feature_service& feat); + future> merge_keyspaces(distributed& proxy, schema_result&& before, schema_result&& after); std::vector make_create_keyspace_mutations(lw_shared_ptr keyspace, api::timestamp_type timestamp, bool with_tables_and_types_and_functions = true); diff --git a/service/migration_manager.cc b/service/migration_manager.cc index 1d3cdce603..3d13653775 100644 --- a/service/migration_manager.cc +++ b/service/migration_manager.cc @@ -92,7 +92,7 @@ void migration_manager::init_messaging_service() //FIXME: future discarded. (void)with_gate(_background_tasks, [this] { mlogger.debug("features changed, recalculating schema version"); - return update_schema_version_and_announce(get_storage_proxy(), _feat.cluster_schema_features()); + return db::schema_tables::recalculate_schema_version(get_storage_proxy(), _feat); }); };