From 282ccbb072ea0fecfe0a7e5eccf407436f894622 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Botond=20D=C3=A9nes?= Date: Fri, 15 Feb 2019 13:02:01 +0200 Subject: [PATCH] service/storage_service: fix pre-bootstrap wait for schema agreement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When bootstrapping, a node should to wait to have a schema agreement with its peers, before it can join the ring. This is to ensure it can immediately accept writes. Failing to reach schema agreement before joining is not fatal, as the node can pull unknown schemas on writes on-demand. However, if such a schema contains references to UDFs, the node will reject writes using it, due to #3760. To ensure that schema agreement is reached before joining the ring, `storage_service::join_token_ring()` has to checks. First it checks that at least one peer was connected previously. For this it compares `database::get_version()` with `database::empty_version`. The (implied) assumption is that this will become something other than `database::empty_version` only after having connected (and pulled schemas from) at least one peer. This assumption doesn't hold anymore, as we now set the version earlier in the boot process. The second check verifies that we have the same schema version as all known, live peers. This check assumes (since 3e415e2) that we have already "met" all (or at least some) of our peers and if there is just one known node (us) it concludes that this is a single-node cluster, which automatically has schema agreement. It's easy to see how these two checks will fail. The first fails to ensure that we have met our peers, and the second wrongfully concludes that we are a one-node cluster, and hence have schema agreement. To fix this, modify the first check. Instead of relying on the presence of a non-empty database version, supposedly implying that we already talked to our peers, explicitely make sure that we have really talked to *at least* one other node, before proceeding to the second check, which will now do the correct thing, actually checking the schema versions. Fixes: #4196 Branches: 3.0, 2.3 Signed-off-by: Botond Dénes Message-Id: <40b95b18e09c787e31ba6c5519fb64d68b4ca32e.1550228389.git.bdenes@scylladb.com> (cherry picked from commit 2125e99531f8053efd54565b480b5cdb27bec3eb) --- service/storage_service.cc | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index d3fab333ac..c2ffebf729 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -478,13 +478,9 @@ void storage_service::join_token_ring(int delay) { db::system_keyspace::set_bootstrap_state(db::system_keyspace::bootstrap_state::IN_PROGRESS).get(); } set_mode(mode::JOINING, "waiting for ring information", true); - // first sleep the delay to make sure we see all our peers - for (int i = 0; i < delay; i += 1000) { - // if we see schema, we can proceed to the next check directly - if (_db.local().get_version() != database::empty_version) { - slogger.debug("got schema: {}", _db.local().get_version()); - break; - } + auto& gossiper = gms::get_gossiper().local(); + // first sleep the delay to make sure we see *at least* one other node + for (int i = 0; i < delay && gossiper.get_live_members().size() < 2; i += 1000) { sleep(std::chrono::seconds(1)).get(); } // if our schema hasn't matched yet, keep sleeping until it does @@ -541,7 +537,6 @@ void storage_service::join_token_ring(int delay) { for (auto token : _bootstrap_tokens) { auto existing = _token_metadata.get_endpoint(token); if (existing) { - auto& gossiper = gms::get_local_gossiper(); auto* eps = gossiper.get_endpoint_state_for_endpoint_ptr(*existing); if (eps && eps->get_update_timestamp() > gms::gossiper::clk::now() - std::chrono::milliseconds(delay)) { throw std::runtime_error("Cannot replace a live node...");