From 801bf42ea2622355fee799bc1e461e635e3f4d21 Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Tue, 24 Jun 2025 17:45:42 +0200 Subject: [PATCH] sharder: add try_get_shard_for_reads method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, we use storage_proxy/get_cas_shard -> sharder.shard_for_reads to decide which shard to use for LWT code execution on both replicas and the coordinator. If the coordinator is not a replica, shard_for_reads returns 0 — the 'default' shard. This behavior has at least two problems: * Shard 0 may become overloaded, because all LWT coordinators that are not replicas will be served on it. * The zero shard does not match shard_for_reads on replicas, which hinders the "same shard for client and server" RPC-level optimization. To fix this, we need to know whether the current node hosts a replica for the tablet corresponding to the given token. Currently, there is no API we could use for this. For historical reasons, sharder::shard_for_reads returns 0 when the node does not host the shard, which leads to ambiguity. This commit introduces try_get_shard_for_reads, which returns a disengaged std::optional when the tablet is not present on the local node. We leave the shard_for_reads method in the base sharder class; it calls try_get_shard_for_reads and returns zero by default. We need to rename the tablet_sharder private methods shard_for_reads and shard_for_writes so that they don't conflict with sharder::shard_for_reads. 
--- dht/auto_refreshing_sharder.hh | 2 +- dht/i_partitioner.cc | 8 ++++---- dht/token-sharding.hh | 15 +++++++++++++-- locator/tablet_sharder.hh | 15 ++++++--------- 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/dht/auto_refreshing_sharder.hh b/dht/auto_refreshing_sharder.hh index 7ad6e6ed22..de74449f5b 100644 --- a/dht/auto_refreshing_sharder.hh +++ b/dht/auto_refreshing_sharder.hh @@ -41,7 +41,7 @@ public: virtual ~auto_refreshing_sharder(); - virtual unsigned shard_for_reads(const token& t) const override; + virtual std::optional try_get_shard_for_reads(const token& t) const override; virtual dht::shard_replica_set shard_for_writes(const token& t, std::optional sel) const override; diff --git a/dht/i_partitioner.cc b/dht/i_partitioner.cc index 203f61a0f3..1a03af4fa7 100644 --- a/dht/i_partitioner.cc +++ b/dht/i_partitioner.cc @@ -43,8 +43,8 @@ static_sharder::shard_of(const token& t) const { return dht::shard_of(_shard_count, _sharding_ignore_msb_bits, t); } -unsigned -static_sharder::shard_for_reads(const token& t) const { +std::optional +static_sharder::try_get_shard_for_reads(const token& t) const { return shard_of(t); } @@ -513,8 +513,8 @@ auto_refreshing_sharder::refresh() { }); } -unsigned auto_refreshing_sharder::shard_for_reads(const token& t) const { - return _sharder->shard_for_reads(t); +std::optional auto_refreshing_sharder::try_get_shard_for_reads(const token& t) const { + return _sharder->try_get_shard_for_reads(t); } dht::shard_replica_set diff --git a/dht/token-sharding.hh b/dht/token-sharding.hh index 3cb4a08648..86aa1b8572 100644 --- a/dht/token-sharding.hh +++ b/dht/token-sharding.hh @@ -54,6 +54,12 @@ public: sharder(unsigned shard_count = smp::count, unsigned sharding_ignore_msb_bits = 0); virtual ~sharder() = default; + /** + * Returns the shard that handles a particular token for reads, or empty if this + * node doesn't contain data for it. 
+ */ + virtual std::optional try_get_shard_for_reads(const token& t) const = 0; + /** * Returns the shard that handles a particular token for reads. * Use shard_for_writes() to determine the set of shards that should receive writes. @@ -67,7 +73,12 @@ public: * } * */ - virtual unsigned shard_for_reads(const token& t) const = 0; + unsigned shard_for_reads(const token& t) const { + // FIXME: Consider throwing when there is no owning shard on the current host rather than returning 0. + // It's a coordination mistake to route requests to non-owners. Topology coordinator should synchronize + // with request coordinators before moving the shard away. + return try_get_shard_for_reads(t).value_or(0); + } /** * Returns the set of shards which should receive a write to token t. @@ -134,7 +145,7 @@ public: virtual std::optional next_shard(const token& t) const; virtual token token_for_next_shard(const token& t, shard_id shard, unsigned spans = 1) const; - virtual unsigned shard_for_reads(const token& t) const override; + virtual std::optional try_get_shard_for_reads(const token& t) const override; virtual shard_replica_set shard_for_writes(const token& t, std::optional sel) const override; virtual token token_for_next_shard_for_reads(const token& t, shard_id shard, unsigned spans = 1) const override; virtual std::optional next_shard_for_reads(const token& t) const override; diff --git a/locator/tablet_sharder.hh b/locator/tablet_sharder.hh index 9b52a8fb29..38a43b7859 100644 --- a/locator/tablet_sharder.hh +++ b/locator/tablet_sharder.hh @@ -40,7 +40,7 @@ private: return std::nullopt; }; - dht::shard_replica_set shard_for_writes(tablet_id tid, host_id host, std::optional sel = std::nullopt) const { + dht::shard_replica_set choose_shard_for_writes(tablet_id tid, host_id host, std::optional sel = std::nullopt) const { auto* trinfo = _tmap->get_tablet_transition_info(tid); auto& tinfo = _tmap->get_tablet_info(tid); dht::shard_replica_set shards; @@ -78,7 +78,7 @@ private: 
return shards; } - std::optional shard_for_reads(tablet_id tid, host_id host) const { + std::optional choose_shard_for_reads(tablet_id tid, host_id host) const { ensure_tablet_map(); auto* trinfo = _tmap->get_tablet_transition_info(tid); auto& tinfo = _tmap->get_tablet_info(tid); @@ -107,13 +107,10 @@ public: virtual ~tablet_sharder() = default; - virtual unsigned shard_for_reads(const token& t) const override { + virtual std::optional try_get_shard_for_reads(const token& t) const override { ensure_tablet_map(); auto tid = _tmap->get_tablet_id(t); - // FIXME: Consider throwing when there is no owning shard on the current host rather than returning 0. - // It's a coordination mistake to route requests to non-owners. Topology coordinator should synchronize - // with request coordinators before moving the shard away. - auto shard = shard_for_reads(tid, _host).value_or(0); + auto shard = choose_shard_for_reads(tid, _host); tablet_logger.trace("[{}] shard_of({}) = {}, tablet={}", _table, t, shard, tid); return shard; } @@ -121,7 +118,7 @@ public: virtual dht::shard_replica_set shard_for_writes(const token& t, std::optional sel = std::nullopt) const override { ensure_tablet_map(); auto tid = _tmap->get_tablet_id(t); - auto shards = shard_for_writes(tid, _host, sel); + auto shards = choose_shard_for_writes(tid, _host, sel); tablet_logger.trace("[{}] shard_for_writes({}) = {}, tablet={}", _table, t, shards, tid); return shards; } @@ -130,7 +127,7 @@ public: ensure_tablet_map(); std::optional tb = _tmap->get_tablet_id(t); while ((tb = _tmap->next_tablet(*tb))) { - auto r = shard_for_reads(*tb, _host); + auto r = choose_shard_for_reads(*tb, _host); auto next = _tmap->get_first_token(*tb); tablet_logger.trace("[{}] token_for_next_shard({}) = {{{}, {}}}, tablet={}", _table, t, next, r, *tb); return dht::shard_and_token{r.value_or(0), next};