mirror of
https://github.com/scylladb/scylladb.git
synced 2026-06-01 04:26:48 +00:00
When a non-replica node handles a strongly consistent write, it must forward the request to a replica. If the closest replica is not the leader, the request gets redirected again, causing an extra roundtrip. Add a leader location cache in groups_manager, keyed by raft group_id. After a write request is forwarded, the CQL transport layer records the final node as the leader in the cache. Subsequent write requests from the same node for the same group are forwarded directly to the cached leader, eliminating the extra roundtrip. The cache is only used for writes. Reads can be served by any replica, so they skip the cache and use proximity-based routing instead. Cache entries are validated at use time: if the cached leader is no longer a replica (e.g. after tablet migration), the entry is evicted and the normal closest-replica path is taken. This prevents a scenario where two nodes keep redirecting to each other because each thinks the other is the leader while in fact both are non-replicas - such a loop is broken as soon as the tablet maps are updated. On token_metadata updates, entries for groups that no longer exist (e.g. table dropped, tablet merged) are evicted. Entries for groups that still exist are kept — use-time validation handles staleness. An on_node_resolved callback is propagated through the redirect/bounce path so the transport layer can update the cache generically without coupling to the strong-consistency coordinator. The coordinator creates the callback only for writes (capturing the groups_manager and group_id) and attaches it to the bounce message; the transport layer invokes it once the final node is known, keeping the forwarding infrastructure subsystem-agnostic. We also add a test which verifies that, after the initial redirect, subsequent requests to the same node avoid the extra redirect and are forwarded directly to the leader. Fixes: SCYLLADB-1064 Closes scylladb/scylladb#29392
104 lines
4.6 KiB
C++
104 lines
4.6 KiB
C++
/*
|
|
* Copyright (C) 2025-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
|
|
*/
|
|
|
|
#include "modification_statement.hh"
|
|
|
|
#include "db/consistency_level_type.hh"
|
|
#include "db/timeout_clock.hh"
|
|
#include "transport/messages/result_message.hh"
|
|
#include "cql3/query_processor.hh"
|
|
#include "service/strong_consistency/coordinator.hh"
|
|
#include "cql3/statements/strong_consistency/statement_helpers.hh"
|
|
#include "exceptions/exceptions.hh"
|
|
#include "utils/error_injection.hh"
|
|
|
|
namespace cql3::statements::strong_consistency {
|
|
// File-local logger; passed to on_internal_error() when a statement yields an
// unexpected number of mutations.
static logging::logger logger("sc_modification_statement");
|
|
|
|
// Stores the given base statement in _statement and hands the
// timeout_config::write_timeout member pointer to the
// cql_statement_opt_metadata base.
modification_statement::modification_statement(shared_ptr<base_statement> statement)
    : cql_statement_opt_metadata(&timeout_config::write_timeout)
    , _statement(std::move(statement))
{}
|
|
|
|
// Shorthand for the CQL transport result message type used by the execute
// functions in this file.
using result_message = cql_transport::messages::result_message;
|
|
|
|
// Runs the statement via execute_without_checking_exception_message() and
// then pipes the resulting message through propagate_exception_as_future.
future<shared_ptr<result_message>> modification_statement::execute(query_processor& qp, service::query_state& qs,
        const query_options& options, std::optional<service::group0_guard> guard) const
{
    auto unchecked = execute_without_checking_exception_message(qp, qs, options, std::move(guard));
    return unchecked.then(cql_transport::messages::propagate_exception_as_future<shared_ptr<result_message>>);
}
|
|
|
|
// Accepts only QUORUM and LOCAL_QUORUM; any other consistency level is
// rejected with invalid_request_exception.
static void validate_consistency_level(const db::consistency_level& cl) {
    const bool is_quorum_level = cl == db::consistency_level::QUORUM
            || cl == db::consistency_level::LOCAL_QUORUM;
    if (is_quorum_level) {
        return;
    }
    throw exceptions::invalid_request_exception("Strongly consistent writes must use QUORUM/LOCAL_QUORUM consistency level");
}
|
|
|
|
// Executes the write through the strongly consistent coordinator.
//
// Before contacting the coordinator the request is validated, in this order:
//   - the consistency level must pass validate_consistency_level();
//   - the statement must resolve to exactly one single-partition range;
//   - the statement must not require a read (requires_read());
//   - the statement must not carry a user-provided timestamp.
// Each violation raises invalid_request_exception.
//
// If the coordinator answers with need_redirect, the statement is forwarded
// via redirect_statement(); otherwise a void result message is returned.
future<shared_ptr<result_message>> modification_statement::execute_without_checking_exception_message(
        query_processor& qp, service::query_state& qs, const query_options& options,
        std::optional<service::group0_guard> guard) const
{
    validate_consistency_level(options.get_consistency());

    auto deadline = db::timeout_clock::now() + _statement->get_timeout(qs.get_client_state(), options);
    auto json_cache = base_statement::json_cache_opt{};
    const auto partition_ranges = _statement->build_partition_keys(options, json_cache);

    const bool targets_single_partition =
            partition_ranges.size() == 1 && query::is_single_partition(partition_ranges[0]);
    if (!targets_single_partition) {
        throw exceptions::invalid_request_exception("Strongly consistent queries can only target a single partition");
    }
    if (_statement->requires_read()) {
        throw exceptions::invalid_request_exception("Strongly consistent updates don't support data prefetch");
    }
    if (_statement->is_timestamp_set()) {
        throw exceptions::invalid_request_exception("Strongly consistent queries don't support user-provided timestamps");
    }

    // NOTE(review): `coordinator_holder` is never read below; presumably it
    // keeps the coordinator usable for the duration of this call - confirm.
    auto [coordinator, coordinator_holder] = qp.acquire_strongly_consistent_coordinator();

    // The callback builds the single mutation for the timestamp supplied by
    // the coordinator. It captures by reference; it is a temporary of the
    // co_await full-expression, so it stays alive across the suspension.
    auto outcome = co_await coordinator.get().mutate(_statement->s,
            partition_ranges[0].start()->value().token(),
            [&](api::timestamp_type ts) {
                const auto no_prefetch = update_parameters::prefetch_data(_statement->s);
                const auto time_to_live = _statement->get_time_to_live(options);
                const auto update_params = update_parameters(_statement->s, options, ts, time_to_live, no_prefetch);
                const auto clustering_ranges = _statement->create_clustering_ranges(options, json_cache);
                auto mutations = _statement->apply_updates(partition_ranges, clustering_ranges, update_params, json_cache);
                if (mutations.size() != 1) {
                    on_internal_error(logger, ::format("statement '{}' has unexpected number of mutations {}",
                            raw_cql_statement, mutations.size()));
                }
                return std::move(*mutations.begin());
            }, deadline, qs.get_client_state().get_abort_source());

    using namespace service::strong_consistency;
    if (auto* redirect = get_if<need_redirect>(&outcome)) {
        // This statement is always a write; the flag is passed on to redirect_statement().
        const bool is_write = true;
        co_return co_await redirect_statement(qp, options, redirect->target, deadline, is_write,
                coordinator.get().get_stats(), std::move(redirect->on_node_resolved));
    }

    // Error-injection hook: when enabled, raises a write timeout after the
    // coordinator has returned without requesting a redirect.
    utils::get_local_injector().inject("sc_modification_statement_timeout", [&] {
        throw exceptions::mutation_write_timeout_exception{"", "", options.get_consistency(), 0, 0, db::write_type::SIMPLE};
    });

    co_return seastar::make_shared<result_message::void_message>();
}
|
|
|
|
// Access checking is delegated unchanged to the wrapped base statement.
future<> modification_statement::check_access(query_processor& qp, const service::client_state& state) const {
    auto& wrapped = *_statement;
    return wrapped.check_access(qp, state);
}
|
|
|
|
uint32_t modification_statement::get_bound_terms() const {
|
|
return _statement->get_bound_terms();
|
|
}
|
|
|
|
bool modification_statement::depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const {
|
|
return _statement->depends_on(ks_name, cf_name);
|
|
}
|
|
}
|