Files
scylladb/idl/mapreduce_request.idl.hh
Andrzej Jackowski 26403df9ea mapreduce: add shard_id_hint to mapreduce request
If a partition range is not present locally,
`partition_ranges_owned_by_this_shard` assigns it to shard 0, which can
overload shard 0. To address this, this commit adds a `shard_id_hint`
to the mapreduce request. When `shard_id_hint` is set, the entire
partition range in the request is handled by the specified shard.

The `shard_id_hint` is set by the new tablet-aware mapreduce algorithm,
introduced in `dispatch_to_tablets`. This algorithm balances the
workload across shards, so the changes in this commit ensure that
load balancing is preserved, even during events such as tablet splits.

Fixes: scylladb#21831
2025-06-25 19:23:07 +02:00

51 lines
1.2 KiB
C++

/*
* Copyright 2022-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#include "dht/i_partitioner_fwd.hh"
#include "idl/read_command.idl.hh"
#include "idl/consistency_level.idl.hh"
namespace db {
namespace functions {
/*
 * IDL description of db::functions::function_name: a function identified by
 * a `keyspace` string and a `name` string.
 * NOTE(review): as an IDL declaration, the field order presumably defines the
 * serialized layout - do not reorder without confirming against the IDL compiler.
 */
class function_name {
sstring keyspace;
sstring name;
};
}
}
namespace query {
/*
 * Request payload carried by the `mapreduce_request` verb declared at the end
 * of this namespace.
 * NOTE(review): this is an IDL declaration; field order and the
 * [[version ...]] tags presumably define the wire format, so new fields should
 * only be appended with a version tag - confirm against the IDL compiler docs.
 */
struct mapreduce_request {
/*
 * One aggregation entry: a function name (keyspace + name) together with a
 * list of column names.
 * NOTE(review): presumably the columns the function is applied to - verify
 * against the mapreduce service.
 */
struct aggregation_info {
db::functions::function_name name;
std::vector<sstring> column_names;
};
/* Kind of reduction requested; stored as a uint8_t. */
enum class reduction_type : uint8_t {
count,
aggregate
};
/* Requested reductions (see reduction_type). */
std::vector<query::mapreduce_request::reduction_type> reduction_types;
/* Read command associated with the request. */
query::read_command cmd;
/* Partition ranges the request refers to. */
dht::partition_range_vector pr;
/* Consistency level associated with the request. */
db::consistency_level cl;
/* Timeout expressed as a lowres_system_clock time point. */
lowres_system_clock::time_point timeout;
/* Optional aggregation descriptions; field tagged as added in version 5.1. */
std::optional<std::vector<query::mapreduce_request::aggregation_info>> aggregation_infos [[version 5.1]];
/*
 * Optional shard hint; field tagged as added in version 2025.3.
 * Per the change that introduced it: when set, the entire partition range in
 * the request is handled by the specified shard. It is set by the tablet-aware
 * mapreduce algorithm (`dispatch_to_tablets`) so that ranges not present
 * locally are not all assigned to shard 0.
 */
std::optional<shard_id> shard_id_hint [[version 2025.3]];
};
/* Reply payload of the `mapreduce_request` verb: a list of optional byte buffers. */
struct mapreduce_result {
std::vector<bytes_opt> query_results;
};
/*
 * RPC verb declaration, marked [[cancellable]]. Takes the request and optional
 * tracing info (both marked [[ref]]) and returns a query::mapreduce_result.
 */
verb [[cancellable]] mapreduce_request(query::mapreduce_request req [[ref]], std::optional<tracing::trace_info> trace_info [[ref]]) -> query::mapreduce_result;
}