This series is a reupload of #13792 with a few modifications: a test is added and the conflicts with recent tablet-related changes are fixed. See https://github.com/scylladb/scylladb/issues/12379 and https://github.com/scylladb/scylladb/pull/13583 for a detailed description of the problem and the discussion. This PR extends the existing throttling mechanism to cover requests that internally generate a large number of view updates, as suggested by @nyh.

The existing mechanism works as follows:

* A client sends a request; we generate the view updates corresponding to the request and spawn background tasks that send these updates to remote nodes.
* Each background task consumes some units from the `view_update_concurrency_semaphore`, but doesn't wait for these units; they are used only for tracking.
* On each node we track the percentage of consumed units; this is called the `view update backlog`.
* Before sending a response to the client we sleep for a short amount of time, calculated from the fullness of this `view update backlog`. For a well-behaved client with limited concurrency, this limits the rate of incoming requests to a manageable level.

This mechanism doesn't handle large DELETE queries. Deleting a partition is fast for the base table, but it requires us to generate a view update for every single deleted row, and the number of deleted rows per client request can be in the millions. Delaying the response doesn't help when a single request can generate millions of updates.

To deal with this, we can treat the view update generator just like any other client and force it to wait a bit before sending the next batch of updates. The wait time is calculated just like in the existing throttling code, based on the fullness of the `view update backlogs`. The new view update generation algorithm looks something like this:

```c++
for (;;) {
    auto updates = generate_updates_batch_with_max_100_rows();
    co_await seastar::sleep(calculate_sleep_time_from_backlogs());
    spawn_background_tasks_for_updates(updates);
}
```

A sketch of how such a backlog-based delay can be computed follows the commit list below.

Fixes: https://github.com/scylladb/scylladb/issues/12379

Closes scylladb/scylladb#16819

* github.com:scylladb/scylladb:
  - test: add test for bad_allocs during large mv queries
  - mv: throttle view update generation for large queries
  - exceptions: add read_write_timeout_exception, a subclass of request_timeout_exception
  - db/view: extract view throttling delay calculation to a global function
  - view_update_generator: add get_storage_proxy()
  - storage_proxy: make view backlog getters public
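To make the mechanism concrete, here is a minimal sketch of a backlog-to-delay mapping of the kind the throttling relies on. The `update_backlog` struct, its fields, the `max_delay` parameter, and the cubic curve are assumptions for illustration only; the actual calculation is the global function extracted in `db/view` by this series and may differ in shape and bounds:

```c++
#include <algorithm>
#include <chrono>
#include <cstdint>

// Hypothetical snapshot of a node's view update backlog: units currently
// consumed from view_update_concurrency_semaphore vs. its total capacity.
struct update_backlog {
    size_t current;
    size_t max;
};

// Map backlog fullness (0.0 to 1.0) to a sleep duration. An empty backlog
// adds no delay; a nearly full one approaches max_delay. A superlinear
// (here cubic) curve barely penalizes light load while throttling hard
// as the backlog fills up.
std::chrono::microseconds calculate_delay(update_backlog backlog,
                                          std::chrono::microseconds max_delay) {
    float fullness = backlog.max == 0
            ? 0.0f
            : std::clamp(float(backlog.current) / float(backlog.max), 0.0f, 1.0f);
    return std::chrono::microseconds(
            static_cast<int64_t>(max_delay.count() * fullness * fullness * fullness));
}
```

Both the existing per-request delay and the new per-batch sleep can then be driven by the same calculation, which is why this series extracts it into a global function.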
129 lines
3.9 KiB
C++
/*
 * Copyright (C) 2018-present ScyllaDB
 */

/*
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

#pragma once

#include "sstables/shared_sstable.hh"
#include "db/timeout_clock.hh"
#include "db_clock.hh"
#include "utils/chunked_vector.hh"
#include "schema/schema_fwd.hh"

#include <seastar/core/sharded.hh>
#include <seastar/core/metrics_registration.hh>
#include <seastar/core/abort_source.hh>
#include <seastar/core/condition-variable.hh>
#include <seastar/core/semaphore.hh>

using namespace seastar;

struct frozen_mutation_and_schema;
class mutation;
class reader_permit;
class flat_mutation_reader_v2;
using flat_mutation_reader_v2_opt = optimized_optional<flat_mutation_reader_v2>;

namespace dht {
class token;
}

namespace tracing {
class trace_state_ptr;
}

namespace replica {
class database;
class table;
struct cf_stats;
}

namespace service {
class storage_proxy;
struct allow_hints_tag;
using allow_hints = bool_class<allow_hints_tag>;
}

namespace db::view {

class stats;
struct view_and_base;
struct wait_for_all_updates_tag {};
using wait_for_all_updates = bool_class<wait_for_all_updates_tag>;

class view_update_generator : public async_sharded_service<view_update_generator> {
public:
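    // Bounds the number of concurrently queued staging-sstable registrations;
    // used to size _registration_sem below.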
    static constexpr size_t registration_queue_size = 100;

private:
    replica::database& _db;
    sharded<service::storage_proxy>& _proxy;
    seastar::abort_source _as;
    future<> _started = make_ready_future<>();
    seastar::condition_variable _pending_sstables;
    named_semaphore _registration_sem{registration_queue_size, named_semaphore_exception_factory{"view update generator"}};
    std::unordered_map<lw_shared_ptr<replica::table>, std::vector<sstables::shared_sstable>> _sstables_with_tables;
    std::unordered_map<lw_shared_ptr<replica::table>, std::vector<sstables::shared_sstable>> _sstables_to_move;
    metrics::metric_groups _metrics;
    class progress_tracker;
    std::unique_ptr<progress_tracker> _progress_tracker;
    optimized_optional<abort_source::subscription> _early_abort_subscription;
    void do_abort() noexcept;

public:
    view_update_generator(replica::database& db, sharded<service::storage_proxy>& proxy, abort_source& as);
    ~view_update_generator();

    future<> start();
    future<> drain();
    future<> stop();
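
    // Register a staging sstable whose data still needs view updates generated;
    // registrations are capped at registration_queue_size via _registration_sem.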
    future<> register_staging_sstable(sstables::shared_sstable sst, lw_shared_ptr<replica::table> table);

    replica::database& get_db() noexcept { return _db; }
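
    // Added by this series so that the throttling code can read the per-node
    // view update backlogs tracked by storage_proxy.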
    const sharded<service::storage_proxy>& get_storage_proxy() const noexcept { return _proxy; }

private:
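    // Send a batch of view updates for the given base table to the view
    // replicas. pending_view_updates holds the semaphore units that track
    // this batch in the view update backlog; wait_for_all selects whether
    // to wait for every update to be applied before resolving.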
    future<> mutate_MV(
            schema_ptr base,
            dht::token base_token,
            utils::chunked_vector<frozen_mutation_and_schema> view_updates,
            db::view::stats& stats,
            replica::cf_stats& cf_stats,
            tracing::trace_state_ptr tr_state,
            db::timeout_semaphore_units pending_view_updates,
            service::allow_hints allow_hints,
            wait_for_all_updates wait_for_all);

public:
    ssize_t available_register_units() const { return _registration_sem.available_units(); }
    size_t queued_batches_count() const { return _sstables_with_tables.size(); }

    // Reader's schema must be the same as the base schema of each of the views.
    future<> populate_views(const replica::table& base,
            std::vector<view_and_base>,
            dht::token base_token,
            flat_mutation_reader_v2&&,
            gc_clock::time_point);
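
    // Generate updates for the given views from the base-table mutation m,
    // diffed against `existings` (the pre-update state of the affected rows),
    // and propagate them via mutate_MV. With this series, generation proceeds
    // in bounded batches, sleeping between batches according to the fullness
    // of the view update backlogs.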
    future<> generate_and_propagate_view_updates(const replica::table& table,
            const schema_ptr& base,
            reader_permit permit,
            std::vector<view_and_base>&& views,
            mutation&& m,
            flat_mutation_reader_v2_opt existings,
            tracing::trace_state_ptr tr_state,
            gc_clock::time_point now,
            db::timeout_clock::time_point timeout);

private:
    bool should_throttle() const;
    void setup_metrics();
    void discover_staging_sstables();
};

}