Files
scylladb/utils/disk_space_monitor.cc
Botond Dénes 69c58c6589 Merge 'streaming: add oos protection in mutation based streaming' from Łukasz Paszkowski
The mutation-fragment-based streaming path in `stream_session.cc` did not check whether the receiving node was in critical disk utilization mode before accepting incoming mutation fragments. This meant that operations like `nodetool refresh --load-and-stream`, which stream data through the `STREAM_MUTATION_FRAGMENTS` RPC handler, could push data onto a node that had already reached critical disk usage.

The file-based streaming path in `stream_blob.cc` already had this protection, but the load-and-stream path was missing it.

This patch adds a check for `is_in_critical_disk_utilization_mode()` in the `stream_mutation_fragments` handler in `stream_session.cc`, throwing a `replica::critical_disk_utilization_exception` when the node is at critical disk usage. This mirrors the existing protection in the blob streaming path and closes the gap that allowed data to be written to a node that should have been rejecting all incoming writes.

Fixes https://scylladb.atlassian.net/browse/SCYLLADB-901

The out-of-space prevention mechanism was introduced in 2025.4. The fix should be backported to that release and all later versions.

Closes scylladb/scylladb#28873

* github.com:scylladb/scylladb:
  streaming: reject mutation fragments on critical disk utilization
  test/cluster/storage: Add a reproducer for load-and-stream out-of-space rejection
  sstables: clean up TemporaryHashes file in wipe()
  sstables: add error injection point in write_components
  test/cluster/storage: extract validate_data_existence to module scope
  test/cluster: enable suppress_disk_space_threshold_checks in tests using data_file_capacity
  utils/disk_space_monitor: add error injection to suppress threshold checks
2026-04-20 17:56:36 +03:00

158 lines
5.3 KiB
C++

/*
* Copyright (C) 2024-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
*/
#include <filesystem>
#include <seastar/core/reactor.hh>
#include <seastar/core/sleep.hh>
#include <seastar/core/thread.hh>
#include "utils/disk_space_monitor.hh"
#include "utils/assert.hh"
#include "utils/error_injection.hh"
#include "utils/log.hh"
// Brings the std::chrono duration literal suffixes into scope for this file.
using namespace std::chrono_literals;
// Logger used by the code in this file; it is constructed with the name
// "disk_space_monitor".
seastar::logger dsmlog("disk_space_monitor");
namespace utils {
// Constructs a monitor for the filesystem that contains `data_dir`.
//
// `as`       - external abort source; when it fires, the internal abort
//              source is triggered and anything waiting on `_poll_cv` is woken.
// `data_dir` - path handed to the default space source.
// `cfg`      - configuration; its `capacity_override` is observed so that a
//              change wakes the poller.
//
// The constructor also registers, through listen(), the callback that
// evaluates every entry of `_subscriptions` against the current utilization.
disk_space_monitor::disk_space_monitor(abort_source& as, std::filesystem::path data_dir, config cfg)
    // Propagate an external abort to the internal source and wake the poller.
    : _as_sub(as.subscribe([this] () noexcept {
        _as.request_abort();
        _poll_cv.broadcast();
    }))
    , _signal_barrier("disk_space_monitor::signal_barrier")
    , _data_dir(std::move(data_dir))
    , _cfg(std::move(cfg))
    , _threshold_subscription(listen([this] (const disk_space_monitor& monitor) -> future<> {
        // The injection is queried first, then the utilization is read.
        const bool checks_suppressed = utils::get_local_injector().is_enabled("suppress_disk_space_threshold_checks");
        const float utilization = monitor.disk_utilization();
        // Nothing to evaluate when the checks are suppressed by the error
        // injection or the utilization is negative.
        if (checks_suppressed || utilization < 0.0f) {
            co_return;
        }
        dsmlog.debug("current disk utilization={}", utilization);
        // NOTE(review): this loop suspends while iterating `_subscriptions`;
        // confirm the container tolerates changes made during the await.
        for (auto& sub : _subscriptions) {
            // The configured threshold is clamped to [0, 1] before comparing.
            const float limit = std::clamp(sub._threshold(), 0.0f, 1.0f);
            auto state = above_threshold(utilization > limit);
            const bool state_changed = (sub._threshold_reached != state);
            // The new state is recorded even if no callback is invoked below.
            sub._threshold_reached = state;
            // Subscriptions interested only in crossings are skipped while
            // the state stays the same.
            if (sub._trigger_options.only_crossing_threshold && !state_changed) {
                continue;
            }
            const bool notify_above = (state == above_threshold::yes) && sub._trigger_options.when_above_threshold;
            const bool notify_below = (state == above_threshold::no) && sub._trigger_options.when_below_threshold;
            if (notify_above || notify_below) {
                co_await sub(std::move(state));
            }
        }
    }))
{
    // Default space source: ask the reactor about the data directory.
    _space_source = [this] {
        return engine().file_system_space(_data_dir.native());
    };
    // Wake the poller whenever the capacity override value changes.
    _capacity_observer = make_lw_shared(_cfg.capacity_override.observe([this] (auto) {
        trigger_poll();
    }));
}
// Asserts that the poller future is already available when the monitor is
// destroyed. stop() swaps `_poller_fut` for a ready future after awaiting it;
// presumably callers are expected to await stop() before destruction - confirm.
disk_space_monitor::~disk_space_monitor() {
SCYLLA_ASSERT(_poller_fut.available());
}
// Remembers the monitor and a copy of its current `_space_source` so the
// destructor can put it back. This constructor does not install a new source
// itself; that presumably happens elsewhere (not visible in this file).
disk_space_monitor::space_source_registration::space_source_registration(disk_space_monitor& m)
: _monitor(m)
, _prev_space_source(m._space_source)
{
}
// Restores the space source that was saved when this registration was created.
disk_space_monitor::space_source_registration::~space_source_registration() {
_monitor._space_source = _prev_space_source;
}
// Takes an initial filesystem space reading into `_space_info`, then launches
// the polling loop and keeps its future in `_poller_fut`.
// If the initial reading fails, the exception propagates to the caller and the
// poller is not started.
future<> disk_space_monitor::start() {
_space_info = co_await get_filesystem_space();
_poller_fut = poll();
}
// Shuts the monitor down: requests an abort on the internal abort source,
// wakes the poller, waits on the signal barrier, and finally waits for the
// polling loop itself to finish.
future<> disk_space_monitor::stop() noexcept {
    _as.request_abort();
    // The poller may be parked on the condition variable; wake it so it can
    // observe the abort request.
    _poll_cv.broadcast();
    co_await _signal_barrier.advance_and_await();
    // Leave a ready future behind in `_poller_fut` and await the one that was
    // stored there.
    auto poller = std::exchange(_poller_fut, make_ready_future());
    co_await std::move(poller);
}
// Connects `callback` to `_signal_source` and returns the resulting connection.
//
// Each invocation of the connected slot starts an operation on
// `_signal_barrier` and holds it until `callback(*this)` has completed.
disk_space_monitor::signal_connection_type disk_space_monitor::listen(signal_callback_type callback) {
    auto slot = [this, callback = std::move(callback)] () mutable -> future<> {
        // Held for the whole duration of the callback.
        auto barrier_op = _signal_barrier.start();
        co_await callback(*this);
    };
    return _signal_source.connect(std::move(slot));
}
// Creates a subscription that is evaluated against `threshold` on each poll.
//
// `threshold` - utilization threshold for the subscription.
// `cb`        - callback stored in the subscription.
// `opt`       - options controlling when the callback is triggered.
//
// Returns the new subscription. A poll is triggered right away so the new
// subscription is evaluated without waiting for the next polling interval.
auto disk_space_monitor::subscribe(updateable_value<float> threshold, subscription_callback_type cb, subscription_trigger_options opt) -> subscription {
    // All three by-value parameters are sinks: move them into the subscription
    // instead of copying.
    auto sub = subscription(*this, std::move(threshold), std::move(cb), std::move(opt));
    trigger_poll();
    return sub;
}
// Polling loop; runs until the internal abort source is triggered.
//
// Each iteration refreshes `_space_info`, waits on the signal barrier, fires
// `_signal_source`, and then waits for whatever remains of the polling
// interval. That wait ends early when `_poll_cv` is broadcast.
//
// No exception leaves this coroutine: `sleep_aborted` and
// `abort_requested_exception` end the loop silently, and anything else is
// logged as an error before the loop ends.
future<> disk_space_monitor::poll() {
    try {
        while (!_as.abort_requested()) {
            const auto iteration_start = clock_type::now();
            _space_info = co_await get_filesystem_space();
            // The abort may have been requested while the reading was in flight.
            if (_as.abort_requested()) {
                co_return;
            }
            co_await _signal_barrier.advance_and_await();
            _signal_source();
            const auto elapsed = clock_type::now() - iteration_start;
            const auto interval = get_polling_interval();
            // The iteration already used up the whole interval: poll again now.
            if (elapsed >= interval) {
                continue;
            }
            // The timer bounds the wait to the rest of the interval. It is
            // destroyed at the end of the iteration.
            seastar::timer<clock_type> wakeup([this] { _poll_cv.broadcast(); });
            wakeup.arm(interval - elapsed);
            co_await _poll_cv.wait();
        }
    } catch (const sleep_aborted&) {
    } catch (const abort_requested_exception&) {
    } catch (...) {
        dsmlog.error("poll loop exited with error: {}", std::current_exception());
    }
}
// Wakes the polling loop: poll() waits on `_poll_cv` between iterations, so
// broadcasting on it makes the next iteration start without waiting for the
// rest of the polling interval.
void disk_space_monitor::trigger_poll() noexcept {
_poll_cv.broadcast();
}
// Reads the filesystem space figures from `_space_source`.
//
// When `_cfg.capacity_override` is non-zero, the result is rescaled to that
// capacity: the amounts that are not free / not available are kept (capped at
// the override), and `free` / `available` become the override minus them.
future<std::filesystem::space_info> disk_space_monitor::get_filesystem_space() {
    auto info = co_await _space_source();
    const auto capacity_limit = _cfg.capacity_override();
    if (!capacity_limit) {
        co_return info;
    }
    const auto used = info.capacity - info.free;
    const auto unavailable = info.capacity - info.available;
    // std::min caps each amount at the override so the subtractions below
    // cannot wrap around.
    co_return std::filesystem::space_info{
        .capacity = capacity_limit,
        .free = capacity_limit - std::min(used, capacity_limit),
        .available = capacity_limit - std::min(unavailable, capacity_limit),
    };
}
// Chooses the delay between polls, in seconds: `normal_polling_interval` while
// the disk utilization is below `polling_interval_threshold`, and
// `high_polling_interval` otherwise.
disk_space_monitor::clock_type::duration disk_space_monitor::get_polling_interval() const noexcept {
    const bool below_threshold = disk_utilization() < _cfg.polling_interval_threshold.get();
    const auto interval_seconds = below_threshold ? _cfg.normal_polling_interval.get() : _cfg.high_polling_interval.get();
    return std::chrono::seconds(interval_seconds);
}
} // namespace utils