From c19f980802a31c03a80efba198eb574de518d60c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Chojnowski?= Date: Thu, 6 Jun 2024 12:43:24 +0200 Subject: [PATCH] storage_proxy: avoid infinite growth of _throttled_writes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit storage_proxy has a throttling mechanism which attempts to limit the number of background writes by forcefully raising CL to ALL (it's not implemented exactly like that, but that's the effect) when the amount of background and queued writes is above some fixed threshold. If this is applied to a write, it becomes "throttled", and its ID is appended to into _throttled_writes. Whenever the amount of background and queued writes falls below the threshold, writes are "unthrottled" — some IDs are popped from _throttled_writes and the writes represented by these IDs — if their handlers still exist — have their CL lowered back. The problem here is that IDs are only ever removed from _throttled_writes if the number of queued and background writes falls below the threshold. But this doesn't have to happen in any finite time, if there's constant write pressure. And in fact, in one load test, it hasn't happened in 3 hours, eventually causing the buffer to grow into gigabytes and trigger OOM. This patch is intended to be a good-enough-in-practice fix for the problem. Fixes #17476 Fixes #1834 (cherry picked from commit 97e1518eb9fcd07422b2e62b60d3f6def4ad0f12) Closes scylladb/scylladb#19179 --- service/storage_proxy.cc | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/service/storage_proxy.cc b/service/storage_proxy.cc index 1c97a381d8..74382d4167 100644 --- a/service/storage_proxy.cc +++ b/service/storage_proxy.cc @@ -2329,6 +2329,21 @@ bool storage_proxy::need_throttle_writes() const { } void storage_proxy::unthrottle() { + // Here, we garbage-collect (from _throttled_writes) the response IDs which are no longer + // relevant, because their handlers are gone. + // + // need_throttle_writes() may remain true for an indefinite amount of time, so without this piece of code, + // _throttled_writes might also grow without any limit. We saw this happen in a throughput test once. + // + // Note that we only remove the irrelevant entries which are in front of the list. + // We don't touch the middle of the list, so an irrelevant ID will still remain in the list if there is some + // earlier ID which is still relevant. But since writes should have some reasonable finite timeout, + // we assume that it's not a problem. + // + while (!_throttled_writes.empty() && !_response_handlers.contains(_throttled_writes.front())) { + _throttled_writes.pop_front(); + } + while(!need_throttle_writes() && !_throttled_writes.empty()) { auto id = _throttled_writes.front(); _throttled_writes.pop_front();