From 8d029a04aa41aaca84d59c1532db315cfa8cd7fa Mon Sep 17 00:00:00 2001 From: Calle Wilund Date: Tue, 24 Mar 2020 10:02:35 +0000 Subject: [PATCH] db::commitlog: Don't write trailing zero block unless needed Fixes #5899 When terminating (closing) a segment, we write a trailing block of zero so reader can have an empty region after last used chunk as end marker. This is due to using recycled, pre-allocated segments with potentially non-zero data extending over the point where we are ending the segment (i.e. we are not fully filling the segment due to a huge mutation or similar). However, if we reach end of segment writing the final block (typically many small mutations), the file will end naturally after the data written, and any trailing zero block would in fact just extend the file further. While this will only happen once per segment recycled (independent on how many times it is recycled), it is still both slightly breaking the disk usage contract and also potentially causing some disk stalls due to metadata changes (though of course very infrequent). We should only write trailing zero if we are below the max_size file size when terminating Adds a small size check to commitlog test to verify size bounds. (Which breaks without the patch) v2: - Fix test to take into account that files might be deleted behind our backs. v3: - Fix test better, by doing verification _before_ segments are queued for delete. Message-Id: <20200226121601.15347-2-calle@scylladb.com> Message-Id: <20200324100235.23982-1-calle@scylladb.com> (cherry picked from commit 9fee712d6285db8a4b0f8b8115cc750d12c49d7e) --- db/commitlog/commitlog.cc | 14 ++++++++++---- test/boost/commitlog_test.cc | 15 ++++++++++++--- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/db/commitlog/commitlog.cc b/db/commitlog/commitlog.cc index 7491f23654..08e8ce7a5a 100644 --- a/db/commitlog/commitlog.cc +++ b/db/commitlog/commitlog.cc @@ -614,11 +614,17 @@ public: future terminate() { assert(_closed); if (!std::exchange(_terminated, true)) { - clogger.trace("{} is closed but not terminated.", *this); - if (_buffer.empty()) { - new_buffer(0); + // write a terminating zero block iff we are ending (a reused) + // block before actual file end. + // we should only get here when all actual data is + // already flushed (see below, close()). + if (size_on_disk() < _segment_manager->max_size) { + clogger.trace("{} is closed but not terminated.", *this); + if (_buffer.empty()) { + new_buffer(0); + } + return cycle(true, true); } - return cycle(true, true); } return make_ready_future(shared_from_this()); } diff --git a/test/boost/commitlog_test.cc b/test/boost/commitlog_test.cc index 7b8e43e127..25b9c63d06 100644 --- a/test/boost/commitlog_test.cc +++ b/test/boost/commitlog_test.cc @@ -296,7 +296,9 @@ SEASTAR_TEST_CASE(test_commitlog_closed) { SEASTAR_TEST_CASE(test_commitlog_delete_when_over_disk_limit) { commitlog::config cfg; - cfg.commitlog_segment_size_in_mb = 2; + + constexpr auto max_size_mb = 2; + cfg.commitlog_segment_size_in_mb = max_size_mb; cfg.commitlog_total_space_in_mb = 1; cfg.commitlog_sync_period_in_ms = 1; return cl_test(cfg, [](commitlog& log) { @@ -306,8 +308,15 @@ SEASTAR_TEST_CASE(test_commitlog_delete_when_over_disk_limit) { // add a flush handler that simply says we're done with the range. auto r = log.add_flush_handler([&log, sem, segments](cf_id_type id, replay_position pos) { *segments = log.get_active_segment_names(); - log.discard_completed_segments(id); - sem->signal(); + // Verify #5899 - file size should not exceed the config max. + return parallel_for_each(*segments, [](sstring filename) { + return file_size(filename).then([](uint64_t size) { + BOOST_REQUIRE_LE(size, max_size_mb * 1024 * 1024); + }); + }).then([&log, sem, id] { + log.discard_completed_segments(id); + sem->signal(); + }); }); auto set = make_lw_shared>();